##################################################################################################
# author: Jona Cederbaum
##################################################################################################
# description: FPC-FAMM which predicts the FPC weights (and thus B, C, and E) and re-estimates
# the mean function, including covariate effects, as a functional additive mixed model.
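# note: the FAMM is fitted with pffr (package refund) using mgcv's bam or gam as fitting
# engine; data.table is used for the data construction and package parallel for the
# optional cluster-based estimation.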
##################################################################################################
estimate_fpc_famm_fun <- function(curve_info, my_grid, phi_B_hat_grid, phi_C_hat_grid, phi_E_hat_grid,
nu_B_hat, nu_C_hat, nu_E_hat, t, N_B, N_C, N_E, use_bam_famm,
num_covariates, interaction, which_interaction, n, use_RI,
method, bs_y_famm, bs_int_famm, sigmasq, covariate, para_estim_famm,
para_estim_famm_nc, covariate_form, save_model_famm,
use_discrete){
###################
# initialize output
###################
results <- list()
######################
# extract curve values
######################
y_vec <- curve_info$y_vec
##########################
# construct data and ydata
# for function pffr
##########################
if(!use_RI){
data <- data.table(id_subject = as.factor(curve_info$subject_long),
id_word = as.factor(curve_info$word_long),
id_n = as.factor(curve_info$n_long))
}else{
data <- data.table(id_subject = as.factor(curve_info$subject_long),
id_n = as.factor(curve_info$n_long))
}
if(covariate){
for(i in 1:num_covariates){
data[, paste0("covariate.", i) := curve_info[[paste0("covariate.", i)]]]
if(interaction){
for(k in 1:num_covariates){
if(which_interaction[i, k] & (i < k)){
prod_help <- curve_info[[paste0("covariate.", i)]] * curve_info[[paste0("covariate.", k)]]
data[, paste0("inter_", i, "_", k) := prod_help]
}
}
}
}
}
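# example (hypothetical values): for two dummy covariates with which_interaction[1, 2] = TRUE,
# the loop adds columns covariate.1, covariate.2, and their product inter_1_2 = covariate.1 * covariate.2.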
data <- data[!duplicated(data$id_n), ] # keep only one row per curve (not one per observation) in data
ydata <- data.frame(.obs = curve_info$n_long, .index = t, .value = y_vec)
rownames(data) <- 1:n
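# ydata is in the long format expected by pffr for irregularly/sparsely observed responses:
# .obs holds the curve index, .index the observation point, and .value the observed value;
# data contains one row of scalar covariates per curve.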
################
# construct pcre
# terms for famm
################
names <- vector()
N_vec <- c(N_B, N_C, N_E)
if(!is.null(N_vec)){
funs_names <- c("_B", "_C", "_E")
for(i in seq_along(funs_names)){
if(i == 1){
id <- "id_subject"
}
if(i == 2){
id <- "id_word"
}
if(i == 3){
id <- "id_n"
}
if(N_vec[i] > 0){
names <- cbind(names, paste("pcre(id = ", id, ", efunctions = phi", funs_names[i],
"_hat_grid, evalues = nu", funs_names[i], "_hat, yind = my_grid)",
sep = ""))
}
}
listofbys_pc <- c(as.vector(names))
if(covariate){
names <- vector()
for(i in 1:num_covariates){
if(covariate_form[i] == "by"){
name_help <- paste0("covariate.", i)
}
if(covariate_form[i] == "smooth"){
if(all(data[[paste0("covariate.", i)]] %in% c(0, 1))){
stop("no smooth effects for dummy covariates allowed, ",
"please use covariate_form = 'by' for dummy covariates")
}
name_help <- paste0("s(covariate.", i, ")")
}
names <- cbind(names, name_help)
}
if(interaction == FALSE){
listofbys_cov <- as.vector(names)
}else{
inter_names <- vector(mode = "character")
inter_by <- numeric()
for(i in 1:num_covariates){
for(k in 1:num_covariates){
if(which_interaction[i, k] & (i < k)){
if(!all(data[[paste0("covariate.", i)]] %in% c(0, 1))|!all(data[[paste0("covariate.", k)]] %in% c(0, 1))){
stop("interaction effects are only implemented between dummy covariates")
}
if(covariate_form[i] == "by" & covariate_form[k] == "by"){
inter_names <- cbind(inter_names, paste0("inter_", i, "_", k))
}else{
warning("interaction effects are only implemented between dummy covariates acting as varying-coefficients")
}
}
}
}
listofbys_cov <- c(as.vector(names), c(inter_names))
}
listofbys <- c(listofbys_pc, listofbys_cov)
}else{
listofbys <- listofbys_pc
}
pred <- as.formula(paste("y_vec ~ 1 + ", paste(listofbys, collapse = " + "), sep = ""))
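# example of the resulting formula (assuming N_B, N_C, N_E > 0, one covariate with
# covariate_form = "by", and no interactions):
# y_vec ~ 1 +
#   pcre(id = id_subject, efunctions = phi_B_hat_grid, evalues = nu_B_hat, yind = my_grid) +
#   pcre(id = id_word, efunctions = phi_C_hat_grid, evalues = nu_C_hat, yind = my_grid) +
#   pcre(id = id_n, efunctions = phi_E_hat_grid, evalues = nu_E_hat, yind = my_grid) +
#   covariate.1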
####################################
# get design matrices to get S.scale
####################################
if(use_bam_famm){
#######################
# FPC famm estimation/
# prediction using bam
#######################
famm_setup_get_scale <- pffr(pred, yind = my_grid, data = data, ydata = ydata,
algorithm = "bam", control = gam.control(trace = TRUE),
method = method, bs.yindex = bs_y_famm,
bs.int = bs_int_famm, fit = FALSE)
}else{
#######################
# FPC famm estimation/
# prediction using gam
#######################
famm_setup_get_scale <- pffr(pred, yind = my_grid, data = data, ydata = ydata,
algorithm = "gam", control = gam.control(trace = TRUE),
method = method, bs.yindex = bs_y_famm,
bs.int = bs_int_famm, fit = FALSE)
}
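# fit = FALSE only sets up the model (design and penalty matrices) without estimating it;
# this cheap setup is used solely to extract the penalty scale factors (S.scale) of the
# pcre terms below.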
#################
# extract S.scale
#################
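# smooth[[1]] belongs to the functional intercept; the following entries are the pcre terms
# in the order B, C, E, so their positions depend on which of N_B, N_C, N_E are positive.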
if(N_B > 0){
scale_B <- famm_setup_get_scale$smooth[[2]]$S.scale
if(N_C > 0){
scale_C <- famm_setup_get_scale$smooth[[3]]$S.scale
if(N_E > 0){
scale_E <- famm_setup_get_scale$smooth[[4]]$S.scale
scale_res <- c(scale_B, scale_C, scale_E)
}else{
scale_res <- c(scale_B, scale_C)
}
}else{
if(N_E > 0){
scale_E <- famm_setup_get_scale$smooth[[3]]$S.scale
scale_res <- c(scale_B, scale_E)
}else{
scale_res <- c(scale_B)
}
}
}else{
if(N_C > 0){
scale_C <- famm_setup_get_scale$smooth[[2]]$S.scale
if(N_E > 0){
scale_E <- famm_setup_get_scale$smooth[[3]]$S.scale
scale_res <- c(scale_C, scale_E)
}else{
scale_res <- c(scale_C)
}
}else{
if(N_E > 0){
scale_E <- famm_setup_get_scale$smooth[[2]]$S.scale
scale_res <- c(scale_E)
}else{
scale_res <- NULL
warning("no FPCs chosen at all")
}
}
}
#################
# specify sp_fix
#################
if(covariate){
num_smooth <- sum(covariate_form == "smooth") # add -1 for smooth effects
sp_fix <- c(-1, scale_res * sigmasq, rep(-1, (length(listofbys_cov) + num_smooth)))
}else{
sp_fix <- c(-1, scale_res * sigmasq)
}
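# sp_fix collects the smoothing parameters passed to pffr: -1 lets mgcv estimate the respective
# smoothing parameter (functional intercept and covariate effects), whereas the smoothing
# parameters of the pcre terms are fixed at S.scale * sigmasq (S.scale compensates for mgcv's
# internal rescaling of the penalty matrices).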
##############
# set cluster
# if specified
##############
if(para_estim_famm & !use_discrete){
if(detectCores() > 1){
nc_use <- min(detectCores(), para_estim_famm_nc)
if(.Platform$OS.type == "unix"){
cl_estim <- makeForkCluster(nnodes = nc_use) # fork clusters are only available on unix-alikes (Linux, macOS)
}else{
cl_estim <- makeCluster(nc_use) # PSOCK cluster, also works on Windows
}
}else{
cl_estim <- NULL
}
}else{
cl_estim <- NULL
}
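# cl_estim (if not NULL) is passed to bam via its cluster argument below and is stopped
# again after the model fit.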
###############
# estimate famm
###############
if(use_bam_famm){
if(use_discrete){
famm_estim <- pffr(pred, sp = sp_fix, yind = my_grid, data = data, ydata = ydata,
algorithm = "bam", control = gam.control(trace = TRUE), method = method,
bs.yindex = bs_y_famm, bs.int = bs_int_famm, discrete = TRUE, nthreads = para_estim_famm_nc)
}else{
famm_estim <- pffr(pred, sp = sp_fix, yind = my_grid, data = data, ydata = ydata,
algorithm = "bam", control = gam.control(trace = TRUE), method = method,
bs.yindex = bs_y_famm, bs.int = bs_int_famm, cluster = cl_estim)
}
}else{
famm_estim <- pffr(pred, sp = sp_fix, yind = my_grid, data = data, ydata = ydata,
algorithm = "gam", control = gam.control(trace = TRUE), method = method,
bs.yindex = bs_y_famm, bs.int = bs_int_famm)
}
##########################
# stop cluster if existing
##########################
if(!is.null(cl_estim)) stopCluster(cl_estim)
results[["intercept"]] <- famm_estim$coefficients[1]
results[["sigmasq"]] <- famm_estim$sig2
###############
# get residuals
###############
results[["residuals"]] <- famm_estim$residuals
#####################
# predict in parts
# to avoid many
# duplicates
#####################
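# for each functional random effect, coef() returns the predicted term evaluated on my_grid in
# long format; the predictions are sorted by grouping level and reshaped into a matrix with one
# row per level (subject/word/curve) and one column per grid point.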
if(N_B > 0){
pred1 <- coef(famm_estim, n2 = length(my_grid))$smterms[[2]]$coef
pred1_sorted <- cbind(pred1[order(pred1$id_subject.vec), c("id_subject.vec", "value")],
use = rep(1:length(my_grid), times = length(unique(curve_info$subject_long))))
pred1_sorted_reshaped <- as.matrix(subset(reshape(pred1_sorted, idvar = "id_subject.vec", direction = "wide",
timevar = "use", times = "value", v.names = "value"),
select = - c(id_subject.vec)),
nrow = length(unique(curve_info$subject_long)), ncol = length(my_grid))
dimnames(pred1_sorted_reshaped)[[1]] <- unique(curve_info$subject_long_orig)
dimnames(pred1_sorted_reshaped)[[2]] <- NULL
results[["famm_predict_B"]] <- pred1_sorted_reshaped
}else{
results[["famm_predict_B"]] <- NA
}
if(N_C > 0){
if(N_B > 0){
pred2 <- coef(famm_estim, n2 = length(my_grid))$smterms[[3]]$coef
}else{
pred2 <- coef(famm_estim, n2 = length(my_grid))$smterms[[2]]$coef
}
pred2_sorted <- cbind(pred2[order(pred2$id_word.vec), c("id_word.vec", "value")],
use = rep(1:length(my_grid), times = length(unique(curve_info$word_long))))
pred2_sorted_reshaped <- as.matrix(subset(reshape(pred2_sorted, idvar = "id_word.vec", direction = "wide",
timevar = "use", times = "value", v.names = "value"),
select = - c(id_word.vec)),
nrow = length(unique(curve_info$word_long)), ncol = length(my_grid))
dimnames(pred2_sorted_reshaped)[[1]] <- unique(curve_info$word_long_orig)
dimnames(pred2_sorted_reshaped)[[2]] <- NULL
results[["famm_predict_C"]] <- pred2_sorted_reshaped
}else{
results[["famm_predict_C"]] <- NA
}
if(N_E > 0){
if(N_B > 0){
if(N_C > 0){
pred3 <- coef(famm_estim, n2 = length(my_grid))$smterms[[4]]$coef
}else{
pred3 <- coef(famm_estim, n2 = length(my_grid))$smterms[[3]]$coef
}
}else{
pred3 <- coef(famm_estim, n2 = length(my_grid))$smterms[[2]]$coef
}
pred3_sorted <- cbind(pred3[order(pred3$id_n.vec), c("id_n.vec", "value")],
use = rep(1:length(my_grid), times = length(unique(curve_info$n_long))))
pred3_sorted_reshaped <- as.matrix(subset(reshape(pred3_sorted, idvar = "id_n.vec", direction = "wide",
timevar = "use", times = "value", v.names = "value"),
select = - c(id_n.vec)),
nrow = length(unique(curve_info$n_long)), ncol = length(my_grid))
dimnames(pred3_sorted_reshaped)[[1]] <- unique(curve_info$n_long_orig)
dimnames(pred3_sorted_reshaped)[[2]] <- NULL
results[["famm_predict_E"]] <- pred3_sorted_reshaped
}else{
results[["famm_predict_E"]] <- NA
}
##################
# get covariate
# effects and
# confidence bands
##################
coef_use <- coef(famm_estim, n1 = length(my_grid), n2 = length(my_grid))
if(any(covariate_form == "smooth")){
newdata_smooth_mean <- data.table(id_subject = rep(1, length = length(my_grid)),
id_word = rep(1, length = length(my_grid)),
id_n = rep(1, length = length(my_grid)))
if(covariate){
for(i in 1:num_covariates){
# the covariate mean is computed in the same way for "by" and "smooth" covariates
mean_use <- mean(curve_info[!duplicated(n_long), ][[paste0("covariate.", i)]])
newdata_smooth_mean[, paste0("covariate.", i) := rep(mean_use, length(my_grid))]
if(interaction){
for(k in 1:num_covariates){
if(which_interaction[i, k] & (i < k)){
if(all(data[[paste0("covariate.", i)]] %in% c(0, 1)) & all(data[[paste0("covariate.", k)]] %in% c(0, 1))){
mean_use <- mean(curve_info[!duplicated(n_long), ][[paste0("covariate.", i)]]) * mean(curve_info[!duplicated(n_long), ][[paste0("covariate.", k)]])
newdata_smooth_mean[, paste0("inter_", i, "_", k) := rep(mean_use, length(my_grid))]
}else{
warning("interaction effects are only implemented between dummy covariates")
}
}
}
}
}
}
pred_smooth_mean <- predict(famm_estim, type = "iterms", newdata = newdata_smooth_mean)
results[["pred_smooth_mean"]] <- pred_smooth_mean
}
coef_use_sm <- coef_use$smterms
results[["famm_cb_mean"]] <- coef_use_sm[[1]]$coef
if(covariate){
for(i in 1:num_covariates){
if(covariate_form[i] == "by"){
results[[paste0("famm_cb_covariate.", i)]] <- coef_use_sm[[paste0("covariate.", i, "(yindex)")]][["coef"]]
}
if(covariate_form[i] == "smooth"){
results[[paste0("famm_cb_covariate.", i)]] <- coef_use_sm[[paste0("s(covariate.", i, ")")]][["coef"]]
results[[paste0("famm_cb_covariate.", i, "_mean")]] <- pred_smooth_mean[[paste0("ti(covariate.", i, ", yindex.vec)")]]
}
if(interaction){
for(k in 1:num_covariates){
if(which_interaction[i, k] & (i < k)){
if(all(curve_info[[paste0("covariate.", i)]] %in% c(0, 1)) & all(curve_info[[paste0("covariate.", k)]] %in% c(0, 1))){
results[[paste0("famm_cb_inter_", i, "_", k)]] <- coef_use_sm[[paste0("inter_", i, "_", k, "(yindex)")]][["coef"]]
}else{
warning("interaction effects are only implemented between dummy covariates")
}
}
}
}
}
}
###################################################
# get original basis weights of FPC-FAMM estimation
###################################################
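# the predicted random curves are projected back onto the estimated eigenfunctions: with
# Phi denoting the respective eigenfunction matrix and one identity block per grouping level,
# the basis weights solve (I %x% Phi) xi = vec(predicted curves) in the least squares sense
# (via the QR decomposition).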
#######
# for B
if(N_B > 0){
xid <- diag(length(unique(data$id_subject)))
xef <- phi_B_hat_grid
xi_B_hat_famm_help <- qr.coef(qr(xid %x% xef), as.vector(t(results[["famm_predict_B"]])))
xi_B_hat_famm <- t(matrix(xi_B_hat_famm_help, ncol = length(unique(data$id_subject)), nrow = N_B))
row.names(xi_B_hat_famm) <- unique(curve_info$subject_long_orig)
}else{
xi_B_hat_famm <- NA
}
results[["xi_B_hat_famm"]] <- xi_B_hat_famm
#######
# for C
if(N_C > 0){
xid <- diag(length(unique(data$id_word)))
xef <- phi_C_hat_grid
xi_C_hat_famm_help <- qr.coef(qr(xid %x% xef), as.vector(t(results[["famm_predict_C"]])))
xi_C_hat_famm <- t(matrix(xi_C_hat_famm_help, ncol = length(unique(data$id_word)), nrow = N_C))
row.names(xi_C_hat_famm) <- unique(curve_info$word_long_orig)
}else{
xi_C_hat_famm <- NA
}
results[["xi_C_hat_famm"]] <- xi_C_hat_famm
#######
# for E
if(N_E > 0){
xid <- diag(length(unique(data$id_n)))
xef <- phi_E_hat_grid
xi_E_hat_famm_help <- qr.coef(qr(xid %x% xef), as.vector(t(results[["famm_predict_E"]])))
xi_E_hat_famm <- t(matrix(xi_E_hat_famm_help, ncol = length(unique(data$id_n)), nrow = N_E))
row.names(xi_E_hat_famm) <- unique(curve_info$n_long_orig)
}else{
xi_E_hat_famm <- NA
}
results[["xi_E_hat_famm"]] <- xi_E_hat_famm
##################
# save famm object
# if specified
##################
if(save_model_famm){
results[["famm_estim"]] <- famm_estim
}
famm_estim <- NULL
}else{
warning("no FPCs chosen at all")
}
###############
# return output
###############
return(results)
}
###################################################################################################