##################################################################################################
# author: Jona Cederbaum
##################################################################################################
# description: FPCA-FAMM, which predicts the FPC weights (and thus B, C, and E) and re-estimates
# the mean function, including covariates, as a functional additive mixed model.
##################################################################################################
estimate_fpc_famm_fun <- function(curve_info, my_grid, phi_B_hat_grid, phi_C_hat_grid,
                                  phi_E_hat_grid, nu_B_hat, nu_C_hat, nu_E_hat, t, N_B, N_C, N_E,
                                  use_bam_famm, num_covariates, interaction, which_interaction,
                                  n, use_RI, method, bs_y_famm, bs_int_famm, sigmasq, covariate,
                                  para_estim_famm, para_estim_famm_nc, covariate_form,
                                  save_model_famm, use_discrete){

  ###################
  # initialize output
  ###################
  results <- list()

  ######################
  # extract curve values
  ######################
  y_vec <- curve_info$y_vec

  ##########################
  # construct data and ydata
  # for function pffr
  ##########################
  if(!use_RI){
    data <- data.table(id_subject = as.factor(curve_info$subject_long),
                       id_word = as.factor(curve_info$word_long),
                       id_n = as.factor(curve_info$n_long))
  }else{
    data <- data.table(id_subject = as.factor(curve_info$subject_long),
                       id_n = as.factor(curve_info$n_long))
  }

  if(covariate){
    for(i in 1:num_covariates){
      data[, paste0("covariate.", i) := curve_info[[paste0("covariate.", i)]]]
      if(interaction){
        for(k in 1:num_covariates){
          if(which_interaction[i, k] & (i < k)){
            prod_help <- curve_info[[paste0("covariate.", i)]] *
              curve_info[[paste0("covariate.", k)]]
            data[, paste0("inter_", i, "_", k) := prod_help]
          }
        }
      }
    }
  }

  # only need one row per curve, not per observation, in data
  data <- data[!duplicated(data$id_n), ]

  ydata <- data.frame(.obs = curve_info$n_long, .index = t, .value = y_vec)
  rownames(data) <- 1:n

  ################
  # construct pcre
  # terms for famm
  ################
  names <- vector()
  N_vec <- c(N_B, N_C, N_E)

  if(!is.null(N_vec)){
    funs_names <- c("_B", "_C", "_E")
    for(i in seq_along(funs_names)){
      if(i == 1){
        id <- "id_subject"
      }
      if(i == 2){
        id <- "id_word"
      }
      if(i == 3){
        id <- "id_n"
      }
      if(N_vec[i] > 0){
        names <- cbind(names, paste("pcre(id = ", id, ", efunctions = phi", funs_names[i],
                                    "_hat_grid, evalues = nu", funs_names[i],
                                    "_hat, yind = my_grid)", sep = ""))
      }
    }
    listofbys_pc <- c(as.vector(names))

    if(covariate){
      names <- vector()
      for(i in 1:num_covariates){
        if(covariate_form[i] == "by"){
          name_help <- paste("covariate.", i, "", sep = "")
        }
        if(covariate_form[i] == "smooth"){
          if(all(data[[paste0("covariate.", i)]] %in% c(0, 1))){
            stop("no smooth effects for dummy covariates allowed, please use covariate_form = 'by' for dummy covariates")
          }
          name_help <- paste0("s(covariate.", i, ")")
        }
        names <- cbind(names, name_help)
      }
      if(interaction == FALSE){
        listofbys_cov <- as.vector(names)
      }else{
        inter_names <- vector(mode = "character")
        inter_by <- numeric()
        for(i in 1:num_covariates){
          for(k in 1:num_covariates){
            if(which_interaction[i, k] & (i < k)){
              if(!all(data[[paste0("covariate.", i)]] %in% c(0, 1)) |
                 !all(data[[paste0("covariate.", k)]] %in% c(0, 1))){
                stop("interaction effects are only implemented between dummy covariates")
              }
              if(covariate_form[i] == "by" & covariate_form[k] == "by"){
                inter_names <- cbind(inter_names, paste("inter_", i, "_", k, "", sep = ""))
              }else{
                warning("interaction effects are only implemented between dummy covariates acting as varying-coefficients")
              }
            }
          }
        }
        listofbys_cov <- c(as.vector(names), c(inter_names))
      }
      listofbys <- c(listofbys_pc, listofbys_cov)
    }else{
      listofbys <- listofbys_pc
    }

    pred <- as.formula(paste("y_vec ~ 1 + ", paste(listofbys, collapse = " + "), sep = ""))
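    # Illustrative example (hypothetical, not produced by the code above): with N_B, N_C, and N_E
    # all positive and a single dummy covariate used with covariate_form = "by", the constructed
    # formula reads
    #   y_vec ~ 1 +
    #     pcre(id = id_subject, efunctions = phi_B_hat_grid, evalues = nu_B_hat, yind = my_grid) +
    #     pcre(id = id_word, efunctions = phi_C_hat_grid, evalues = nu_C_hat, yind = my_grid) +
    #     pcre(id = id_n, efunctions = phi_E_hat_grid, evalues = nu_E_hat, yind = my_grid) +
    #     covariate.1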
    ####################################
    # get design matrices to get S.scale
    ####################################
    if(use_bam_famm){
      #######################
      # FPC famm estimation/
      # prediction using bam
      #######################
      famm_setup_get_scale <- pffr(pred, yind = my_grid, data = data, ydata = ydata,
                                   algorithm = "bam", control = gam.control(trace = TRUE),
                                   method = method, bs.yindex = bs_y_famm,
                                   bs.int = bs_int_famm, fit = FALSE)
    }else{
      #######################
      # FPC famm estimation/
      # prediction using gam
      #######################
      famm_setup_get_scale <- pffr(pred, yind = my_grid, data = data, ydata = ydata,
                                   algorithm = "gam", control = gam.control(trace = TRUE),
                                   method = method, bs.yindex = bs_y_famm,
                                   bs.int = bs_int_famm, fit = FALSE)
    }

    #################
    # extract S.scale
    #################
    if(N_B > 0){
      scale_B <- famm_setup_get_scale$smooth[[2]]$S.scale
      if(N_C > 0){
        scale_C <- famm_setup_get_scale$smooth[[3]]$S.scale
        if(N_E > 0){
          scale_E <- famm_setup_get_scale$smooth[[4]]$S.scale
          scale_res <- c(scale_B, scale_C, scale_E)
        }else{
          scale_res <- c(scale_B, scale_C)
        }
      }else{
        if(N_E > 0){
          scale_E <- famm_setup_get_scale$smooth[[3]]$S.scale
          scale_res <- c(scale_B, scale_E)
        }else{
          scale_res <- c(scale_B)
        }
      }
    }else{
      if(N_C > 0){
        scale_C <- famm_setup_get_scale$smooth[[2]]$S.scale
        if(N_E > 0){
          scale_E <- famm_setup_get_scale$smooth[[3]]$S.scale
          scale_res <- c(scale_C, scale_E)
        }else{
          scale_res <- c(scale_C)
        }
      }else{
        if(N_E > 0){
          scale_E <- famm_setup_get_scale$smooth[[2]]$S.scale
          scale_res <- c(scale_E)
        }else{
          scale_res <- NULL
          warning("no FPCs chosen at all")
        }
      }
    }
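    # Note on the smoothing parameters fixed below (a reading of the code, not an original
    # comment): in mgcv, negative entries of 'sp' are estimated while non-negative entries are
    # held fixed. The leading -1 therefore leaves the smoothing parameter of the functional
    # intercept free; the entries scale_res * sigmasq fix the pcre terms at the estimated error
    # variance times mgcv's internal penalty rescaling factor (S.scale), so the FPC weights are
    # predicted given the FPCA estimates rather than re-smoothed; and the trailing -1 entries let
    # the smoothing parameters of the covariate effects be estimated. The extra -1 per "smooth"
    # covariate is presumably needed because pffr expands s(covariate) to a ti(covariate, yindex)
    # term carrying two penalties.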
    #################
    # specify sp_fix
    #################
    if(covariate){
      num_smooth <- sum(covariate_form == "smooth")  # add -1 for smooth effects
      sp_fix <- c(-1, scale_res * sigmasq, rep(-1, (length(listofbys_cov) + num_smooth)))
    }else{
      sp_fix <- c(-1, scale_res * sigmasq)
    }

    ##############
    # set cluster
    # if specified
    ##############
    if(para_estim_famm & !use_discrete){
      if(detectCores() > 1){
        nc_use <- min(detectCores(), para_estim_famm_nc)
        if(.Platform$OS.type == "unix"){
          cl_estim <- makeForkCluster(nnodes = nc_use)  # only runs on linux
        }else{
          cl_estim <- makeCluster(nc_use)  # also runs on windows
        }
      }else{
        cl_estim <- NULL
      }
    }else{
      cl_estim <- NULL
    }

    ###############
    # estimate famm
    ###############
    if(use_bam_famm){
      if(use_discrete){
        famm_estim <- pffr(pred, sp = sp_fix, yind = my_grid, data = data, ydata = ydata,
                           algorithm = "bam", control = gam.control(trace = TRUE),
                           method = method, bs.yindex = bs_y_famm, bs.int = bs_int_famm,
                           discrete = TRUE, nthreads = para_estim_famm_nc)
      }else{
        famm_estim <- pffr(pred, sp = sp_fix, yind = my_grid, data = data, ydata = ydata,
                           algorithm = "bam", control = gam.control(trace = TRUE),
                           method = method, bs.yindex = bs_y_famm, bs.int = bs_int_famm,
                           cluster = cl_estim)
      }
    }else{
      famm_estim <- pffr(pred, sp = sp_fix, yind = my_grid, data = data, ydata = ydata,
                         algorithm = "gam", control = gam.control(trace = TRUE),
                         method = method, bs.yindex = bs_y_famm, bs.int = bs_int_famm)
    }

    ##########################
    # stop cluster if existing
    ##########################
    if(!is.null(cl_estim)) stopCluster(cl_estim)

    results[["intercept"]] <- famm_estim$coefficients[1]
    results[["sigmasq"]] <- famm_estim$sig2

    ###############
    # get residuals
    ###############
    results[["residuals"]] <- famm_estim$residuals

    #####################
    # predict in parts
    # to avoid many
    # doubles
    #####################
    if(N_B > 0){
      pred1 <- coef(famm_estim, n2 = length(my_grid))$smterms[[2]]$coef
      pred1_sorted <- cbind(pred1[order(pred1$id_subject.vec), c("id_subject.vec", "value")],
                            use = rep(1:length(my_grid),
                                      times = length(unique(curve_info$subject_long))))
      pred1_sorted_reshaped <- as.matrix(subset(reshape(pred1_sorted, idvar = "id_subject.vec",
                                                        direction = "wide", timevar = "use",
                                                        times = "value", v.names = "value"),
                                                select = - c(id_subject.vec)),
                                         nrow = length(unique(curve_info$subject_long)),
                                         ncol = length(my_grid))
      dimnames(pred1_sorted_reshaped)[[1]] <- unique(curve_info$subject_long_orig)
      dimnames(pred1_sorted_reshaped)[[2]] <- NULL
      results[["famm_predict_B"]] <- pred1_sorted_reshaped
    }else{
      results[["famm_predict_B"]] <- NA
    }

    if(N_C > 0){
      if(N_B > 0){
        pred2 <- coef(famm_estim, n2 = length(my_grid))$smterms[[3]]$coef
      }else{
        pred2 <- coef(famm_estim, n2 = length(my_grid))$smterms[[2]]$coef
      }
      pred2_sorted <- cbind(pred2[order(pred2$id_word.vec), c("id_word.vec", "value")],
                            use = rep(1:length(my_grid),
                                      times = length(unique(curve_info$word_long))))
      pred2_sorted_reshaped <- as.matrix(subset(reshape(pred2_sorted, idvar = "id_word.vec",
                                                        direction = "wide", timevar = "use",
                                                        times = "value", v.names = "value"),
                                                select = - c(id_word.vec)),
                                         nrow = length(unique(curve_info$word_long)),
                                         ncol = length(my_grid))
      dimnames(pred2_sorted_reshaped)[[1]] <- unique(curve_info$word_long_orig)
      dimnames(pred2_sorted_reshaped)[[2]] <- NULL
      results[["famm_predict_C"]] <- pred2_sorted_reshaped
    }else{
      results[["famm_predict_C"]] <- NA
    }

    if(N_E > 0){
      # index of the E term in smterms depends on which pcre terms precede it
      if(N_B > 0){
        if(N_C > 0){
          pred3 <- coef(famm_estim, n2 = length(my_grid))$smterms[[4]]$coef
        }else{
          pred3 <- coef(famm_estim, n2 = length(my_grid))$smterms[[3]]$coef
        }
      }else{
        if(N_C > 0){
          pred3 <- coef(famm_estim, n2 = length(my_grid))$smterms[[3]]$coef
        }else{
          pred3 <- coef(famm_estim, n2 = length(my_grid))$smterms[[2]]$coef
        }
      }
      pred3_sorted <- cbind(pred3[order(pred3$id_n.vec), c("id_n.vec", "value")],
                            use = rep(1:length(my_grid),
                                      times = length(unique(curve_info$n_long))))
      pred3_sorted_reshaped <- as.matrix(subset(reshape(pred3_sorted, idvar = "id_n.vec",
                                                        direction = "wide", timevar = "use",
                                                        times = "value", v.names = "value"),
                                                select = - c(id_n.vec)),
                                         nrow = length(unique(curve_info$n_long)),
                                         ncol = length(my_grid))
      dimnames(pred3_sorted_reshaped)[[1]] <- unique(curve_info$n_long_orig)
      dimnames(pred3_sorted_reshaped)[[2]] <- NULL
      results[["famm_predict_E"]] <- pred3_sorted_reshaped
    }else{
      results[["famm_predict_E"]] <- NA
    }
    ##################
    # get covariate
    # effects and
    # confidence bands
    ##################
    coef_use <- coef(famm_estim, n1 = length(my_grid), n2 = length(my_grid))

    if(any(covariate_form == "smooth")){
      newdata_smooth_mean <- data.table(id_subject = rep(1, length = length(my_grid)),
                                        id_word = rep(1, length = length(my_grid)),
                                        id_n = rep(1, length = length(my_grid)))
      if(covariate){
        for(i in 1:num_covariates){
          if(covariate_form[i] == "by"){
            mean_use <- mean(curve_info[!duplicated(n_long), ][[paste0("covariate.", i)]])
            newdata_smooth_mean[, paste0("covariate.", i) := rep(mean_use, length(my_grid))]
          }else{
            mean_use <- mean(curve_info[!duplicated(n_long), ][[paste0("covariate.", i)]])
            newdata_smooth_mean[, paste0("covariate.", i) := rep(mean_use, length(my_grid))]
          }
          if(interaction){
            for(k in 1:num_covariates){
              if(which_interaction[i, k] & (i < k)){
                if(all(data[[paste0("covariate.", i)]] %in% c(0, 1)) &
                   all(data[[paste0("covariate.", k)]] %in% c(0, 1))){
                  mean_use <- mean(curve_info[!duplicated(n_long), ][[paste0("covariate.", i)]]) *
                    mean(curve_info[!duplicated(n_long), ][[paste0("covariate.", k)]])
                  newdata_smooth_mean[, paste0("inter_", i, "_", k) := rep(mean_use, length(my_grid))]
                }else{
                  warning("interaction effects are only implemented between dummy covariates")
                }
              }
            }
          }
        }
      }
      pred_smooth_mean <- predict(famm_estim, type = "iterms", newdata = newdata_smooth_mean)
      results[["pred_smooth_mean"]] <- pred_smooth_mean
    }

    coef_use_sm <- coef_use$smterms
    results[["famm_cb_mean"]] <- coef_use_sm[[1]]$coef

    if(covariate){
      for(i in 1:num_covariates){
        if(covariate_form[i] == "by"){
          results[[paste0("famm_cb_covariate.", i)]] <-
            coef_use_sm[[paste0("covariate.", i, "(yindex)")]][["coef"]]
        }
        if(covariate_form[i] == "smooth"){
          results[[paste0("famm_cb_covariate.", i)]] <-
            coef_use_sm[[paste0("s(covariate.", i, ")")]][["coef"]]
          results[[paste0("famm_cb_covariate.", i, "_mean")]] <-
            pred_smooth_mean[[paste0("ti(covariate.", i, ", yindex.vec)")]]
        }
        if(interaction){
          for(k in 1:num_covariates){
            if(which_interaction[i, k] & (i < k)){
              if(all(curve_info[[paste0("covariate.", i)]] %in% c(0, 1)) &
                 all(curve_info[[paste0("covariate.", k)]] %in% c(0, 1))){
                results[[paste0("famm_cb_inter_", i, "_", k)]] <-
                  coef_use_sm[[paste0("inter_", i, "_", k, "(yindex)")]][["coef"]]
              }else{
                warning("interaction effects are only implemented between dummy covariates")
              }
            }
          }
        }
      }
    }

    ###################################################
    # get original basis weights of FPC-FAMM estimation
    ###################################################
    #######
    # for B
    if(N_B > 0){
      xid <- diag(length(unique(data$id_subject)))
      xef <- phi_B_hat_grid
      xi_B_hat_famm_help <- qr.coef(qr(xid %x% xef), as.vector(t(results[["famm_predict_B"]])))
      xi_B_hat_famm <- t(matrix(xi_B_hat_famm_help, ncol = length(unique(data$id_subject)),
                                nrow = N_B))
      row.names(xi_B_hat_famm) <- unique(curve_info$subject_long_orig)
    }else{
      xi_B_hat_famm <- NA
    }
    results[["xi_B_hat_famm"]] <- xi_B_hat_famm

    #######
    # for C
    if(N_C > 0){
      xid <- diag(length(unique(data$id_word)))
      xef <- phi_C_hat_grid
      xi_C_hat_famm_help <- qr.coef(qr(xid %x% xef), as.vector(t(results[["famm_predict_C"]])))
      xi_C_hat_famm <- t(matrix(xi_C_hat_famm_help, ncol = length(unique(data$id_word)),
                                nrow = N_C))
      row.names(xi_C_hat_famm) <- unique(curve_info$word_long_orig)
    }else{
      xi_C_hat_famm <- NA
    }
    results[["xi_C_hat_famm"]] <- xi_C_hat_famm

    #######
    # for E
    if(N_E > 0){
      xid <- diag(length(unique(data$id_n)))
      xef <- phi_E_hat_grid
      xi_E_hat_famm_help <- qr.coef(qr(xid %x% xef), as.vector(t(results[["famm_predict_E"]])))
      xi_E_hat_famm <- t(matrix(xi_E_hat_famm_help, ncol = length(unique(data$id_n)),
                                nrow = N_E))
      row.names(xi_E_hat_famm) <- unique(curve_info$n_long_orig)
    }else{
      xi_E_hat_famm <- NA
    }
    results[["xi_E_hat_famm"]] <- xi_E_hat_famm

    ##################
    # save famm object
    # if specified
    ##################
    if(save_model_famm){
      results[["famm_estim"]] <- famm_estim
    }
    famm_estim <- NULL

  }else{
    warning("no FPCs chosen at all")
  }

  ###############
  # return output
  ###############
  return(results)
}
##################################################################################################
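##################################################################################################
# Illustrative sketch (not part of the package): the "get original basis weights" step above
# recovers the FPC weights from the predicted effect curves by least squares on a
# Kronecker-structured design, i.e. it solves (I_levels %x% Phi) xi = vec(t(pred)). The toy
# objects below (phi_toy, xi_toy, pred_toy) are hypothetical and only demonstrate the reshaping.
phi_toy <- cbind(sin(seq(0, pi, length.out = 5)),
                 cos(seq(0, pi, length.out = 5)))                    # grid points x FPCs
xi_toy <- matrix(c(1, -2, 0.5, 0, 1.5, -1), nrow = 3, byrow = TRUE)  # levels x FPCs
pred_toy <- xi_toy %*% t(phi_toy)                                    # effect curves: levels x grid
xi_rec <- t(matrix(qr.coef(qr(diag(nrow(xi_toy)) %x% phi_toy), as.vector(t(pred_toy))),
                   ncol = nrow(xi_toy), nrow = ncol(phi_toy)))       # recovered weights
all.equal(xi_rec, xi_toy, check.attributes = FALSE)                  # should be TRUE
##################################################################################################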