FAMD_iterative.R
#remove.packages("FactoMineR")
#install.packages('FactoMineR', dependencies = TRUE)
#install.packages("Rcpp", repos="https://rcppcore.github.io/drat")
library(FactoMineR)
library(factoextra)
library(jsonlite)
myArgs <- commandArgs(trailingOnly = TRUE)
data <- read.csv(myArgs)
########################################################################
# get contributing and not contributing variables from dataframe
get_contributing_variables <- function(data_frame_var_contrib) {
col_names_var_contrib <- colnames(data_frame_var_contrib)
contributing_variables <- c()
not_contributing_variables <- c()
index <- 1
sum_contri <- 0
for (val in data_frame_var_contrib[1,]){
sum_contri <- sum_contri + abs(val)
if (abs(val) > 100/length(col_names_var_contrib)) {
contributing_variables <- append(contributing_variables, col_names_var_contrib[index])
} else {
not_contributing_variables <- append(not_contributing_variables, col_names_var_contrib[index])
}
index <- index +1
}
df_contributing_variables_from_wine <- as.data.frame(data[,contributing_variables])
if (length(not_contributing_variables) > 0) {
df_NOT_contributing_variables_from_wine <- as.data.frame(data[,not_contributing_variables])
return (list(df_contributing_variables_from_wine, df_NOT_contributing_variables_from_wine))
} else {
return (list(df_contributing_variables_from_wine, NULL))
}
}
########################################################################
#run FAMD
run_FAMD <- function(data_frame, index) {
data_types <- sapply(data_frame, class)
run_type <- 'MCA'
if ((any(data_types=="integer") || any(data_types == "numeric")) && (any(data_types == "factor") || any(data_types == "character"))) {
run_type <- 'famd'
} else if ((any(data_types=="integer") || any(data_types == "numeric")) && (!any(data_types == "factor") || !any(data_types == "character"))) {
run_type <- 'PCA'
}
if(run_type == 'famd' || run_type == 'MCA') {
################################
# do FAMD in case of qualitative and quantitative data
res.famd <- FAMD(data_frame, graph = FALSE)
var <- get_famd_var(res.famd)
# print(res.famd$eig)
var_contrib <- res.famd[["var"]][["contrib"]]
# Contribution to the first dimension
# print(fviz_contrib(res.famd, "var", axes = 1))
# print(fviz_contrib(res.famd, "var", axes = 2))
data_frame_var_contribution <- as.data.frame(t(var_contrib))
return(list(data_frame_var_contribution, res.famd$eig, res.famd$ind$coord, res.famd$var$coord, index))
} else if (run_type == 'PCA') {
##########################
# in case of only numerical values use PCA
res.famd = PCA(data_frame, scale.unit=TRUE, ncp=5, graph=FALSE)
var <- res.famd$var
# print(res.famd$eig)
# print(fviz_contrib(res.famd, "var", axes = 1))
# print(fviz_contrib(res.famd, "var", axes = 2))
var_contrib <- var$contrib
data_frame_var_contribution <- as.data.frame(t(var_contrib))
return(list(data_frame_var_contribution, res.famd$eig, res.famd$ind$coord, res.famd$var$coord, index))
}
}
get_famds <- function(data, index) {
if (length(colnames(data))<=1) {
# not enough dimensions for a meaningful FAMD
return(list(colnames(data)))
}
contribution_of_dimensions_and_eigen_value <- run_FAMD(data, index)
eigenvalue <- contribution_of_dimensions_and_eigen_value[[2]]
if (eigenvalue[1][1] > 1) {
contribution_of_dimensions <- contribution_of_dimensions_and_eigen_value[[1]]
contributing_and_not_contributing <- get_contributing_variables(contribution_of_dimensions)
INITIAL_contributing <- contributing_and_not_contributing[[1]]
INITIAL_not_contributing <- contributing_and_not_contributing[[2]]
if (length(INITIAL_contributing) <2) {
return(list(colnames(data)))
}
INITIAL_contributing_PC_FINAL <- run_FAMD(INITIAL_contributing, index)
return(c(Recall(INITIAL_not_contributing, index + 1), list(INITIAL_contributing_PC_FINAL)))
} else {
return(list(colnames(data)))
}
}
list_final_contributing <- get_famds(data, 1)
cat(jsonlite::toJSON(list_final_contributing, pretty=TRUE))