Revision 215dc19479aad7da6918390886e04de4fd2194ca authored by didacvp on 28 December 2020, 12:12:50 UTC, committed by GitHub on 28 December 2020, 12:12:50 UTC
1 parent de18867
RandomHyperParameterSearchCV.R
# Positional command-line arguments, in order:
#   1 eta, 2 max_depth, 3 gamma, 4 min_child_weight, 5 nrounds,
#   6 i, 7 idx, 8 data_folder, 9 sex_split (optional)
args <- commandArgs(trailingOnly = TRUE)
eta <- as.numeric(args[1])
max_depth <- as.numeric(args[2])
gamma <- as.numeric(args[3])
min_child_weight <- as.numeric(args[4])
nrounds <- as.numeric(args[5])
i <- as.numeric(args[6])
idx <- as.numeric(args[7])
data_folder <- as.character(args[8])
# as.logical() does not raise on bad input; it returns NA when the ninth
# argument is absent or is not a recognisable logical string. Collapse that
# case to FALSE so the value is always usable in an `if` condition.
sex_split <- isTRUE(as.logical(args[9]))
# Cross-validate one XGBoost hyper-parameter combination and store the result.
#
# Loads "vars.Rda", "All_raw.Rda" and "All_preproc.Rda" from the data folder,
# runs xgb.cv() on the training rows (once overall, or once per sex when
# `sex_split` is TRUE), averages the per-fit results and writes them into row
# `idx` of `xgb_grid_1`, which is re-saved to "RandomHyperParameterSearchCV.Rda".
#
# Arguments:
#   eta, max_depth, gamma, min_child_weight  booster parameters given to xgb.cv
#   nrounds      maximum number of boosting rounds
#   i            value written to xgb_grid_1$idx[idx]
#   idx          row of xgb_grid_1 that receives the results
#   data_folder  folder name, resolved relative to the hard-coded base folder
#   sex_split    logical; TRUE fits one model per distinct value of df$sex with
#                5 folds, otherwise a single model with 10 folds. A missing or
#                NA value is treated as FALSE.
#
# The objects `df`, `subs.long`, `T1w_vars` and `xgb_grid_1` are not created
# here; presumably they come from the loaded .Rda files (verify against the
# scripts that produce those files).
#
# Returns invisible NULL (the value of the final Sys.sleep()); called for its
# side effect on the .Rda file.
HyperParameterSearchCV <- function(eta, max_depth, gamma, min_child_weight, nrounds, i, idx, data_folder, sex_split) {
  # The command-line wrapper can hand over NA (argument absent or unparseable),
  # which would make every `if (sex_split ...)` below fail.
  if (missing(sex_split)) {
    sex_split <- FALSE
  }
  sex_split <- isTRUE(sex_split)

  print(idx)
  basefolder <- "/cluster/projects/p274/projects/p024-modes_of_variation"
  data_folder <- file.path(basefolder, data_folder)

  # Install anything that is missing from the local repository, then attach.
  # library() is used instead of require() so that a package that still cannot
  # be loaded stops the job here rather than failing later with an obscure
  # "could not find function" error.
  list.of.packages <- c("dplyr", "xgboost", "caret")
  new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
  if (length(new.packages) > 0) {
    install.packages(new.packages, repos = "file://tsd/shared/R/cran")
  }
  for (pkg in list.of.packages) {
    library(pkg, character.only = TRUE)
  }

  load(file.path(data_folder, "vars.Rda"))
  load(file.path(data_folder, "All_raw.Rda"))
  load(file.path(data_folder, "All_preproc.Rda"))

  # Build one (features, label) pair per model to be cross-validated.
  # Rows whose eid is in subs.long are excluded from training.
  df.Train <- list()
  data.train <- list()
  label.train <- list()
  if (sex_split) {
    # Index by position rather than by `value + 1`, so the lists have no NULL
    # holes when sex is not coded exactly as 0/1.
    sex_levels <- sort(unique(df$sex))
    for (k in seq_along(sex_levels)) {
      df.Train[[k]] <- df %>% filter(!eid %in% subs.long & sex == sex_levels[k])
      data.train[[k]] <- df.Train[[k]][, T1w_vars] %>% as.matrix()
      label.train[[k]] <- df.Train[[k]]$age %>% as.matrix()
    }
  } else {
    df.Train[[1]] <- df %>% filter(!eid %in% subs.long)
    data.train[[1]] <- df.Train[[1]][, T1w_vars] %>% as.matrix()
    label.train[[1]] <- df.Train[[1]]$age %>% as.matrix()
  }

  # Fewer folds when the sample is divided by sex.
  nfold <- if (sex_split) 5 else 10

  params <- list(
    booster = "gbtree",
    objective = "reg:squarederror",
    eta = eta,
    max_depth = max_depth,
    gamma = gamma,
    min_child_weight = min_child_weight
  )

  # One slot per fit: best test RMSE, the round where it occurred, and the
  # lowest train RMSE.
  n_fits <- length(data.train)
  rmse <- numeric(n_fits)
  Nrsme <- numeric(n_fits)
  train <- numeric(n_fits)
  for (j in seq_len(n_fits)) {
    xgbcv <- xgb.cv(
      params = params,
      data = data.train[[j]],
      label = label.train[[j]],
      nrounds = nrounds,
      nfold = nfold,
      showsd = TRUE,
      stratified = TRUE,
      print_every_n = 30,
      # The original passed `early_stop_round`, which is not an xgb.cv
      # argument, so early stopping was never active.
      early_stopping_rounds = 10,
      maximize = FALSE
    )
    cv_log <- xgbcv$evaluation_log
    rmse[j] <- min(cv_log$test_rmse_mean)
    Nrsme[j] <- which.min(cv_log$test_rmse_mean)
    train[j] <- min(cv_log$train_rmse_mean)
  }

  # NOTE(review): this load / modify / save cycle on a shared file is not
  # synchronised. If several jobs run this function concurrently, one job can
  # overwrite rows written by another. Consider per-job output files or a
  # file lock.
  print("loading file")
  load(file.path(data_folder, "RandomHyperParameterSearchCV.Rda"))
  print("including output in data.frame")
  print(dim(xgb_grid_1))
  print(xgb_grid_1[1, ])
  xgb_grid_1$rmse[idx] <- mean(rmse)
  xgb_grid_1$Nrmse[idx] <- mean(Nrsme)
  xgb_grid_1$idx[idx] <- i
  xgb_grid_1$train[idx] <- mean(train)
  print("saving file")
  Sys.sleep(1)
  print(xgb_grid_1[idx, ])
  save("xgb_grid_1",
       file = file.path(data_folder, "RandomHyperParameterSearchCV.Rda"))
  Sys.sleep(1)
}
# Run the search for the single parameter combination given on the command line.
HyperParameterSearchCV(
  eta = eta,
  max_depth = max_depth,
  gamma = gamma,
  min_child_weight = min_child_weight,
  nrounds = nrounds,
  i = i,
  idx = idx,
  data_folder = data_folder,
  sex_split = sex_split
)
Computing file changes ...