https://github.com/ab4377/dream-project
Raw File
Tip revision: 8e64d9628355fde264b6481f9955478fed995fe5 authored by Avinash Bukkittu on 04 October 2017, 22:20:26 UTC
Merge branch 'master' of https://github.com/ab4377/dream-project
Tip revision: 8e64d96
PD_Challenge_SC1_Fit_Models_forTest.R
# External Call:
# PD_score_challenge1(training_features)
# where training_features is a data frame with the training matrix
# MUST CONTAIN recordId IN 1st COLUMN

library(synapseClient)
library(caret)
library(caretEnsemble)
library(pROC)
library(glmnet)
library(e1071)
library(randomForest)
library(kernlab)
library(nnet)
library(data.table)

synapseLogin()



PD_score_challenge1<-function(training_features){
  #manipulate incoming data into dataframe
  training_features<-as.data.frame(training_features)
  recordidname<-names(training_features)[1]
  featurenames<-names(training_features)[-1]
  training_features[,featurenames] <- sapply( training_features[,featurenames], as.numeric )

  print("Reading and merging covariates")
  #Read-in and merge covariates
  training_features<-download_merge_covariate(training_features)

  print("Summarizing the data")
  #Summarize by Median
  covs_num<-c("age")
  covs_fac<-c("gender")
  groupvariables<-c("healthCode", "medTimepoint", "professional.diagnosis", covs_num, covs_fac) #"age", "gender") #, "appVersion.walktest", "phoneInfo.walktest")

  ensemble_model<-NULL
  
  if(any(is.na(training_features[,featurenames]))){
    print("WARNING: Missing values/records not allowed!")
  } else {
    dttrain<-data.table(training_features)
    mediantraining<-dttrain[, lapply(.SD, median, na.rm=TRUE), by=groupvariables, .SDcols=featurenames ]
    mediantraining<-as.data.frame(mediantraining)
  
    print("Fitting the model")
    #Fit model
    ensemble_model<-fit_model(mediantraining, featurenames, covs_num, covs_fac)
  }
  
  return(ensemble_model)
}



download_merge_covariate<-function(features){
  
  #download correct covariates
  synid<-"syn10233116"

  syndemos<-synGet(synid)
  demos<-read.csv(attributes(syndemos)$filePath, header=T, as.is=T)
  
  # Merge Data
  mergeddata<-cbind(demos, features[match(demos$recordId.walktest, features[,1]), -1])
  if(dim(features)[2]==2){
    names(mergeddata)[dim(mergeddata)[2]]<-names(features)[2]
  }
  
  return(mergeddata)
}

fit_model<-function(training, featurenames, covs_num, covs_fac){
  trainoutcome<-training$professional.diagnosis
  trainoutcome<-factor(ifelse(trainoutcome, "PD", "Control"))
  
  trainfeatures<-training[,c(covs_num, covs_fac,featurenames)]
  trainfeatures[,covs_fac]<-sapply(trainfeatures[,covs_fac], as.factor)
  
  print("Run Control")  
  set.seed(1234)
  myControl <- trainControl(  method="boot",
                              number = 50,
                              savePredictions="final",
                              classProbs=TRUE,
                              index=createResample(trainoutcome, 50),
                              summaryFunction=twoClassSummary )
  
  
  tmpmmatrix<-model.matrix( ~ ., trainfeatures)
  tmpmmatrix<-tmpmmatrix[,-1]
  
  print(dim(tmpmmatrix))
  print(length(trainoutcome))
  print(table(trainoutcome))
  
  print("Caret Call")
  model_list <- caretList(
    y=trainoutcome , x=tmpmmatrix,
    trControl=myControl,
    metric = "ROC",
    tuneList=list(glmnet = caretModelSpec(method = "glmnet"), 
                  rf = caretModelSpec(method="rf"), 
                  svmLinear = caretModelSpec(method="svmLinear"), 
                  knn = caretModelSpec(method="knn"), 
                  nnet = caretModelSpec(method = "nnet", trace=F)),
    continue_on_fail = FALSE, preProc = c("center", "scale")
  )
  
  print("Ensemble run")
  greedy_ensemble <- caretEnsemble(
    model_list, 
    metric="ROC",
    trControl=trainControl(
      number=length(model_list),
      summaryFunction=twoClassSummary,
      classProbs=TRUE
    ))
  return(greedy_ensemble)
}

mydata = read.csv('feature_set_both_3.csv')

back to top