## selection.R -- from caret version 5.01-001 (Max Kuhn, 02 September 2011)


# This function sorts the tuning parameter data frame from
# least complex models to most complex models

byComplexity <- function(x, model)
  {
    switch(model,
           rda =
           {
             # since lda is less complex than qda, we
             # sort on lambda (larger values are less complex)
             x[order(-x$lambda, x$gamma),] 
           },
           hda =
           {
             x[order(x$newdim, -x$lambda, x$gamma),] 
           },           
           gbm =
           {
             # This is a toss-up, but the number of trees probably adds
             # complexity faster than the number of splits
             x[order(x$n.trees, x$interaction.depth, x$shrinkage),] 
           },
           bstTree =
           {
             x[order(x$mstop, x$maxdepth, x$nu),] 
           },
           bstLs =, bstSm = 
           {
             x[order(x$mstop, x$nu),] 
           },            
           rf =, rfNWS =, rfLSF =, parRF =, gpls =, pcr =, pls =, PLS =, pam =, cforest =,
           nb =, rpart =, ctree2 =, logitBoost =, J48 =, LMT =, ppr =, mda =, pda =, pda2 =,
           lars =, lars2 =, Linda =, QdaCov =, icr =, qrf =, Boruta =,
           leapForward =, leapBackward =, leapSeq = 
           {
             x[order(x[,1]),]
           },
           JRip =
           {
             x[order(x[,1], decreasing = TRUE),]
           },
           M5Rules =
           {
             x$pruned <- factor(as.character(x$pruned), levels = c("Yes", "No"))
             x$smoothed <- factor(as.character(x$smoothed), levels = c("Yes", "No"))
             x[order(x$pruned, x$smoothed),]
           },
           M5 =
           {
             x$pruned <- factor(as.character(x$pruned), levels = c("Yes", "No"))
             x$smoothed <- factor(as.character(x$smoothed), levels = c("Yes", "No"))
             x$rules <- factor(as.character(x$rules), levels = c("Yes", "No"))
             x[order(x$pruned, x$smoothed, x$rules),]
           },             
           svmradial =, svmRadial =
           {
             # If the cost is high, the decision boundary will work hard to
             # adapt. Also, if C is fixed, smaller values of sigma yield more
             # complex boundaries
             x[order(x$C, -x$sigma),]
           },
           svmRadialCost =
           {
             x[order(x$C),]
           },
           svmpoly =, svmPoly =
           {
             x[order(x$degree, x$C, x$scale),]
           },
           svmLinear =
           {
             x[order(x$C),]
           },           
           rvmRadial=, lssvmRadial =, gaussprRadial =
           {
             x[order(-x$sigma),]
           },
           rvmPoly =, lssvmPoly =, gaussprPoly =
           {
             x[order(x$degree, x$scale),]
           },           
           avNNet =, nnet =, pcaNNet =
           {
             x[order(x$size, -x$decay),]
           },
           qrnn =
           {
             x[order(x$n.hidden, -x$penalty),]
           },           
           knn =, multinom =
           {
             x[order(-x[,1]),]
           },
           lvq =
           {
             x[order(-x$k, -x$size),]
           },
           mars =, earth =, fda =, bagEarth =, bagFDA =
           {
             x[order(x$degree, x$nprune),]
           },
           gcvEarth =
           {
             x[order(x$degree),]
           },           
           treebag =, lda =, lm =, sddaLDA =, sddaQDA =,
           glmStepAIC =,lmStepAIC =, slda =, glm =, qda =, OneR =,
           rvmLinear =, lssvmLinear =, gaussprLinear =,
           rlm =, vbmpRadial =, glmrob =, logforest =
           {
             x
           },
           glmboost =, gamboost =
           {
             x[order(x$mstop, x$prune),]
           },
           blackboost =
           {
             x[order(x$mstop, x$maxdepth),]
           },
           ada =
           {
             x[order(x$iter, x$maxdepth, x$nu),]
           },
           ctree =
           {
             # If this is the threshold for splitting, then
             # smaller thresholds mean more complexity
             x[order(-x$mincriterion),]
           },
           enet =
           {
             x[order(x$fraction, -x$lambda),]
           },
           ridge =
           {
             x[order(-x$lambda),]
           },           
           lasso =
           {
             x[order(x$fraction),]
           },
           superpc =
           {
             x[order(x$threshold, x$n.components),]
           },
           sparseLDA =
           {
             x[order(x$NumVars, -x$lambda),]
           },
           relaxo =
           {
             x[order(x$phi, -x$lambda),]
           },           
           penalized =
           {
             x[order(x$lambda1, x$lambda2),]
           },
           spls =, splsda = 
           {
             x[order(-x$eta, x$K),]
           },
           smda =
           {
             x[order(x$NumVars, x$R, -x$lambda),]
           },
           nodeHarvest =
           {
             x[order(x$maxinter, x$mode),]
           },
           plr =
           {
             x[order(-x$lambda),]
           },
           partDSA =
           {
             x[order(x$cut.off.growth, x$MPD),]
           },
           bag =
           {
             x[order(x$vars),]
           },
           hdda =
           {
             x[order(-x$threshold),]
           },
           plsGlmBinomial =, plsGlmGaussian =, plsGlmGamma =, plsGlmPoisson =
           {
             x[order(x$nt),]
           },
           cubist =
           {
             x[order(x$committees, x$neighbors),]
           },
           logreg = x[order(x$ntrees, x$treesize),],
           logicBag = x[order(x$ntrees, x$nleaves),],
           neuralnet = x[order(x$layer1, x$layer2, x$layer3),],
           scrda = x[order(x$alpha, x$delta, decreasing = TRUE),],
           rocc = x[order(x$xgenes),],
           foba = x[order(x$k, -x$lambda),], 
           obliqueTree = x[order(x$variable.selection),],
           PART = x[order(x$pruned, -x$threshold),],
           sda = x[order(x$diagonal),],
           glmnet = x[order(-x$lambda, x$alpha),],
           stepLDA =, stepQDA = x[order(x$maxvar),],
           GAMens = x[order(x$iter, x$rsm_size),],
           gam = x[order(x$select),],
           gamLoess =  x[order(x$degree, -x$span),],
           gamSpline =  x[order(x$df),],
           stop("no sorting routine for this model")
           )

  }
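
## A minimal usage sketch (not in the original source; the grid below is
## hypothetical): byComplexity() reorders a tuning grid so that row 1 is
## the least complex candidate model.
if(FALSE) {
  grid <- expand.grid(n.trees = c(100, 50),
                      interaction.depth = c(3, 1),
                      shrinkage = 0.1)
  byComplexity(grid, "gbm")
  ## rows come back sorted by n.trees, then interaction.depth,
  ## then shrinkage, per the gbm rule above
}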

## In these functions, x is the data frame of performance values and tuning parameters.

## best: return the row index of the tuning parameter combination with
## the numerically best (largest or smallest) value of the metric.
best <- function(x, metric, maximize)
  {
    bestIter <- if(maximize) which.max(x[,metric]) else which.min(x[,metric])
    bestIter
  }
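
## A small sketch with made-up resampling results (one row per candidate,
## in order of increasing complexity, as produced by byComplexity):
if(FALSE) {
  res <- data.frame(ntrees = c(50, 100, 150),
                    RMSE   = c(1.60, 1.40, 1.50),
                    RMSESD = c(0.30, 0.25, 0.28))
  best(res, metric = "RMSE", maximize = FALSE)  ## 2: the lowest RMSE
}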

## oneSE: apply the one standard error rule: find the numerically best
## setting, then select the least complex model whose performance is
## within one standard error of it. `num` is the number of resamples used
## to compute the standard error; rows of x are assumed to be ordered
## from least to most complex (see byComplexity).
oneSE <- function(x, metric, num, maximize)
  {
    index <- 1:nrow(x)
    
    if(!maximize)
      {
        bestIndex <- which.min(x[,metric])  
        perf <- x[bestIndex,metric] + (x[bestIndex,paste(metric, "SD", sep = "")])/sqrt(num)
        candidates <- index[x[, metric] <= perf]
        bestIter <- min(candidates)
      } else {
        bestIndex <- which.max(x[,metric])  
        perf <- x[bestIndex,metric] - (x[bestIndex,paste(metric, "SD", sep = "")])/sqrt(num)

        candidates <- index[x[, metric] >= perf]
        bestIter <- min(candidates)
      }
    bestIter
  }
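
## A similar made-up table: the one-SE cutoff is 1.40 + 0.25/sqrt(25) = 1.45,
## so the first (least complex) row at or below 1.45 is chosen. `num = 25`
## is an assumed resample count for illustration.
if(FALSE) {
  res <- data.frame(ntrees = c(50, 100, 150),
                    RMSE   = c(1.44, 1.40, 1.50),
                    RMSESD = c(0.30, 0.25, 0.28))
  oneSE(res, metric = "RMSE", num = 25, maximize = FALSE)  ## 1
}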

## tolerance: select the least complex model whose performance is within
## `tol` percent of the numerically best value of the metric.
tolerance <- function(x, metric, tol = 1.5, maximize)
  {
       
    index <- 1:nrow(x)
    
    if(!maximize)
      {
        best <- min(x[,metric])  
        perf <- (x[,metric] - best)/best * 100
        candidates <- index[perf < tol]
        bestIter <- min(candidates)
      } else {
        best <- max(x[,metric])  
        perf <- (x[,metric] - best)/best * -100
        candidates <- index[perf < tol]
        bestIter <- min(candidates)
      }
    bestIter
  }
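
## Tolerance sketch (hypothetical values): row 1 is within
## (1.41 - 1.40)/1.40 * 100 ~= 0.71 percent of the best RMSE, under the
## default 1.5 percent tolerance, so the simpler model in row 1 wins.
if(FALSE) {
  res <- data.frame(ntrees = c(50, 100, 150),
                    RMSE   = c(1.41, 1.40, 1.50))
  tolerance(res, metric = "RMSE", tol = 1.5, maximize = FALSE)  ## 1
}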

