sda <- function (x, ...) UseMethod("sda") ## todo: split the stop arg into two different and have one supercede ## todo: added scaling within the function? sda.default <- function(x, y, lambda=1e-6, stop, maxIte=100, trace=FALSE, tol=1e-6, ...){ ## ## sda performs Sparse Linear Disciminant Analysis ## Solving: argmin{|(y*theta-x*b)|_2^2 + t*|beta|_1 + lambda*|beta|_2^2} ## ## InPUT: ## x : matrix of n observations down the rows and p variable columns. The ## columns are assumed normalized ## y : matrix initializing the dummy variables representing the groups or a factor ## lambda : the weight on the L2-norm for elastic net regression. Default: 1e-6. ## stop : If STOP is negative, its ## absolute value corresponds to the desired number of variables. If STOP ## is positive, it corresponds to an upper bound on the L1-norm of the ## b coefficients. There is a one to one correspondence between stop ## and t. ## maxIte : Maximum number of iterations. Default: 100. ## trace : trace = FALSE turns printing of RSS off and trace = TRUE turns it on. ## tol : Tolerance for the stopping criterion (change in RSS). Default is 1e-6. ## ## OUTPUT: ## $beta : The sparse loadings ## $theta : Optimal scores ## $rss : Residual Sum of Squares at each itearation ## ## Author: Line H. Clemmensen, IMM, DTU, lhc@imm.dtu.dk ## Based on the elastic net algorithm by Hui Zou and Trevor Hastie ## ## this is stright from nnet:::formula class.ind <- function(cl) { n <- length(cl) x <- matrix(0, n, length(levels(cl))) x[(1:n) + n * (as.vector(unclass(cl)) - 1)] <- 1 dimnames(x) <- list(names(cl), levels(cl)) x } if(is.factor(y)) { classes <- levels(y) factorY <- y y <- class.ind(y) } else { classes <- colnames(y) factorY <- factor(colnames(y)[apply(y, 1, which.max)]) } if(!is.matrix(x)) x <- as.matrix(x) predNames <- colnames(x) n <- dim(x)[1] p <- dim(x)[2] K <- dim(y)[2] RSS <- 1e6 RSSold <- Inf ite <- 0 Dpi <- t(y)%*%y/n ## diagonal matrix of class priors Dpi_inv <- diag(1/sqrt(diag(Dpi))) theta <- 1/sum(diag(Dpi))*diag(rep(1,K))[,1:K-1]/K Ytheta <- y%*%theta if (length(stop)< (K-1)){ stop <- rep(stop[1],1,K-1) } if (stop[1]<0) sparse <- "varnum" else sparse <- "penalty" Yhat <- matrix(0,n,K-1) b <- matrix(0,p,K-1) rss <- rep(0,maxIte) while (abs(RSSold-RSS)/RSS > tol & ite < maxIte){ RSSold <- RSS ite <- ite + 1 ## 1. Estimate beta: for (j in 1:(K-1)){ Yc <- Ytheta[,j] beta<- solvebeta(x, Yc, paras=c(lambda, abs(stop[j])),sparse=sparse) b[,j] <- t(beta) Yhat[,j] <- x%*%b[,j] } ## 2. Optimal scores: (balanced Procrustes problem) B <- t(y)%*%Yhat sb <- svd(B,nu=K-1,nv=K-1) theta.old <- theta theta <- Dpi_inv%*%sb$u%*%t(sb$v) Ytheta <- y%*%theta RSS <- sum((Ytheta-Yhat)*(Ytheta-Yhat)) rss[ite] <- RSS if (trace){ cat('ite: ', ite, ' RSS: ', RSS,'\n') } } rss <- rss[1:ite] for (j in 1:(K-1)){ Yc <- Ytheta[,j] beta<- solvebeta(x, Yc, paras=c(lambda, abs(stop[j])),sparse=sparse) b[,j] <- t(beta) Yhat[,j] <- x%*%b[,j] } if (trace){ RSS <- sum((Ytheta-Yhat)*(Ytheta-Yhat)) cat('final update, RSS: ', RSS,'\n') } notZero <- apply(b, 1, function(x) any(x != 0)) b <- b[notZero,] origP <- ncol(x) x <- x[, notZero, drop = FALSE] varNames <- colnames(x) sl <- x %*% b colnames(sl) <- paste("score", 1:ncol(sl), sep = "") lobj<-lda(sl,factorY, ...) ## todo save fitted and probs (and data?) for predict structure( list(call = match.call(), beta = b, theta = theta, varNames = varNames, varIndex = which(notZero), origP = origP, rss = rss[1:ite], fit = lobj, classes = classes, lambda = lambda, stop = stop), class = "sda") } print.sda <- function(x, digits = max(3, getOption("digits") - 3), ...) { cat("\nCall:\n", deparse(x$call), "\n\n", sep = "") cat("lambda =", format(x$lambda, digits = digits), "\nstop =", format(x$stop, digits = digits), "\n\n") top <- if(!is.null(x$varNames)) x$varNames else paste("Predictor", x$varIndex, sep = "") varOrder <- if(is.matrix(x$beta)) order(apply(abs(x$beta), 1, sum)) else order(abs(x$beta)) top <- top[varOrder] top <- top[1:min(5, length(top))] top <- paste(top, collapse = ", ") if(length(x$beta) > 5) { cat("Top 5 predictors (out of ", length(x$varIndex), "):\n\t", top, sep = "") } else { cat("Predictors:\n\t", top, "\n", sep = "") } invisible(x) } predict.sda <- function(object, newdata = NULL, ...) { if(!is.matrix(newdata)) newdata <- as.matrix(newdata) if(!is.null(object$varNames)) { newdata <- newdata[, object$varNames, drop = FALSE] } else { if(ncol(newdata) != object$origP) stop("dimensions of training and testing X different") newdata <- newdata[, object$varIndex, drop = FALSE] } x <- newdata %*% object$beta predict(object$fit, newdata = x, ...) } ## todo: plot and summary