Revision 046813c7361fb51046a7e264bed101ca0099e509 authored by Line Clemmensen on 28 February 2009, 00:00:00 UTC, committed by Gabor Csardi on 28 February 2009, 00:00:00 UTC
0 parent
sda.R
sda <- function (x, ...) UseMethod("sda")
## todo: split the stop arg into two different and have one supercede
## todo: added scaling within the function?
sda.default <- function(x, y, lambda=1e-6, stop, maxIte=100, trace=FALSE, tol=1e-6, ...){
##
## sda performs Sparse Linear Disciminant Analysis
## Solving: argmin{|(y*theta-x*b)|_2^2 + t*|beta|_1 + lambda*|beta|_2^2}
##
## InPUT:
## x : matrix of n observations down the rows and p variable columns. The
## columns are assumed normalized
## y : matrix initializing the dummy variables representing the groups or a factor
## lambda : the weight on the L2-norm for elastic net regression. Default: 1e-6.
## stop : If STOP is negative, its
## absolute value corresponds to the desired number of variables. If STOP
## is positive, it corresponds to an upper bound on the L1-norm of the
## b coefficients. There is a one to one correspondence between stop
## and t.
## maxIte : Maximum number of iterations. Default: 100.
## trace : trace = FALSE turns printing of RSS off and trace = TRUE turns it on.
## tol : Tolerance for the stopping criterion (change in RSS). Default is 1e-6.
##
## OUTPUT:
## $beta : The sparse loadings
## $theta : Optimal scores
## $rss : Residual Sum of Squares at each itearation
##
## Author: Line H. Clemmensen, IMM, DTU, lhc@imm.dtu.dk
## Based on the elastic net algorithm by Hui Zou and Trevor Hastie
##
## this is stright from nnet:::formula
class.ind <- function(cl) {
n <- length(cl)
x <- matrix(0, n, length(levels(cl)))
x[(1:n) + n * (as.vector(unclass(cl)) - 1)] <- 1
dimnames(x) <- list(names(cl), levels(cl))
x
}
if(is.factor(y))
{
classes <- levels(y)
factorY <- y
y <- class.ind(y)
} else {
classes <- colnames(y)
factorY <- factor(colnames(y)[apply(y, 1, which.max)])
}
if(!is.matrix(x)) x <- as.matrix(x)
predNames <- colnames(x)
n <- dim(x)[1]
p <- dim(x)[2]
K <- dim(y)[2]
RSS <- 1e6
RSSold <- Inf
ite <- 0
Dpi <- t(y)%*%y/n ## diagonal matrix of class priors
Dpi_inv <- diag(1/sqrt(diag(Dpi)))
theta <- 1/sum(diag(Dpi))*diag(rep(1,K))[,1:K-1]/K
Ytheta <- y%*%theta
if (length(stop)< (K-1)){
stop <- rep(stop[1],1,K-1)
}
if (stop[1]<0) sparse <- "varnum" else sparse <- "penalty"
Yhat <- matrix(0,n,K-1)
b <- matrix(0,p,K-1)
rss <- rep(0,maxIte)
while (abs(RSSold-RSS)/RSS > tol & ite < maxIte){
RSSold <- RSS
ite <- ite + 1
## 1. Estimate beta:
for (j in 1:(K-1)){
Yc <- Ytheta[,j]
beta<- solvebeta(x, Yc, paras=c(lambda, abs(stop[j])),sparse=sparse)
b[,j] <- t(beta)
Yhat[,j] <- x%*%b[,j]
}
## 2. Optimal scores: (balanced Procrustes problem)
B <- t(y)%*%Yhat
sb <- svd(B,nu=K-1,nv=K-1)
theta.old <- theta
theta <- Dpi_inv%*%sb$u%*%t(sb$v)
Ytheta <- y%*%theta
RSS <- sum((Ytheta-Yhat)*(Ytheta-Yhat))
rss[ite] <- RSS
if (trace){
cat('ite: ', ite, ' RSS: ', RSS,'\n')
}
}
rss <- rss[1:ite]
for (j in 1:(K-1)){
Yc <- Ytheta[,j]
beta<- solvebeta(x, Yc, paras=c(lambda, abs(stop[j])),sparse=sparse)
b[,j] <- t(beta)
Yhat[,j] <- x%*%b[,j]
}
if (trace){
RSS <- sum((Ytheta-Yhat)*(Ytheta-Yhat))
cat('final update, RSS: ', RSS,'\n')
}
notZero <- apply(b, 1, function(x) any(x != 0))
b <- b[notZero,]
origP <- ncol(x)
x <- x[, notZero, drop = FALSE]
varNames <- colnames(x)
sl <- x %*% b
colnames(sl) <- paste("score", 1:ncol(sl), sep = "")
lobj<-lda(sl,factorY, ...)
## todo save fitted and probs (and data?) for predict
structure(
list(call = match.call(),
beta = b,
theta = theta,
varNames = varNames,
varIndex = which(notZero),
origP = origP,
rss = rss[1:ite],
fit = lobj,
classes = classes,
lambda = lambda,
stop = stop),
class = "sda")
}
print.sda <- function(x, digits = max(3, getOption("digits") - 3), ...)
{
cat("\nCall:\n", deparse(x$call), "\n\n", sep = "")
cat("lambda =", format(x$lambda, digits = digits),
"\nstop =", format(x$stop, digits = digits),
"\n\n")
top <- if(!is.null(x$varNames)) x$varNames else paste("Predictor", x$varIndex, sep = "")
varOrder <- if(is.matrix(x$beta)) order(apply(abs(x$beta), 1, sum)) else order(abs(x$beta))
top <- top[varOrder]
top <- top[1:min(5, length(top))]
top <- paste(top, collapse = ", ")
if(length(x$beta) > 5)
{
cat("Top 5 predictors (out of ",
length(x$varIndex),
"):\n\t",
top,
sep = "")
} else {
cat("Predictors:\n\t",
top,
"\n",
sep = "")
}
invisible(x)
}
predict.sda <- function(object, newdata = NULL, ...)
{
if(!is.matrix(newdata)) newdata <- as.matrix(newdata)
if(!is.null(object$varNames))
{
newdata <- newdata[, object$varNames, drop = FALSE]
} else {
if(ncol(newdata) != object$origP) stop("dimensions of training and testing X different")
newdata <- newdata[, object$varIndex, drop = FALSE]
}
x <- newdata %*% object$beta
predict(object$fit, newdata = x, ...)
}
## todo: plot and summary
Computing file changes ...