swh:1:snp:ffdd0a7d2c8ea15ad41d45b3b178f668bd942287
Tip revision: 9a82fa3773bc49c10018153abbbc483df7f2004b authored by Derek Young on 30 March 2012, 00:00:00 UTC
version 0.4.5
version 0.4.5
Tip revision: 9a82fa3
npMSL.R
## nonparametric algorithm for Smoothed Likelihood Maximization
## implementing block structure
npMSL <- function(x, mu0, blockid = 1:ncol(x),
bw=bw.nrd0(as.vector(as.matrix(x))), samebw = TRUE,
h=bw, eps=1e-8,
maxiter=500, ngrid=200, verb = TRUE){
bw <- h # h is alternative bandwidth argument, for backward compatibility
x <- as.matrix(x)
n <- nrow(x) # number of subjects
r <- ncol(x) # number of measurements in each subject
u <- match(blockid, unique(blockid))
B <- max(u) # nb of blocks
BlS <- rep(0,B) # block sizes = C_ell in JCGS paper
for (ell in 1:B) {
BlS[ell] <- sum(u == ell)}
if (is.matrix(mu0)) # mu0=centers
m <- dim(mu0)[1] else m <- mu0 # mu0=number of clusters
if(!samebw && !is.matrix(bw)) {
bw <- matrix(bw, nrow=max(u), ncol=m)
}
z.hat <- matrix(0, nrow = n, ncol = m)
tt0 <- proc.time() # for total time
## Initial Values
if(m == 1) z.hat <- matrix(1, nrow = n, ncol = m) else {
kmeans <- kmeans(x, mu0)
for(j in 1:m)
z.hat[kmeans$cluster==j, j] <- 1
}
iter <- 0
finished <- FALSE
lambda <- matrix(0, nrow = maxiter, ncol = m)
loglik <- NULL
total_udfl <- 0; total_nan <- 0 # for internal checks
tmp <- 1:n
xtra <- (max(x)-min(x))/10
grid <- seq(min(x)-xtra, max(x)+xtra, length=ngrid)
# f stored on a ngrid by m by B array
# f_{g,j,ell} = f_{j ell}(u_g)
# f <- array(1/m/diff(grid[1:2]), c(ngrid, m, B))
# this f was not normalized for being uniform over grid
Delta <- diff(grid[1:2])
f <- array(1/((ngrid-1)*Delta), c(ngrid, m, B))
oldloglik <- -Inf
while (!finished) {
iter <- iter + 1
bw.old <- bw
t0 <- proc.time()
nb_udfl=0; # nb of underflows log(0) cancelled in nems_Estep_v3.c
nb_nan=0; # nb of nonzero K()*log(0) cancelled in nems_Estep_v3.c
## Note: Enter loop assuming E-step is done -- i.e., z.hat in place
## M-Step
lambda[iter, ] <- colMeans(z.hat)
## density estimation M-step
z=.C("npMSL_Mstep", as.integer(ngrid), as.integer(n),
as.integer(m), as.integer(r),
as.integer(B), as.integer(BlS), as.integer(u),
as.double(bw), as.double(x), as.double(grid),
new.f=as.double(f),
as.double(lambda[iter,]),
as.double(z.hat),
PACKAGE="mixtools")
f <- array(z$new.f, c(ngrid, m, B)) # check sum(f == 0)
# print(apply(f,2:3,sum) * Delta)
# print(max(abs(f-f2)))
# browser()
## E-step (for next iteration)
z=.C("npMSL_Estep", as.integer(ngrid), as.integer(n),
as.integer(m), as.integer(r),
as.integer(B), as.integer(u),
as.double(bw),
as.double(x), as.double(grid), f=as.double(f),
as.double(lambda[iter,]), post=as.double(z.hat),
loglik = double(1),
nb_udfl = as.integer(nb_udfl), nb_nan = as.integer(nb_nan),
PACKAGE="mixtools")
nb_udfl = z$nb_udfl; nb_nan = z$nb_nan;
total_udfl <- total_udfl + nb_udfl
total_nan <- total_nan + nb_nan
z.hat <- matrix(z$post, n, m)
if (sum(is.nan(z.hat)) > 0) cat("Error!! NaN in z.hat") # obsolete ?
loglik <- z$loglik
loglikchange <- loglik - oldloglik
oldloglik <- loglik
finished <- iter >= maxiter
if (iter>1 && max(abs(lambda[iter, ] - lambda[iter-1, ])) < eps)
finished <- TRUE
if (verb) {
t1 <- proc.time()
cat("iteration", iter, ": lambda ", round(lambda[iter, ], 4))
cat(" obj change", round(loglikchange,4))
cat(" time", (t1 - t0)[3])
if (nb_udfl > 0) cat("average underflows=", nb_udfl/(n*m*r)," ")
if (nb_nan >0) cat("average NaNs=", nb_nan/(n*m*r))
# Note: these average mean nb of nan over ngrid convolution
cat("\n")
}
}
# f <- array(z$f, c(ngrid, m, r)) # obsolete in block version
if (verb) {
tt1 <- proc.time()
cat("lambda ", round(lambda[iter, ], 4))
cat(", total time", (tt1 - tt0)[3], "s\n")
}
return(structure(list(data = x, posteriors = z.hat,
lambda = lambda[1:iter,], bandwidth = bw,
blockid = u, lambdahat = lambda[iter,], f=f,
grid = grid, loglik = loglik,
meanUdfl = total_udfl/(n*m*r*iter),# average underflow
meanNaN = total_nan/(n*m*r*iter)), # average nan
class="npEM")) # define a "NEMS" class ?
}