## nonparametric algorithm for Smoothed Likelihood Maximization 
## implementing block structure
#npMSL.old <- function(x, mu0, blockid = 1:ncol(x),
#                       bw=bw.nrd0(as.vector(as.matrix(x))), samebw = TRUE,
#                       h=bw, eps=1e-8,
#                       maxiter=500, ngrid=200, verb = TRUE){
#  bw <- h # h is alternative bandwidth argument, for backward compatibility
#  x <- as.matrix(x)
#  n <- nrow(x)      # number of subjects
#  r <- ncol(x)      # number of measurements in each subject

#  u <- match(blockid, unique(blockid))
#  B <- max(u) 		# nb of blocks
#  BlS <- rep(0,B)	# block sizes = C_ell in JCGS paper
#  for (ell in 1:B) {
#      BlS[ell] <- sum(u == ell)}

#  if (is.matrix(mu0)) # mu0=centers
#    m <- dim(mu0)[1]  else m <- mu0  # mu0=number of clusters
    
#  if(!samebw && !is.matrix(bw)) {
#    bw <- matrix(bw, nrow=max(u), ncol=m)
#  }
#  z.hat <- matrix(0, nrow = n, ncol = m)
#  tt0 <-  proc.time() # for total time
  ## Initial Values
#  if(m == 1) z.hat <- matrix(1, nrow = n, ncol = m) else {
#    kmeans <- kmeans(x, mu0)
#    for(j in 1:m)
#      z.hat[kmeans$cluster==j, j] <- 1
#  }
#  iter <- 0
#  finished <- FALSE
#  lambda <- matrix(0, nrow = maxiter, ncol = m)
#  loglik <- NULL
#  total_udfl <- 0; total_nan <- 0 # for internal checks

#  tmp <- 1:n
#  xtra <- (max(x)-min(x))/10
#  grid <- seq(min(x)-xtra, max(x)+xtra, length=ngrid)
  # f stored on a ngrid by m by B array 
  # f_{g,j,ell} = f_{j ell}(u_g)
  # f <- array(1/m/diff(grid[1:2]), c(ngrid, m, B)) 
  # this f was not normalized for being uniform over grid 
  	
#  Delta <- diff(grid[1:2])
  
#  f <- array(1/((ngrid-1)*Delta), c(ngrid, m, B))
#  oldloglik <- -Inf

#  while (!finished) {
#    iter <- iter + 1
#    bw.old <- bw
#    t0 <- proc.time()
#    nb_udfl=0;  # nb of underflows log(0) cancelled in nems_Estep_v3.c
#    nb_nan=0;  # nb of nonzero K()*log(0) cancelled in nems_Estep_v3.c

    ## Note:  Enter loop assuming E-step is done -- i.e., z.hat in place    
    ## M-Step
#    lambda[iter, ] <- colMeans(z.hat)
    ## density estimation M-step   
#    z=.C("npMSL_Mstep", as.integer(ngrid), as.integer(n),
#       as.integer(m), as.integer(r),
#       as.integer(B), as.integer(BlS), as.integer(u),
#       as.double(bw), as.double(x), as.double(grid),
#       new.f=as.double(f),
#       as.double(lambda[iter,]),
#       as.double(z.hat),
#       PACKAGE="mixtools")
       
#    f <- array(z$new.f, c(ngrid, m, B)) # check sum(f == 0)
## print(apply(f,2:3,sum) * Delta)
## print(max(abs(f-f2)))
##    browser()

    ## E-step (for next iteration)
#    z=.C("npMSL_Estep", as.integer(ngrid), as.integer(n),
#       as.integer(m), as.integer(r),
#       as.integer(B), as.integer(u),
#       as.double(bw),
#       as.double(x), as.double(grid), f=as.double(f),
#       as.double(lambda[iter,]), post=as.double(z.hat),
#       loglik = double(1),
#       nb_udfl = as.integer(nb_udfl), nb_nan = as.integer(nb_nan),
#       PACKAGE="mixtools")

#    nb_udfl = z$nb_udfl; nb_nan = z$nb_nan;
#    total_udfl <- total_udfl + nb_udfl
#    total_nan <- total_nan + nb_nan
    
#    z.hat <- matrix(z$post, n, m)
#    if (sum(is.nan(z.hat)) > 0) cat("Error!! NaN in z.hat") # obsolete ?
#    loglik <- z$loglik
#    loglikchange <- loglik - oldloglik
#    oldloglik <- loglik
#    finished <- iter >= maxiter
#    if (iter>1 && max(abs(lambda[iter, ] - lambda[iter-1, ])) < eps)
#      finished <- TRUE
#    if (verb) {
#      t1 <- proc.time()
#      cat("iteration", iter, ": lambda ", round(lambda[iter, ], 4))
#      cat(" obj change", round(loglikchange,4))
#      cat(" time", (t1 - t0)[3])
#      if (nb_udfl > 0) cat("average underflows=", nb_udfl/(n*m*r)," ")
#      if (nb_nan >0) cat("average NaNs=", nb_nan/(n*m*r))
# Note: these average mean nb of nan over ngrid convolution
#      cat("\n")
#    }
#  }
## f <- array(z$f, c(ngrid, m, r)) # obsolete in block version

#  if (verb) {
#    tt1 <- proc.time()
#    cat("lambda ", round(lambda[iter, ], 4))
#    cat(", total time", (tt1 - tt0)[3], "s\n")
#  }
#  return(structure(list(data = x, posteriors = z.hat,
#                        lambda = lambda[1:iter,], bandwidth = bw,
#                        blockid = u, lambdahat = lambda[iter,], f=f,
#                        grid = grid, loglik = loglik,
#			meanUdfl = total_udfl/(n*m*r*iter),# average underflow
#			meanNaN = total_nan/(n*m*r*iter)), # average nan
#                    class="npEM")) # define a "NEMS" class ?
#}


################################################################ 
################################################################ 
# development of version with adaptive bandwidth
################################################################ 
################################################################ 
npMSL <- function(x, mu0, blockid = 1:ncol(x),
                       bw=bw.nrd0(as.vector(as.matrix(x))), samebw = TRUE,
                       h=bw, eps=1e-8,
                       maxiter=500, ngrid=200, verb = TRUE){
  bw <- h # h is alternative bandwidth argument, for backward compatibility
  x <- as.matrix(x)
  n <- nrow(x)      # number of subjects
  r <- ncol(x)      # number of measurements in each subject
  u <- match(blockid, unique(blockid))
  B <- max(u) 		# nb of blocks
  BlS <- rep(0,B)	# block sizes = C_ell in JCGS paper
  for (ell in 1:B) {
      BlS[ell] <- sum(u == ell)}      
      
  if (is.matrix(mu0)) # mu0=centers
    m <- dim(mu0)[1]  else m <- mu0  # mu0=number of clusters
    
  if(!samebw && !is.matrix(bw)) { # create initial bandwidth matrix 
    bw <- matrix(bw, nrow=max(u), ncol=m)  
  }
  z.hat <- matrix(0, nrow = n, ncol = m)
  tt0 <-  proc.time() # for total time
  ## Initial Values
  if(m == 1) z.hat <- matrix(1, nrow = n, ncol = m) else {
    kmeans <- kmeans(x, mu0)
    for(j in 1:m)
      z.hat[kmeans$cluster==j, j] <- 1
  }
  iter <- 0
  finished <- FALSE
  lambda <- matrix(0, nrow = maxiter, ncol = m)
  loglik <- NULL
  total_udfl <- 0; total_nan <- 0 # eventual NaN and underflow in C code

  tmp <- 1:n
  xtra <- (max(x)-min(x))/10
  grid <- seq(min(x)-xtra, max(x)+xtra, length=ngrid)
  # f stored on a ngrid by m by B array 
  # f_{g,j,ell} = f_{j ell}(u_g)
  # f <- array(1/m/diff(grid[1:2]), c(ngrid, m, B)) 
  # this f was not normalized for being uniform over grid 
  Delta <- diff(grid[1:2])  
  f <- array(1/((ngrid-1)*Delta), c(ngrid, m, B)) 
  oldloglik <- -Inf

  orderx <- xx <- list() # preparation for adaptive bandwidth
  for(k in 1:B) {
    xx[[k]] <- as.vector(x[, u==k])
    if (!samebw) {
      orderx[[k]] = order(xx[[k]]) # only needed for IQR calculation for bw
    }
  }
  CftEstep <- ifelse(samebw, "npMSL_Estep", "npMSL_Estep_bw")
  # CftEstep <- "npMSL_Estep_bw" # temporary, for testing only the M-step  
  CftMstep <- ifelse(samebw, "npMSL_Mstep", "npMSL_Mstep_bw")
  
  while (!finished) {
    iter <- iter + 1
    bw.old <- bw	# is this needed?
    t0 <- proc.time()
    nb_udfl=0;  # nb of underflows log(0) cancelled in nems_Estep.c
    nb_nan=0;  # nb of nonzero K()*log(0) cancelled in nems_Estep.c

    ## Note:  Enter loop assuming E-step is done -- i.e., z.hat in place    
    ## M-Step
    lambda[iter, ] <- colMeans(z.hat)
    
    ## density estimation in M-step: WKDE-step  
      cs <- colSums(z.hat)
      z.tmp <- sweep(z.hat, 2, cs, "/")
      z.tmp[, cs==0] <- 1/NROW(z.tmp) # Just in case

    ## adaptive bandwidth update
    for (ell in 1:B) {
      r2 <- BlS[ell]	# block size = nb of coordinates
      if (!samebw) {
        wts <- apply(z.tmp, 2, function(z) rep(z/r2, r2))
        variances <- colSums(wts*outer(xx[[ell]], colSums(wts*xx[[ell]]),
        			'-')^2)
        iqr <- apply(as.matrix(wts[orderx[[ell]],]), 2, wIQR, 
			xx[[ell]][orderx[[ell]]],
			already.sorted=TRUE, already.normalized=TRUE)
        h <- bw[ell, ] <- 0.9 * pmin(sqrt(variances), iqr/1.34) * 
                   pmax(1,r2*n*lambda[iter, ])^(-1/5) 
                   # Note:  Doesn't allow "sample size" < 1.
#      browser()
      }
    }
        
    z=.C(CftMstep, as.integer(ngrid), as.integer(n),
       as.integer(m), as.integer(r), 
       as.integer(B), as.integer(BlS), as.integer(u),
       as.double(bw), as.double(x), as.double(grid), 
       new.f=as.double(f), 
       as.double(lambda[iter,]), 
       as.double(z.hat))
       
    f <- array(z$new.f, c(ngrid, m, B)) # check sum(f == 0)
# print(apply(f,2:3,sum) * Delta)
# print(max(abs(f-f2)))
#    browser()

    ## E-step (for next iteration)
    z=.C(CftEstep, as.integer(ngrid), as.integer(n),
       as.integer(m), as.integer(r), 
       as.integer(B), as.integer(u),
       as.double(bw),
       as.double(x), as.double(grid), f=as.double(f),
       as.double(lambda[iter,]), post=as.double(z.hat),
       loglik = double(1),
       nb_udfl = as.integer(nb_udfl), nb_nan = as.integer(nb_nan))

    nb_udfl = z$nb_udfl; nb_nan = z$nb_nan; 
    total_udfl <- total_udfl + nb_udfl
    total_nan <- total_nan + nb_nan
    
    z.hat <- matrix(z$post, n, m)
    if (sum(is.nan(z.hat)) > 0) cat("Error!! NaN in z.hat") # obsolete ?
    loglik <- z$loglik
    loglikchange <- loglik - oldloglik
    oldloglik <- loglik
    finished <- iter >= maxiter
    if (iter>1 && max(abs(lambda[iter, ] - lambda[iter-1, ])) < eps)
      finished <- TRUE
    if (verb) {
      t1 <- proc.time()
      cat("iteration", iter, ": lambda ", round(lambda[iter, ], 4))
      cat(" obj change", round(loglikchange,4))
      cat(" time", (t1 - t0)[3])
      if ((nb_udfl > 0) || (nb_nan >0)) cat("\n  ")
      if (nb_udfl > 0) cat("average underflows=", round(nb_udfl/(n*m*r),3)," ")
      if (nb_nan >0) cat("average NaNs=", round(nb_nan/(n*m*r),3))
# Note: these average mean nb of nan over ngrid convolution
      cat("\n")
    }
  }
  # f <- array(z$f, c(ngrid, m, r)) # obsolete in block version

 if (!samebw) {
    rownames(bw) <- paste("block", 1:max(u))
    colnames(bw) <- paste("component", 1:m)
  	}

  if (verb) {
    tt1 <- proc.time()
    cat("lambda ", round(lambda[iter, ], 4))
    cat(", total time", (tt1 - tt0)[3], "s\n")
  }
  return(structure(list(data = x, posteriors = z.hat, 
                        lambda = lambda[1:iter,], bandwidth = bw, 
                        blockid = u, lambdahat = lambda[iter,], f=f,
                        grid = grid, loglik = loglik,
			meanUdfl = total_udfl/(n*m*r*iter),# average underflow
			meanNaN = total_nan/(n*m*r*iter)), # average NaN's
                    class="npEM")) # define a "npMSL" class ?
}