##' Selection of Tuning Parameter s in Guided Sparse K-means ##' ##' Select tuning parameter s via permutation in Guided Sparse K-means integrating clinical dataset with gene expression dataset. ##' @title GuidedSparseKmeans.S ##' @param x Gene expression matrix, n*p (rows for subjects and columns for genes). ##' @param z One phenotypic variable from clinical dataset, a vector. ##' @param K Number of clusters. ##' @param s The boundary of l1n weights, a vector. ##' @param lam The intensity of guidance. ##' @param model The model fitted to obtain R2, please select model from 'linear', 'logit', 'exp', 'polr','cox'. ##' @param nstart Specify the number of starting point for K-means. ##' @param maxiter Maximum number of iteration. ##' @param nperms Number of permutation times ##' @param silence Output progress or not. ##' @return A list consisting of ##' \item{nnonzerows}{number of nonzero weights.} ##' \item{gaps}{gap statistics.} ##' \item{segaps}{statard error of gap statistics.} ##' \item{s}{candidates of s.} ##' \item{s.best}{the best s with largest gap.} ##' @export ##' @author Lingsong Meng GuidedSparseKmeans.S <- function(x, z, K, s, lam, model, nstart = 20, maxiter = 15, nperms = 50, silence = F) { if (is.null(s)) stop("Must provide either K or centers.") if (min(s) <= 1) stop("s should be greater than 1, since otherwise only one weight will be nonzero.") if (length(s) < 2) stop("s should be a vector of at least two elements.") if (is.null(K)) stop("Must provide either K or centers.") permx <- list() nnonzerows <- NULL for (i in 1:nperms) { permx[[i]] <- matrix(NA, nrow = nrow(x), ncol = ncol(x)) for (j in 1:ncol(x)) permx[[i]][, j] <- sample(x[, j]) } tots <- NULL out <- GuidedSparseKmeans(x, z, K, s, lam, model, nstart, maxiter, silence) for (i in 1:length(out)) { nnonzerows <- c(nnonzerows, sum(out[[i]]$weights != 0)) tots <- c(tots, getSS(x = x, c = out[[i]]$clusters, w = out[[i]]$weights)$bcss.w) } permtots <- matrix(NA, nrow = length(s), ncol = nperms) for (k in 1:nperms) { if (silence == F) print(paste0("nperms=", k)) perm.out <- GuidedSparseKmeans(permx[[k]], z, K, s, lam, model, nstart, maxiter, silence) for (i in 1:length(perm.out)) { permtots[i, k] <- getSS(x = permx[[k]], c = perm.out[[i]]$clusters, w = perm.out[[i]]$weights)$bcss.w } } gaps <- (log(tots) - apply(log(permtots), 1, mean)) segaps <- apply(log(permtots), 1, sd)/sqrt(nperms) out <- list(nnonzerows = nnonzerows, gaps = gaps, segaps = segaps, s = s, s.best = s[which.max(gaps)]) return(out) }