https://github.com/LingsongMeng/GuidedSparseKmeans
Raw File
Tip revision: 31ee86eb84ac4ecddba48ac1cdc645c8399e9e8f authored by LingsongMeng on 03 May 2020, 04:04:37 UTC
Create README.md
Tip revision: 31ee86e
GuidedSparseKmeans.S.R
##' Selection of Tuning Parameter s in Guided Sparse K-means
##'
##' Select tuning parameter s via permutation in Guided Sparse K-means integrating clinical dataset with gene expression dataset.
##' @title GuidedSparseKmeans.S
##' @param x Gene expression matrix, n*p (rows for subjects and columns for genes).
##' @param z One phenotypic variable from clinical dataset, a vector.
##' @param K Number of clusters.
##' @param s The boundary of l1n weights, a vector.
##' @param lam The intensity of guidance.
##' @param model The model fitted to obtain R2, please select model from 'linear', 'logit', 'exp', 'polr','cox'.
##' @param nstart Specify the number of starting point for K-means.
##' @param maxiter Maximum number of iteration.
##' @param nperms Number of permutation times
##' @param silence Output progress or not.
##' @return A list consisting of
##' \item{nnonzerows}{number of nonzero weights.}
##' \item{gaps}{gap statistics.}
##' \item{segaps}{statard error of gap statistics.}
##' \item{s}{candidates of s.}
##' \item{s.best}{the best s with largest gap.}
##' @export
##' @author Lingsong Meng


GuidedSparseKmeans.S <- function(x, z, K, s, lam, model, nstart = 20, maxiter = 15, nperms = 50, silence = F) {
    
    if (is.null(s)) 
        stop("Must provide either K or centers.")
    if (min(s) <= 1) 
        stop("s should be greater than 1, since otherwise only one weight will be nonzero.")
    if (length(s) < 2) 
        stop("s should be a vector of at least two elements.")
    if (is.null(K)) 
        stop("Must provide either K or centers.")
    
    permx <- list()
    nnonzerows <- NULL
    for (i in 1:nperms) {
        permx[[i]] <- matrix(NA, nrow = nrow(x), ncol = ncol(x))
        for (j in 1:ncol(x)) permx[[i]][, j] <- sample(x[, j])
    }
    tots <- NULL
    out <- GuidedSparseKmeans(x, z, K, s, lam, model, nstart, maxiter, silence)
    for (i in 1:length(out)) {
        nnonzerows <- c(nnonzerows, sum(out[[i]]$weights != 0))
        tots <- c(tots, getSS(x = x, c = out[[i]]$clusters, w = out[[i]]$weights)$bcss.w)
    }
    permtots <- matrix(NA, nrow = length(s), ncol = nperms)
    for (k in 1:nperms) {
        if (silence == F) 
            print(paste0("nperms=", k))
        perm.out <- GuidedSparseKmeans(permx[[k]], z, K, s, lam, model, nstart, maxiter, silence)
        for (i in 1:length(perm.out)) {
            permtots[i, k] <- getSS(x = permx[[k]], c = perm.out[[i]]$clusters, w = perm.out[[i]]$weights)$bcss.w
        }
    }
    gaps <- (log(tots) - apply(log(permtots), 1, mean))
    segaps <- apply(log(permtots), 1, sd)/sqrt(nperms)
    
    out <- list(nnonzerows = nnonzerows, gaps = gaps, segaps = segaps, s = s, s.best = s[which.max(gaps)])
    return(out)
}
back to top