https://github.com/cran/CluMix
Tip revision: a006880878209b1a96d9cdde0332d96fa86036af authored by Manuela Hummel on 03 June 2016, 18:47:22 UTC
version 1.1
version 1.1
Tip revision: a006880
mix.heatmap.R
mix.heatmap <-
#function(data, D.subjects, D.variables, dend.subjects, dend.variables, type=list(),
# !! to be done: allow also asymmetric binary variables
function(data, D.subjects, D.variables, dend.subjects, dend.variables,
varweights,
dist.variables.method=c("associationMeasures","ClustOfVar"), associationFun=association,
rowlab, rowmar=3, lab.cex=1.5, ColSideColors, RowSideColors,
col.cont=marray::maPalette(low="lightblue", high="darkblue", k=50),
col.ord=list(low="lightgreen", high="darkgreen"),
col.cat=c("indianred1","darkred","orangered","orange","palevioletred1","violetred4","red3","indianred4"),
#col.cat=c("darkorange","darkred","thistle","cornflowerblue","olivedrab","darkgrey","purple4","indianred","yellow2","darkseagreen4"),
legend.colbar, legend.rowbar, legend.mat=FALSE, legend.cex=1){
# data: data frame where columns are variables (of different data types) and rows are observations (subjects, samples)
# D.subjects, D.variables: the already calculated distance matrices (class 'dissimilarity') for subjects and variables can be given;
# if missing, they will be calculated; if set to NULL, no clustering is done and original order in 'data' will be preserved
# dend.subjects, dend.variables: dendrograms for subects and variables can be given;
# then no distances will be calculated and also D.subjects/D.variables will be ignored
# varweights: optional vector of variable weights, used for calculating Gower distances btw. subjects
# dist.variables.method: distance can be based on 1-sqrt(associationmeasures) or ClustOfVar approach
# associationFun: function calculating association coefficients between variables (only used when dist.variables.method="associationMeasures")
# rowlab: variable labels; if missing, colnames of data are used
# rowmar: margin for variable labels
# lab.cex: size of row labels
# ColSideColors, RowSideColors: color bars can be added on top / to the left (just one bar each)
# col.cont: color palette for continuous variables
# col.ord: list with colors for lowest and highest category of ordinal variables -> color palette will be created based on the number of categories
# col.cat: vector of colors for categorical variables
# legend.colbar / legend.rowbar: class labels for subject/variable groups defined by ColSideColors/RowSideColors
# legend.mat: shall legend matrix for heatmap be shown
# legend.cex: size of legend text
if(ncol(data) > 200)
stop("the heatmap is currently only available for a maximum of 200 variables")
# number of subjects and variables
p <- ncol(data)
n <- nrow(data)
## subjects
# if dendrogram is given, order subjects by its labels
if(!missing(dend.subjects)){
if(!all(labels(dend.subjects) %in% rownames(data)))
stop("labels of dend.subjects have to correspond to rownames of data")
o.subjects <- labels(dend.subjects)
plotdend.sub <- TRUE
}
# if neither dendrogram nor dist matrix is given, calculate distance matrix for subjects
else if(missing(D.subjects)){
D.subjects <- dist.subjects(data, weights=varweights)
#D.subjects <- dist.subjects(data, type=type)
dend.subjects <- as.dendrogram(hclust(D.subjects))
o.subjects <- labels(dend.subjects)
plotdend.sub <- TRUE
}
# if D.subjects is set to NULL, keep order of data
else if(is.null(D.subjects)){
o.subjects <- 1:n
plotdend.sub <- FALSE
}
# if D.subjects is specified
else{
dend.subjects <- as.dendrogram(hclust(D.subjects))
o.subjects <- labels(dend.subjects)
plotdend.sub <- TRUE
}
## variables
# if dendrogram is given, order variables by its labels
if(!missing(dend.variables)){
if(!all(labels(dend.variables) %in% names(data)))
stop("labels of dend.variables have to correspond to colnames of data")
o.variables <- labels(dend.variables)
plotdend.var <- TRUE
}
# if neither dendrogram nor dist matrix is given, calculate distance matrix for variables
else if(missing(D.variables)){
dist.variables.method <- match.arg(dist.variables.method)
if(dist.variables.method == "associationMeasures"){
D.variables <- dist.variables(data, associationFun=associationFun)
dend.variables <- as.dendrogram(hclust(D.variables))
}
else if(dist.variables.method == "ClustOfVar"){
dc <- sapply(data, data.class)
if(any(dc == "numeric"))
X.quanti <- data[,dc == "numeric"]
else
X.quanti <- NULL
if(all(dc == "numeric"))
X.quali <- NULL
else
X.quali <- data[,dc != "numeric"]
dend.variables <- as.dendrogram(ClustOfVar::hclustvar(X.quanti, X.quali))
}
o.variables <- labels(dend.variables)
plotdend.var <- TRUE
}
# if D.variables is set to NULL, keep order of data
else if(is.null(D.variables)){
o.variables <- 1:p
plotdend.var <- FALSE
}
# if D.variables is specified
else{
dend.variables <- as.dendrogram(hclust(D.variables))
o.variables <- labels(dend.variables)
plotdend.var <- TRUE
}
# order rows and columns for plotting
data.plot <- data[o.subjects, rev(o.variables)]
if(!missing(ColSideColors)){
names(ColSideColors) <- rownames(data)
ColSideColors2 <- ColSideColors[o.subjects]
}
if(!missing(RowSideColors)){
names(RowSideColors) <- names(data)
RowSideColors2 <- RowSideColors[rev(o.variables)]
}
# plot layout matrix: 1: subjects dendrogram, 2: column color bar, 3: variables dendrogram, 4: row color bar,
# 5-(p+4): heatmap rows, p+5: leave some empty space
# p+6: legend for column/row color bar (if legend.colbar and/or legend.rowbar is specified)
# p+7: legend matrix for heatmap (if legend.mat=TRUE)
def.par <- par(no.readonly = TRUE) # save default, for resetting...
mardend1 <- ifelse(!plotdend.sub, 0.3, 1) # space for column dendrogram or nothing
mardend2 <- ifelse(missing(ColSideColors), .1, 0.6) # space for column color bar or nothing
mardend3 <- ifelse(!plotdend.var, 0.3, 1) # space for row dendrogram or nothing
mardend4 <- ifelse(missing(RowSideColors), .05, 0.3) # space for row color bar or nothing
if(missing(legend.colbar) & missing(legend.rowbar)){
layoutmat <- matrix(c(0, 0, rep(3, p), rep(0,3), rep(4,p), 0:2, 5:(p+5)), ncol=3) ## maximal 200 rows allowed in layout!
if(legend.mat){
layoutmat <- rbind(layoutmat, max(layoutmat) + 1)
layout(layoutmat, widths=c(mardend3, mardend4, 7), heights=c(mardend1, mardend2, rep(.25, p+1), 1))
}
layout(layoutmat, widths=c(mardend3, mardend4, 7), heights=c(mardend1, mardend2, rep(.25, p+1)))
}
else{
layoutmat <- matrix(c(0, 0, rep(3, p), rep(0,3), rep(4,p), 0:2, 5:(p+5), rep(p+6, p+3)), ncol=4)
if(legend.mat){
layoutmat <- rbind(layoutmat, max(layoutmat) + 1)
layout(layoutmat, widths=c(mardend3, mardend4, 7, 2), heights=c(mardend1, mardend2, rep(.25, p+1), 1))
}
layout(layoutmat, widths=c(mardend3, mardend4, 7, 2), heights=c(mardend1, mardend2, rep(.25, p+1)))
}
# subjects dendrogram / nothing
if(!plotdend.sub){
par(mar=c(0, 1, 0, rowmar))
plot(rep(1,n), type="n", axes=F, xlab="", ylab="")
}
else{
par(mar=c(0, 1, 1, rowmar))
plot(dend.subjects, axes=FALSE, leaflab="none", xaxs="i")
}
# column color bar / nothing
if(missing(ColSideColors)){
par(mar=c(0, 1, 0, rowmar))
plot(rep(1,n), type="n", axes=FALSE, xlab="", ylab="")
}
else{
par(mar=c(1, 1, 1, rowmar))
addfac(as.matrix(ColSideColors2))
}
# variables dendrogram / nothing
if(!plotdend.var){
par(mar=c(0, 0, 0, 0))
plot(rep(1,p), type="n", axes=F, xlab="", ylab="")
}
else{
par(mar=c(0, 1, 0, 0))
plot(dend.variables, axes=FALSE, leaflab="none", yaxs="i", horiz=TRUE)
}
# row color bar / nothing
if(missing(RowSideColors)){
par(mar=c(0, 0, 0, 0))
plot(rep(1,p), type="n", axes=F, xlab="", ylab="")
}
else{
par(mar=c(0, 1, 0, 0))
addfac(t(as.matrix(RowSideColors2)))
}
# row (=variable) labels
if(missing(rowlab))
rowlab <- names(data.plot)
else{
names(rowlab) <- names(data)
rowlab <- rowlab[rev(o.variables)]
}
# heatmap
for(i in 1:p){
v.i <- data.plot[,i]
dc <- ifelse(length(na.omit(unique(v.i))) == 2, "binary", data.class(v.i))
#dc <- data.class(v.i)
N <- rowlab[i]
par(mar=c(0, 1, 0, rowmar))
# continuous -> heatmap colors
if(dc == "numeric")
heat(t(as.matrix(v.i)), ylab=N, cols=col.cont, cex=lab.cex)
# ordinal -> green scale colors
else if(dc == "ordered"){
col.o <- fac2col(v.i, cols=maPalette(low=col.ord$low, high=col.ord$high, k=length(levels(v.i))))
addfac(as.matrix(col.o), ylab=N, cex=lab.cex)
}
# categorical -> category colors
else {
col.c <- fac2col(v.i, cols=col.cat)
addfac(as.matrix(col.c), ylab=N, cex=lab.cex)
}
}
# leave some empty space
plot(1, type="n", frame=F, axes=F, xlab="", ylab="")
# legends for column/row color bars
if(!missing(legend.colbar) & !missing(legend.rowbar)){
par(mar=c(0,0,0,0))
nc <- length(unique(ColSideColors))
nr <- length(unique(RowSideColors))
plot(c(0,0), c(0, nc+nr), type="n", axes=FALSE, xlab="", ylab="")
legend(x=-.9, y=nc+nr, legend=legend.colbar, fill=unique(ColSideColors), border="black", cex=1.5, bty="n")
legend(x=-.9, y=nr, legend=legend.rowbar, fill=unique(RowSideColors), border="black", cex=1.5, bty="n")
}
else if(!missing(legend.colbar)){
par(mar=c(0,0,0,0))
plot(0, 0, type="n", axes=FALSE, xlab="", ylab="")
legend(x=-.9, y=.9, legend=legend.colbar, fill=unique(ColSideColors), border="black", cex=1.5, bty="n")
}
else if(!missing(legend.rowbar)){
par(mar=c(0,0,0,0))
plot(0, 0, type="n", axes=FALSE, xlab="", ylab="")
legend(x=-.9, y=.9, legend=legend.rowbar, fill=unique(RowSideColors), border="black", cex=1.5, bty="n")
}
# legend matrix for heatmap
if(legend.mat)
legendmat(data.plot, Names=rowlab, col.cont, col.ord, col.cat, lab.cex=legend.cex)
# reset plotting parameters
par(def.par)
}