https://github.com/cran/sjPlot
Tip revision: 60dc03cb2e7f88aa6a654894f9fe9688031b4e09 authored by Daniel Luedecke on 19 November 2014, 16:54:27 UTC
version 1.6
version 1.6
Tip revision: 60dc03c
sjTabCorr.R
#' @title Show correlations as HTML table
#' @name sjt.corr
#'
#' @seealso \itemize{
#' \item \href{http://www.strengejacke.de/sjPlot/sjt.corr}{sjPlot manual: sjt.corr}
#' \item \code{\link{sjp.corr}}
#' }
#'
#' @description Shows the results of a computed correlation as HTML table. Requires either
#' a data frame or a computed \code{\link{cor}}-object.
#'
#' @param data A correlation object, built with the R-\code{\link{cor}}-function, or a data frame
#' which correlations should be calculated.
#' @param missingDeletion Indicates how missing values are treated. May be either
#' \code{"listwise"} or \code{"pairwise"} (default).
#' @param corMethod Indicates the correlation computation method. May be one of
#' \code{"spearman"} (default), \code{"pearson"} or \code{"kendall"}.
#' @param title A table caption. By default, \code{title} is \code{NULL}, hence no title will be used.
#' @param showPValues Whether significance levels (p-values) of correlations should
#' be printed or not.
#' @param pvaluesAsNumbers If \code{TRUE}, the significance levels (p-values) are printed as numbers.
#' if \code{FALSE} (default), asterisks are used.
#' @param fadeNS If \code{TRUE} (default), non-significant correlation-values appear faded (by using
#' a lighter grey text color).
#' @param file The destination file, which will be in html-format. If no filepath is specified,
#' the file will be saved as temporary file and openend either in the RStudio View pane or
#' in the default web browser.
#' @param varlabels The item labels that are printed along the first column/row. If no item labels are
#' provided (default), the data frame's column names are used. Item labels must
#' be a string vector, e.g.: \code{varlabels=c("Var 1", "Var 2", "Var 3")}.
#' varlabels are detected automatically if \code{data} is a data frame where each variable has
#' a \code{"variable.label"} attribute (see \code{\link{sji.setVariableLabels}}) for details).
#' @param breakLabelsAt Wordwrap for diagram labels. Determines how many chars of the variable labels are displayed in
#' one line and when a line break is inserted. Default is 40.
#' @param digits The amount of digits used the values inside table cells.
#' Default is 2.
#' @param triangle Indicates whether only the upper right (use \code{"upper"}), lower left (use \code{"lower"})
#' or both (use \code{"both"}) triangles of the correlation table is filled with values. Default
#' is \code{"both"}. You can specifiy the inital letter only.
#' @param val.rm Specify a number between 0 and 1 to suppress the output of correlation values
#' that are smaller than \code{val.rm}. The absolute correlation values are used, so
#' a correlation value of -.5 would be greater that \code{"val.rm=.4"} and thus not be
#' omitted. By default, this parameter is \code{NULL}, hence all values are shown in the table.
#' If a correlation value is below the specified value of \code{val.rm}, it is still printed to
#' the HTML table, but made "invisible" with white foreground color. You can use the \code{CSS}
#' parameter (\code{"css.valueremove"}) to change color and appearance of those correlation value that are smaller than
#' the limit specified by \code{val.rm}.
#' @param stringDiagonal a vector with string values of the same length as \code{ncol(data)} (number of
#' correlated items) that can be used to display content in the diagonal cells
#' where row and column item are identical (i.e. the "self-correlation"). By defauilt,
#' this parameter is \code{NULL} and the diagnal cells are empty.
#' @param encoding The charset encoding used for variable and value labels. Default is \code{NULL}, so encoding
#' will be auto-detected depending on your platform (\code{"UTF-8"} for Unix and \code{"Windows-1252"} for
#' Windows OS). Change encoding if specific chars are not properly displayed (e.g.) German umlauts).
#' @param CSS A \code{\link{list}} with user-defined style-sheet-definitions, according to the
#' \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See return value \code{page.style} for details
#' of all style-sheet-classnames that are used in this function. Parameters for this list need:
#' \enumerate{
#' \item the class-names with \code{"css."}-prefix as parameter name and
#' \item each style-definition must end with a semicolon
#' }
#' You can add style information to the default styles by using a + (plus-sign) as
#' initial character for the parameter attributes. Examples:
#' \itemize{
#' \item \code{css.table='border:2px solid red;'} for a solid 2-pixel table border in red.
#' \item \code{css.summary='font-weight:bold;'} for a bold fontweight in the summary row.
#' \item \code{css.lasttablerow='border-bottom: 1px dotted blue;'} for a blue dotted border of the last table row.
#' \item \code{css.summary='+color:blue;'} adds blue font color to summary row.
#' }
#' See further examples below and \href{http://www.strengejacke.de/sjPlot/sjtbasics}{sjPlot manual: sjt-basics}.
#' @param useViewer If \code{TRUE}, the function tries to show the HTML table in the IDE's viewer pane. If
#' \code{FALSE} or no viewer available, the HTML table is opened in a web browser.
#' @param no.output If \code{TRUE}, the html-output is neither opened in a browser nor shown in
#' the viewer pane and not even saved to file. This option is useful when the html output
#' should be used in \code{knitr} documents. The html output can be accessed via the return
#' value.
#' @return Invisibly returns a \code{\link{structure}} with
#' \itemize{
#' \item the web page style sheet (\code{page.style}),
#' \item the web page content (\code{page.content}),
#' \item the complete html-output (\code{output.complete}) and
#' \item the html-table with inline-css for use with knitr (\code{knitr})
#' }
#' for further use.
#'
#' @note The HTML tables can either be saved as file and manually opened (specify parameter \code{file}) or
#' they can be saved as temporary files and will be displayed in the RStudio Viewer pane (if working with RStudio)
#' or opened with the default web browser. Displaying resp. opening a temporary file is the
#' default behaviour (i.e. \code{file=NULL}).
#'
#' @examples
#' # create data frame with 5 random variables
#' df <- as.data.frame(cbind(rnorm(10), rnorm(10), rnorm(10), rnorm(10), rnorm(10)))
#'
#' # plot correlation matrix using circles
#' \dontrun{
#' sjt.corr(df)}
#'
#' # -------------------------------
#' # Data from the EUROFAMCARE sample dataset
#' # -------------------------------
#' data(efc)
#'
#' # retrieve variable and value labels
#' varlabs <- sji.getVariableLabels(efc)
#'
#' # recveive first item of COPE-index scale
#' start <- which(colnames(efc)=="c83cop2")
#' # recveive last item of COPE-index scale
#' end <- which(colnames(efc)=="c88cop7")
#'
#' # create data frame with COPE-index scale
#' df <- as.data.frame(efc[,c(start:end)])
#' colnames(df) <- varlabs[c(start:end)]
#'
#' # we have high correlations here, because all items
#' # belong to one factor. See example from "sjp.pca".
#' \dontrun{
#' sjt.corr(df, pvaluesAsNumbers=TRUE)}
#'
#' # -------------------------------
#' # auto-detection of labels, only lower triangle
#' # -------------------------------
#' efc <- sji.setVariableLabels(efc, varlabs)
#' \dontrun{
#' sjt.corr(efc[,c(start:end)], triangle="lower")}
#'
#' # -------------------------------
#' # auto-detection of labels, only lower triangle,
#' # all correlation values smaller than 0.3 are not
#' # shown in the table
#' # -------------------------------
#' efc <- sji.setVariableLabels(efc, varlabs)
#' \dontrun{
#' sjt.corr(efc[,c(start:end)], triangle="lower", val.rm=0.3)}
#'
#' # -------------------------------
#' # auto-detection of labels, only lower triangle,
#' # all correlation values smaller than 0.3 are printed
#' # in blue
#' # -------------------------------
#' efc <- sji.setVariableLabels(efc, varlabs)
#' \dontrun{
#' sjt.corr(efc[,c(start:end)], triangle="lower",
#' val.rm=0.3, CSS=list(css.valueremove='color:blue;'))}
#'
#' @export
sjt.corr <- function (data,
missingDeletion="pairwise",
corMethod="spearman",
title=NULL,
showPValues=TRUE,
pvaluesAsNumbers=FALSE,
fadeNS=TRUE,
file=NULL,
varlabels=NULL,
breakLabelsAt=40,
digits=3,
triangle="both",
val.rm=NULL,
stringDiagonal=NULL,
encoding=NULL,
CSS=NULL,
useViewer=TRUE,
no.output=FALSE) {
# --------------------------------------------------------
# check encoding
# --------------------------------------------------------
encoding <- get.encoding(encoding)
# --------------------------------------------------------
# parameter check
# --------------------------------------------------------
if (is.null(triangle)) {
triangle <- "both"
}
else if (triangle=="u" || triangle=="upper") {
triangle <- "upper"
}
else if (triangle=="l" || triangle=="lower") {
triangle <- "lower"
}
else triangle <- "both"
# --------------------------------------------------------
# try to automatically set labels is not passed as parameter
# --------------------------------------------------------
if (is.null(varlabels) && is.data.frame(data)) {
varlabels <- c()
# if yes, iterate each variable
for (i in 1:ncol(data)) {
# retrieve variable name attribute
vn <- autoSetVariableLabels(data[,i])
# if variable has attribute, add to variableLabel list
if (!is.null(vn)) {
varlabels <- c(varlabels, vn)
}
else {
# else break out of loop
varlabels <- NULL
break
}
}
}
# ----------------------------
# check for valid parameter
# ----------------------------
if (corMethod!="pearson" && corMethod!="spearman" && corMethod!= "kendall") {
stop("Parameter 'corMethod' must be one of: pearson, spearman or kendall")
}
# ----------------------------
# check if user has passed a data frame
# or a pca object
# ----------------------------
if (class(data)=="matrix") {
corr <- data
cpvalues <- NULL
}
else {
# missing deletion corresponds to
# SPSS listwise
if (missingDeletion=="listwise") {
data <- na.omit(data)
corr <- cor(data, method=corMethod)
}
# missing deletion corresponds to
# SPSS pairwise
else {
corr <- cor(data, method=corMethod, use="pairwise.complete.obs")
}
#---------------------------------------
# if we have a data frame as parameter,
# compute p-values of significances
#---------------------------------------
computePValues <- function(df) {
cp <- c()
for (i in 1:ncol(df)) {
pv <- c()
for (j in 1:ncol(df)) {
test <- cor.test(df[,i], df[,j], alternative="two.sided", method=corMethod)
pv <- cbind(pv, round(test$p.value,5))
}
cp <- rbind(cp, pv)
}
return (cp)
}
cpvalues <- computePValues(data)
}
# --------------------------------------------------------
# save original p-values
# --------------------------------------------------------
cpv <- cpvalues
# --------------------------------------------------------
# add column with significance value
# --------------------------------------------------------
if (!is.null(cpvalues)) {
if (!pvaluesAsNumbers) {
# --------------------------------------------------------
# prepare function for apply-function. replace sig. p
# with asterisks
# --------------------------------------------------------
fun.star <- function(x) {
if (x>=0.05) x=""
else if (x>=0.01 && x<0.05) x="*"
else if (x>=0.001 && x<0.01) x="**"
else if (x<0.001) x="***"
}
}
else {
# --------------------------------------------------------
# prepare function for apply-function.
# round p-values, keeping the numeric values
# --------------------------------------------------------
fun.star <- function(x) {
round(x,digits)
}
}
cpvalues <- apply(cpvalues, c(1,2), fun.star)
}
else {
showPValues <- FALSE
}
# ----------------------------
# check if user defined labels have been supplied
# if not, use variable names from data frame
# ----------------------------
if (is.null(varlabels)) {
varlabels <- row.names(corr)
}
# check length of x-axis-labels and split longer strings at into new lines
varlabels <- sju.wordwrap(varlabels, breakLabelsAt, "<br>")
# -------------------------------------
# init header
# -------------------------------------
toWrite <- sprintf("<html>\n<head>\n<meta http-equiv=\"Content-type\" content=\"text/html;charset=%s\">\n", encoding)
# -------------------------------------
# init style sheet and tags used for css-definitions
# we can use these variables for string-replacement
# later for return value
# -------------------------------------
tag.table <- "table"
tag.caption <- "caption"
tag.thead <- "thead"
tag.tdata <- "tdata"
tag.notsig <- "notsig"
tag.pval <- "pval"
tag.valueremove <- "valueremove"
tag.summary <- "summary"
tag.centeralign <- "centeralign"
tag.firsttablecol <- "firsttablecol"
css.table <- "border-collapse:collapse; border:none;"
css.thead <- "font-style:italic; font-weight:normal; border-top:double black; border-bottom:1px solid black; padding:0.2cm;"
css.tdata <- "padding:0.2cm;"
css.caption <- "font-weight: bold; text-align:left;"
css.valueremove <- "color:white;"
css.centeralign <- "text-align:center;"
css.firsttablecol <- "font-style:italic;"
css.notsig <- "color:#999999;"
css.summary <- "border-bottom:double black; border-top:1px solid black; font-style:italic; font-size:0.9em; text-align:right;"
css.pval <- "vertical-align:super;font-size:0.8em;"
if (pvaluesAsNumbers) css.pval <- "font-style:italic;"
# ------------------------
# check user defined style sheets
# ------------------------
if (!is.null(CSS)) {
if (!is.null(CSS[['css.table']])) css.table <- ifelse(substring(CSS[['css.table']],1,1)=='+', paste0(css.table, substring(CSS[['css.table']],2)), CSS[['css.table']])
if (!is.null(CSS[['css.thead']])) css.thead <- ifelse(substring(CSS[['css.thead']],1,1)=='+', paste0(css.thead, substring(CSS[['css.thead']],2)), CSS[['css.thead']])
if (!is.null(CSS[['css.tdata']])) css.tdata <- ifelse(substring(CSS[['css.tdata']],1,1)=='+', paste0(css.tdata, substring(CSS[['css.tdata']],2)), CSS[['css.tdata']])
if (!is.null(CSS[['css.caption']])) css.caption <- ifelse(substring(CSS[['css.caption']],1,1)=='+', paste0(css.caption, substring(CSS[['css.caption']],2)), CSS[['css.caption']])
if (!is.null(CSS[['css.summary']])) css.summary <- ifelse(substring(CSS[['css.summary']],1,1)=='+', paste0(css.summary, substring(CSS[['css.summary']],2)), CSS[['css.summary']])
if (!is.null(CSS[['css.centeralign']])) css.centeralign <- ifelse(substring(CSS[['css.centeralign']],1,1)=='+', paste0(css.centeralign, substring(CSS[['css.centeralign']],2)), CSS[['css.centeralign']])
if (!is.null(CSS[['css.firsttablecol']])) css.firsttablecol <- ifelse(substring(CSS[['css.firsttablecol']],1,1)=='+', paste0(css.firsttablecol, substring(CSS[['css.firsttablecol']],2)), CSS[['css.firsttablecol']])
if (!is.null(CSS[['css.notsig']])) css.notsig <- ifelse(substring(CSS[['css.notsig']],1,1)=='+', paste0(css.notsig, substring(CSS[['css.notsig']],2)), CSS[['css.notsig']])
if (!is.null(CSS[['css.pval']])) css.pval <- ifelse(substring(CSS[['css.pval']],1,1)=='+', paste0(css.pval, substring(CSS[['css.pval']],2)), CSS[['css.pval']])
if (!is.null(CSS[['css.valueremove']])) css.valueremove <- ifelse(substring(CSS[['css.valueremove']],1,1)=='+', paste0(css.valueremove, substring(CSS[['css.valueremove']],2)), CSS[['css.valueremove']])
}
# ------------------------
# set page style
# ------------------------
page.style <- sprintf("<style>%s { %s }\n%s { %s }\n.%s { %s }\n.%s { %s }\n.%s { %s }\n.%s { %s }\n.%s { %s }\n.%s { %s }\n.%s { %s }\n.%s { %s }\n</style>",
tag.table, css.table, tag.caption, css.caption,
tag.thead, css.thead, tag.tdata, css.tdata,
tag.firsttablecol, css.firsttablecol,
tag.centeralign, css.centeralign,
tag.notsig, css.notsig,
tag.pval, css.pval,
tag.summary, css.summary,
tag.valueremove, css.valueremove)
# ------------------------
# start content
# ------------------------
toWrite <- paste0(toWrite, page.style)
toWrite = paste(toWrite, "\n</head>\n<body>", "\n")
# -------------------------------------
# start table tag
# -------------------------------------
page.content <- "<table>\n"
# -------------------------------------
# table caption, variable label
# -------------------------------------
if (!is.null(title)) page.content <- paste0(page.content, sprintf(" <caption>%s</caption>\n", title))
# -------------------------------------
# header row
# -------------------------------------
# write tr-tag
page.content <- paste0(page.content, " <tr>\n")
# first column
page.content <- paste0(page.content, " <th class=\"thead\"> </th>\n")
# iterate columns
for (i in 1:ncol(corr)) {
page.content <- paste0(page.content, sprintf(" <th class=\"thead\">%s</th>\n", varlabels[i]))
}
# close table row
page.content <- paste0(page.content, " </tr>\n")
# -------------------------------------
# data rows
# -------------------------------------
# iterate all rows of df
for (i in 1:nrow(corr)) {
# write tr-tag
page.content <- paste0(page.content, " <tr>\n")
# print first table cell
page.content <- paste0(page.content, sprintf(" <td class=\"firsttablecol\">%s</td>\n", varlabels[i]))
# --------------------------------------------------------
# iterate all columns
# --------------------------------------------------------
for (j in 1:ncol(corr)) {
# --------------------------------------------------------
# leave out self-correlations
# --------------------------------------------------------
if (j==i) {
if (is.null(stringDiagonal) || length(stringDiagonal)>ncol(corr)) {
page.content <- paste0(page.content, " <td class=\"tdata centeralign\"> </td>\n")
}
else {
page.content <- paste0(page.content, sprintf(" <td class=\"tdata centeralign\">%s</td>\n", stringDiagonal[j]))
}
}
else {
# --------------------------------------------------------
# check whether only lower or upper triangle of correlation
# table should be printed
# --------------------------------------------------------
if((triangle=="upper" && j>i) || (triangle=="lower" && i>j) || triangle=="both") {
# --------------------------------------------------------
# print table-cell-data (cor-value)
# --------------------------------------------------------
cellval <- sprintf("%.*f", digits, corr[i,j])
# --------------------------------------------------------
# check whether we want to show P-Values
# --------------------------------------------------------
if (showPValues) {
if (pvaluesAsNumbers) {
# --------------------------------------------------------
# if we have p-values as number, print them in new row
# --------------------------------------------------------
cellval <- sprintf("%s<br><span class=\"pval\">(%.*f)</span>", cellval, digits, cpvalues[i,j])
}
else {
# --------------------------------------------------------
# if we have p-values as "*", add them
# --------------------------------------------------------
cellval <- sprintf("%s<span class=\"pval\">%s</span>", cellval, cpvalues[i,j])
}
}
# --------------------------------------------------------
# prepare css for not significant values
# --------------------------------------------------------
notsig <- ""
# --------------------------------------------------------
# check whether non significant values should be blurred
# --------------------------------------------------------
if (fadeNS && !is.null(cpv)) {
# set css-class-attribute
if (cpv[i,j] >=0.05) notsig <- " notsig"
}
# --------------------------------------------------------
# prepare css for values that shoould be removed due to low
# correlation value
# --------------------------------------------------------
value.remove <- ""
# --------------------------------------------------------
# check whether correlation value is too small and should
# be omitted
# --------------------------------------------------------
if (!is.null(val.rm) && abs(corr[i,j])<abs(val.rm)) {
value.remove <- " valueremove"
}
page.content <- paste0(page.content, sprintf(" <td class=\"tdata centeralign%s%s\">%s</td>\n", notsig, value.remove, cellval))
}
else {
page.content <- paste0(page.content, " <td class=\"tdata centeralign\"> </td>\n")
}
}
}
# close row
page.content <- paste0(page.content, " </tr>\n")
}
# -------------------------------------
# feedback...
# -------------------------------------
page.content <- paste0(page.content, " <tr>\n")
page.content <- paste0(page.content, sprintf(" <td colspan=\"%i\" class=\"summary\">", ncol(corr)+1))
page.content <- paste0(page.content, sprintf("Computed correlation used %s-method with %s-deletion.", corMethod, missingDeletion))
page.content <- paste0(page.content, "</td>\n </tr>\n")
# -------------------------------------
# finish table
# -------------------------------------
page.content <- paste(page.content, "\n</table>")
# -------------------------------------
# finish html page
# -------------------------------------
toWrite <- paste(toWrite, page.content, "\n")
toWrite <- paste0(toWrite, "</body></html>")
# -------------------------------------
# replace class attributes with inline style,
# useful for knitr
# -------------------------------------
# copy page content
# -------------------------------------
knitr <- page.content
# -------------------------------------
# set style attributes for main table tags
# -------------------------------------
knitr <- gsub("class=", "style=", knitr)
knitr <- gsub("<table", sprintf("<table style=\"%s\"", css.table), knitr)
knitr <- gsub("<caption", sprintf("<caption style=\"%s\"", css.caption), knitr)
# -------------------------------------
# replace class-attributes with inline-style-definitions
# -------------------------------------
knitr <- gsub(tag.tdata, css.tdata, knitr)
knitr <- gsub(tag.thead, css.thead, knitr)
knitr <- gsub(tag.centeralign, css.centeralign, knitr)
knitr <- gsub(tag.notsig, css.notsig, knitr)
knitr <- gsub(tag.pval, css.pval, knitr)
knitr <- gsub(tag.summary, css.summary, knitr)
knitr <- gsub(tag.firsttablecol, css.firsttablecol, knitr)
knitr <- gsub(tag.valueremove, css.valueremove, knitr)
# -------------------------------------
# check if html-content should be outputted
# -------------------------------------
out.html.table(no.output, file, knitr, toWrite, useViewer)
# -------------------------------------
# return results
# -------------------------------------
invisible (structure(class = "sjtcorr",
list(page.style = page.style,
page.content = page.content,
output.complete = toWrite,
knitr = knitr)))
}