Revision 738b43f6dcce877b85c10f23dafc38e0098bd8fb authored by Daniel Luedecke on 19 February 2014, 11:32:18 UTC, committed by cran-robot on 19 February 2014, 11:32:18 UTC
1 parent 64f2fd6
Raw File
sjPlotCorr.R
# bind global variables
if(getRversion() >= "2.15.1") utils::globalVariables(c("ordx", "ordy"))



#' @title Plot correlation matrix
#' @name sjp.corr
#' @references \itemize{
#'              \item \url{http://strengejacke.wordpress.com/sjplot-r-package/}
#'              \item \url{http://strengejacke.wordpress.com/2013/04/18/examples-for-sjplotting-functions-including-correlations-and-proportional-tables-with-ggplot-rstats/}
#'             }
#'             
#' @description Plot correlations as ellipses or tiles. Required parameter is either 
#'                a data frame or a computed \code{cor}-object. In case of ellipses, the
#'                ellipses size indicates the strength of the correlation. Furthermore,
#'                blue and red colors indicate positive or negative correlations, where
#'                stronger correlations are darkened.
#' 
#' @param data A correlation object, built with the R-\code{cor}-function, or a data frame
#'          which correlations should be calculated.
#' @param title Title of the diagram, plotted above the whole diagram panel.
#' @param titleSize The size of the plot title. Default is 1.3.
#' @param titleColor The color of the plot title. Default is \code{"black"}.
#' @param axisLabels Labels for the x- andy y-axis.
#' @param type Indicates whether the geoms of correlation values should be plotted
#'          as \code{"circle"} (default) or as \code{"tile"}.
#' @param sortCorrelations If \code{TRUE} (default), the axis labels are sorted
#'          according to the correlation strength. If \code{FALSE}, axis labels
#'          appear in order of how variables were included in the cor-computation or
#'          data frame.
#' @param decimals Indicates how many decimal values after comma are printed when
#'          the values labels are shown. Default is 3. Only applies when
#'          \code{showCorrelationValueLabels} is \code{TRUE}.
#' @param missingDeletion Indicates how missing values are treated. May be either
#'          \code{"listwise"} (default) or \code{"pairwise"}.
#' @param corMethod Indicates the correlation computation method. May be one of
#'          \code{"spearman"} (default), \code{"pearson"} or \code{"kendall"}.
#' @param geomAlpha Specify the transparancy (alpha value) of geom objects (circles or tiles).
#'          Default is 0.8.
#' @param valueLabelColor The color of the value labels (numbers) inside the diagram.
#'          Default is \code{"black"}.
#' @param valueLabelSize The size of value labels in the diagram. Default is 4.5, recommended values range
#'          between 2 and 8.
#' @param valueLabelAlpha Specify the transparancy (alpha value) of value labels.
#'          Default is 1.
#' @param circleSize Specifies the circle size factor. The circle size depends on the correlation
#'          value multiplicated with this factor. Default is 15.
#' @param outlineColor Defines the outline color of geoms (circles or tiles). Default is black.
#' @param outlineSize Defines the outline size of geoms (circles or tiles). Default is 1.
#' @param axisColor User defined color of axis border (y- and x-axis, in case the axes should have different colors than
#'          the diagram border).
#' @param borderColor User defined color of whole diagram border (panel border).
#' @param axisLabelSize The size of variable labels at the axes. Default is 1.1, recommended values range
#'          between 0.5 and 3.0.
#' @param axisLabelColor User defined color for axis labels. If not specified, a default dark gray
#'          color palette will be used for the labels.
#' @param axisLabelAngle.x Angle for x-axis-labels.
#' @param axisLabelAngle.y Angle for y-axis-labels.
#' @param breakTitleAt Wordwrap for diagram title. Determines how many chars of the title are displayed in
#'          one line and when a line break is inserted into the title. Default is 50.
#' @param breakLabelsAt Wordwrap for diagram labels. Determines how many chars of the category labels are displayed in 
#'          one line and when a line break is inserted. Default is 12.
#' @param hideDiagCircle If \code{TRUE} (default), the geoms of the diagonal correlations
#'          (self-correlations with value "1") are not plotted. Only applies if parameter
#'          \code{type} is \code{"circle"}.
#' @param hideLegend Show or hide the legend. The legend indicates the strength of correlations
#'          by gradient colour fill. Default is \code{TRUE}, hence the legend is hidden.
#' @param legendTitle The legend title, provided as string, e.g. \code{legendTitle=c("Strength of correlation")}.
#'          Default is \code{NULL}, hence no legend title is used.
#' @param showCorrelationValueLabels Whether correlation values should be plotted to each geom
#' @param showCorrelationPValues Whether significance levels (p-values) of correlations should 
#'          be plotted to each geom.
#' @param pvaluesAsNumbers If \code{TRUE}, the significance levels (p-values) are printed as numbers.
#'          if \code{FALSE} (default), asterisks are used.
#' @param showTickMarks Whether tick marks should be plotted or not. Default is \code{FALSE}.
#' @param fillColor A color palette for fillng the geoms. If not specified, the 5th diverging color palette
#'          from the color brewer palettes (RdBu) is used, resulting in red colors for negative and blue colors
#'          for positive correlations, that become lighter the weaker the correlations are. Use any
#'          color palette that is suitbale for the \code{scale_fill_gradientn} parameter of ggplot2.
#' @param majorGridColor Specifies the color of the major grid lines of the diagram background.
#' @param minorGridColor Specifies the color of the minor grid lines of the diagram background.
#' @param theme Specifies the diagram's background theme. Default (parameter \code{NULL}) is a gray 
#'          background with white grids.
#'          \itemize{
#'          \item Use \code{"bw"} for a white background with gray grids
#'          \item \code{"classic"} for a classic theme (black border, no grids)
#'          \item \code{"minimal"} for a minimalistic theme (no border,gray grids) or 
#'          \item \code{"none"} for no borders, grids and ticks.
#'          }
#'          The ggplot-object can be returned with \code{returnPlot} set to \code{TRUE} in order to further
#'          modify the plot's theme.
#' @param returnPlot If \code{TRUE}, the ggplot-object with the complete plot will be returned (and not plotted).
#'          Default is \code{FALSE}, hence the ggplot object will be plotted, not returned.
#' @return The ggplot-object with the complete plot in case \code{returnPlot} is \code{TRUE}.
#'          
#' @examples
#' # create data frame with 5 random variables
#' df <- as.data.frame(cbind(rnorm(10), rnorm(10), rnorm(10), rnorm(10), rnorm(10)))
#' 
#' # plot correlation matrix using circles
#' sjp.corr(df)
#' 
#' # plot correlation matrix using square tiles without diagram background
#' sjp.corr(df, type="tile", theme="none")
#' 
#' 
#' # -------------------------------
#' # Data from the EUROFAMCARE sample dataset
#' # -------------------------------
#' data(efc)
#' 
#' # retrieve variable and value labels
#' varlabs <- sji.getVariableLabels(efc)
#' 
#' # recveive first item of COPE-index scale
#' start <- which(colnames(efc)=="c83cop2")
#' # recveive last item of COPE-index scale
#' end <- which(colnames(efc)=="c88cop7")
#'  
#' # create data frame with COPE-index scale
#' df <- as.data.frame(efc[,c(start:end)])
#' colnames(df) <- varlabs[c(start:end)]
#'
#' # we have high correlations here, because all items
#' # belong to one factor. See example from "sjp.pca". 
#' sjp.corr(df, type="tile", theme="none", outlineColor="white", hideLegend=FALSE)
#'  
#'
#' @import ggplot2
#' @importFrom reshape2 melt
#' @importFrom scales brewer_pal
#' @export
sjp.corr <- function(data,
                     title=NULL,
                     titleSize=1.3,
                     titleColor="black",
                     axisLabels=NULL,
                     type="circle",
                     sortCorrelations=TRUE,
                     decimals=3,
                     missingDeletion="listwise",
                     corMethod="spearman",
                     geomAlpha=0.8,
                     valueLabelColor="black",
                     valueLabelSize=4.5,
                     valueLabelAlpha=1,
                     circleSize=15,
                     outlineColor="black",
                     outlineSize=1,
                     axisColor=NULL, 
                     borderColor=NULL, 
                     axisLabelSize=1.1,
                     axisLabelColor="gray30",
                     axisLabelAngle.x=0, 
                     axisLabelAngle.y=0, 
                     breakTitleAt=50, 
                     breakLabelsAt=12, 
                     hideDiagCircle=TRUE,
                     hideLegend=TRUE,
                     legendTitle=NULL,
                     showCorrelationValueLabels=TRUE,
                     showCorrelationPValues=TRUE,
                     pvaluesAsNumbers=FALSE,
                     showTickMarks=FALSE,
                     fillColor=NULL,
                     majorGridColor=NULL,
                     minorGridColor=NULL,
                     theme=NULL,
                     returnPlot=FALSE) {
  # ----------------------------
  # check for valid parameter
  # ----------------------------
  if (corMethod!="pearson" && corMethod!="spearman" && corMethod!= "kendall") {
    stop("Parameter 'corMethod' must be one of: pearson, spearman or kendall")
  }
  
  
  # ----------------------------
  # check if user has passed a data frame
  # or a pca object
  # ----------------------------
  if (class(data)=="matrix") {
    corr <- data
    cpvalues <- NULL
  }
  else {
    # missing deletion corresponds to
    # SPSS listwise
    if (missingDeletion=="listwise") {
      data <- na.omit(data)
      corr <- cor(data, method=corMethod)
    }
    # missing deletion corresponds to
    # SPSS pairwise
    else {
      corr <- cor(data, method=corMethod, use="pairwise.complete.obs")
    }
    #---------------------------------------
    # if we have a data frame as parameter,
    # compute p-values of significances
    #---------------------------------------
    computePValues <- function(df) {
      cp <- c()
      for (i in 1:ncol(df)) {
        pv <- c()
        for (j in 1:ncol(df)) {
          test <- cor.test(df[,i], df[,j], alternative="two.sided", method=corMethod)
          pv <- cbind(pv, round(test$p.value,4))
        }
        cp <- rbind(cp, pv)
      }
      return (cp)
    }
    cpvalues <- computePValues(data)
  }
  
  
  # ----------------------------
  # check if user defined labels have been supplied
  # if not, use variable names from data frame
  # ----------------------------
  if (is.null(axisLabels)) {
    axisLabels <- row.names(corr)
  }
  # --------------------------------------------------------
  # unlist labels
  # --------------------------------------------------------
  # Help function that unlists a list into a vector
  unlistlabels <- function(lab) {
    dummy <- unlist(lab)
    labels <- c()
    for (i in 1:length(dummy)) {
      labels <- c(labels, as.character(dummy[i]))
    }
    return (labels)
  }
  if (!is.null(axisLabels) && is.list(axisLabels)) {
    axisLabels <- unlistlabels(axisLabels)
  }
  # ----------------------------
  # Prepare length of title and labels
  # ----------------------------
  # check length of diagram title and split longer string at into new lines
  if (!is.null(title)) {
    title <- sju.wordwrap(title, breakTitleAt)
  }
  # check length of x-axis-labels and split longer strings at into new lines
  if (!is.null(axisLabels)) {
    axisLabels <- sju.wordwrap(axisLabels, breakLabelsAt)
  }

  
  # --------------------------------------------------------
  # order correlations from highest to lowest correlation coefficient
  # --------------------------------------------------------
  if (sortCorrelations) {
    neword <- order(corr[1,])
    orderedCorr <- corr[neword, neword]
    # order variable labels as well
    axisLabels <- axisLabels[neword]
    if (!is.null(cpvalues)) {
      cpvalues <- cpvalues[neword, neword]
    }
  }
  else {
    orderedCorr <- rev(corr)
    axisLabels <- rev(axisLabels)
    if (!is.null(cpvalues)) {
      cpvalues <- rev(cpvalues)
    }
  }  
  
  
  # --------------------------------------------------------
  # prepare a ordering-index-column needed for the data frame
  # that is passed to the ggplot
  # --------------------------------------------------------
  yo <- c()
  for (i in 1:nrow(corr)) {
    yo <- c(yo, c(rep(i,nrow(corr))))
  }
  
  # --------------------------------------------------------
  # melt correlation matrix and create data frame
  # --------------------------------------------------------
  orderedCorr <- melt(orderedCorr)
  if (!is.null(cpvalues)) cpvalues <- melt(cpvalues)
  # bind additional information like order for x- and y-axis
  # as well as the size of plotted points
  orderedCorr <- cbind(orderedCorr, ordx=c(1:nrow(corr)), ordy=yo, psize=c(exp(abs(orderedCorr$value))*circleSize))
  # if the diagonal circles should be hidden, set their point size to 0
  if (hideDiagCircle) {
    orderedCorr$psize[which(orderedCorr$value>=0.999)] <- 0
  }

  orderedCorr$ordx <- as.factor(orderedCorr$ordx)
  orderedCorr$ordy <- as.factor(orderedCorr$ordy)
  

  # --------------------------------------------------------
  # add column with significance value
  # --------------------------------------------------------
  cpv <- c()
  if (!is.null(cpvalues)) {
    if (!pvaluesAsNumbers) {
      if (cpvalues$value>=0.05) cpv <- c("")
      else if (cpvalues$value>=0.01 && cpvalues$value<0.05) cpv <- c("*")
      else if (cpvalues$value>=0.001 && cpvalues$value<0.01) cpv <- c("**")
      else if (cpvalues$value<0.001) cpv <- c("***")
    }
    else {
      cpv <- c(sprintf("\n(%.3f)", round(cpvalues$value,3)))
    }
  }
  else {
    cpv <- c("")
  }
  orderedCorr$ps <- cpv

  
  # --------------------------------------------------------
  # Set theme and default grid colours. grid colours
  # might be adjusted later
  # --------------------------------------------------------
  if (is.null(theme)) {
    ggtheme <- theme_gray()
  }
  else if (theme=="bw") {
    ggtheme <- theme_bw()
  }
  else if (theme=="classic") {
    ggtheme <- theme_classic()
  }
  else if (theme=="minimal") {
    ggtheme <- theme_minimal()
  }
  else if (theme=="none") {
    ggtheme <- theme_minimal()
    majorGridColor <- c("white")
    minorGridColor <- c("white")
    showTickMarks <-FALSE
  }
  
  
  # --------------------------------------------------------
  # Set up grid colours
  # --------------------------------------------------------
  majorgrid <- NULL
  minorgrid <- NULL
  if (!is.null(majorGridColor)) {
    majorgrid <- element_line(colour=majorGridColor)
  }
  if (!is.null(minorGridColor)) {
    minorgrid <- element_line(colour=minorGridColor)
  }
  
  
  # --------------------------------------------------------
  # Set up visibility oftick marks
  # --------------------------------------------------------
  if (!showTickMarks) {
    ggtheme <- ggtheme + theme(axis.ticks = element_blank())
  }
  if (!showCorrelationValueLabels) {
    correlationValueLabels <- c("")
    correlationPValues <- c("")
  }
  else {
    correlationValueLabels <- c(round(orderedCorr$value,decimals))
    if (showCorrelationPValues) {
      correlationPValues <- orderedCorr$ps
    }
    else {
      correlationPValues <- c("")
    }
  }
  
  
  print(sprintf("Computing correlation using %s-method with %s-deletion...", corMethod, missingDeletion))
  
  # --------------------------------------------------------
  # start with base plot object here
  # --------------------------------------------------------
  corrPlot <- ggplot(data=orderedCorr, aes(x=ordx, y=ordy, fill=value))
  # --------------------------------------------------------
  # determine the geom type, either points when "type" is "circles"
  # --------------------------------------------------------
  if (type=="circle") {
    # check whether we have an outline color
    if (is.null(outlineColor)) {
      geop <- geom_point(shape=21, size=orderedCorr$psize, alpha=geomAlpha)
    }
    # ... and apply colour-attribute
    else {
      geop <- geom_point(shape=21, size=orderedCorr$psize, alpha=geomAlpha, colour=outlineColor)
    }
    corrPlot <- corrPlot + 
      geop
  }
  # --------------------------------------------------------
  # or boxes / tiles when "type" is "tile"
  # --------------------------------------------------------
  else {
    # check whether we have an outline color
    if (is.null(outlineColor)) {
      geot <- geom_tile(alpha=geomAlpha)
    }
    # ... and apply colour-attribute
    else {
      geot <- geom_tile(size=outlineSize, colour=outlineColor, alpha=geomAlpha)
    }
    corrPlot <- corrPlot +
      geot
  }
  # fill gradient colour from distinct color brewer palette. negative correlations are dark
  # red, positive corr. are dark blue, and they become lighter the closer they are to a
  # correlation coefficient of zero
  if (is.null(fillColor)) {
    corrPlot <- corrPlot +
      scale_x_discrete(labels=axisLabels) +
      scale_y_discrete(labels=axisLabels) +
      # set limits to (-1,1) to make sure the whole color palette is used
      scale_fill_gradientn(colours=brewer_pal("div",5)(5), limits=c(-1,1))
  }
  else {
    corrPlot <- corrPlot +
      # set limits to (-1,1) to make sure the whole color palette is used
      scale_fill_gradientn(colours=fillColor, limits=c(-1,1))
  }
  corrPlot <- corrPlot +
    geom_text(label=sprintf("%s%s", correlationValueLabels, correlationPValues), colour=valueLabelColor, alpha=valueLabelAlpha, size=valueLabelSize) +
    labs(title=title, x=NULL, y=NULL, fill=legendTitle) +
    ggtheme +
    # set font size for axes.
    theme(axis.text = element_text(size=rel(axisLabelSize), colour=axisLabelColor), 
          axis.text.x = element_text(angle=axisLabelAngle.x),
          axis.text.y = element_text(angle=axisLabelAngle.y),
          plot.title = element_text(size=rel(titleSize), colour=titleColor))
  # the panel-border-property can only be applied to the bw-theme
  if (!is.null(borderColor)) {
    if (!is.null(theme) && theme=="bw") {
      corrPlot <- corrPlot + 
        theme(panel.border = element_rect(colour=borderColor))
    }
    else {
      cat("\nParameter 'borderColor' can only be applied to 'bw' theme.\n")
    }
  }
  if (!is.null(axisColor)) {
    corrPlot <- corrPlot + 
      theme(axis.line = element_line(colour=axisColor))
  }
  if (!is.null(minorgrid)) {
    corrPlot <- corrPlot + 
      theme(panel.grid.minor = minorgrid)
  }
  if (!is.null(majorgrid)) {
    corrPlot <- corrPlot + 
      theme(panel.grid.major = majorgrid)
  }
  if (hideLegend) {
    corrPlot <- corrPlot + 
      guides(fill=FALSE)
  }
  # ---------------------------------------------------------
  # Check whether ggplot object should be returned or plotted
  # ---------------------------------------------------------
  if (returnPlot) {
    return(corrPlot)
  }
  else {
    plot(corrPlot)
  }
}
back to top