Raw File
sjPlotStackFrequencies.R
#' @title Plot stacked proportional bars
#' @name sjp.stackfrq
#'
#' @description Plot items (variables) of a scale as stacked proportional bars. This
#'                function is useful when several items with identical scale/categoroies
#'                should be plotted to compare the distribution of answers.
#'
#' @note Thanks to \href{http://www.clas.ufl.edu/users/forrest/}{Forrest Stevens} for bug fixes.
#'
#' @param items Data frame, with each column representing one item.
#' @param sort.frq Indicates whether the \code{items} should be ordered by
#'          by highest count of first or last category of \code{items}.
#'          \describe{
#'            \item{\code{"first.asc"}}{to order ascending by lowest count of first category,}
#'            \item{\code{"first.desc"}}{to order descending by lowest count of first category,}
#'            \item{\code{"last.asc"}}{to order ascending by lowest count of last category,}
#'            \item{\code{"last.desc"}}{to order descending by lowest count of last category,}
#'            \item{\code{NULL}}{(default) for no sorting.}
#'          }
#' @param show.prc Logical, if \code{TRUE} (default), the percentage values at the x-axis are shown.
#'
#' @return A ggplot-object.
#'
#' @inheritParams sjp.grpfrq
#' @inheritParams sjp.frq
#' @inheritParams plot_model
#'
#' @examples
#' # Data from the EUROFAMCARE sample dataset
#' library(sjmisc)
#' data(efc)
#' # recveive first item of COPE-index scale
#' start <- which(colnames(efc) == "c82cop1")
#' # recveive first item of COPE-index scale
#' end <- which(colnames(efc) == "c90cop9")
#' # auto-detection of labels
#' sjp.stackfrq(efc[, start:end])
#'
#'
#' @import ggplot2
#' @importFrom dplyr group_by mutate arrange
#' @importFrom scales percent
#' @importFrom stats na.omit xtabs
#' @importFrom rlang .data
#' @export
sjp.stackfrq <- function(items,
                         title = NULL,
                         legend.title = NULL,
                         legend.labels = NULL,
                         axis.titles = NULL,
                         axis.labels = NULL,
                         weight.by = NULL,
                         sort.frq = NULL,
                         wrap.title = 50,
                         wrap.labels = 30,
                         wrap.legend.title = 30,
                         wrap.legend.labels = 28,
                         geom.size = 0.5,
                         geom.colors = "Blues",
                         show.values = TRUE,
                         show.n = TRUE,
                         show.prc = TRUE,
                         show.legend = TRUE,
                         grid.breaks = 0.2,
                         expand.grid = FALSE,
                         digits = 1,
                         vjust = "center",
                         coord.flip = TRUE) {

  # check param. if we have a single vector instead of
  # a data frame with several items, convert vector to data frame

  if (!is.data.frame(items) && !is.matrix(items)) items <- as.data.frame(items)

  # copy titles

  if (is.null(axis.titles)) {
    axisTitle.x <- NULL
    axisTitle.y <- NULL
  } else {
    axisTitle.x <- axis.titles[1]
    if (length(axis.titles) > 1)
      axisTitle.y <- axis.titles[2]
    else
      axisTitle.y <- NULL
  }

  # check sorting

  if (!is.null(sort.frq)) {
    if (sort.frq == "first.asc") {
      sort.frq  <- "first"
      reverseOrder <- FALSE
    } else if (sort.frq == "first.desc") {
      sort.frq  <- "first"
      reverseOrder <- TRUE
    } else if (sort.frq == "last.asc") {
      sort.frq  <- "last"
      reverseOrder <- TRUE
    } else if (sort.frq == "last.desc") {
      sort.frq  <- "last"
      reverseOrder <- FALSE
    } else {
      sort.frq  <- NULL
      reverseOrder <- FALSE
    }
  } else {
    reverseOrder <- FALSE
  }

  # try to automatically set labels if not passed as parameter

  if (is.null(legend.labels))
    legend.labels <- sjlabelled::get_labels(
      items[[1]],
      attr.only = F,
      values = NULL,
      non.labelled = T
    )

  if (is.null(axis.labels)) {
    axis.labels <- sjlabelled::get_label(items, def.value = colnames(items))
  }

  # unname labels, if necessary, so we have a simple
  # character vector

  if (!is.null(names(axis.labels))) axis.labels <- as.vector(axis.labels)

  # unname labels, if necessary, so we have a simple
  # character vector

  if (!is.null(legend.labels) && !is.null(names(legend.labels))) legend.labels <- as.vector(legend.labels)

  # if we have no legend labels, we iterate all data frame's
  # columns to find all unique items of the data frame.
  # In case one item has missing categories, this may be
  # "compensated" by looking at all items, so we have the
  # actual values of all items.

  if (is.null(legend.labels)) {
    legend.labels <- as.character(sort(unique(unlist(
      apply(items, 2, function(x) unique(stats::na.omit(x)))))))
  }

  # Check whether N of each item should be included into
  # axis labels

  if (show.n) {
    for (i in seq_len(length(axis.labels))) {
      axis.labels[i] <- paste(axis.labels[i],
                              sprintf(" (n=%i)", length(stats::na.omit(items[[i]]))),
                              sep = "")
    }
  }

  # if we have legend labels, we know the exact
  # amount of groups

  countlen <- length(legend.labels)

  # create cross table for stats, summary etc.
  # and weight variable. do this for each item that was
  # passed as parameter

  mydat <- c()

  # determine minimum value. if 0, add one, because
  # vector indexing starts with 1

  if (any(apply(items, c(1, 2), is.factor)) || any(apply(items, c(1, 2), is.character))) {
    diff <- ifelse(min(apply(items, c(1, 2), as.numeric), na.rm = TRUE) == 0, 1, 0)
  } else {
    diff <- ifelse(min(items, na.rm = TRUE) == 0, 1, 0)
  }

  # iterate item-list
  for (i in seq_len(ncol(items))) {
    # get each single items
    variable <- items[[i]]

    # create proportional table so we have the percentage
    # values that should be used as y-value for the bar charts
    # We now have a data frame with categories, group-association
    # and percentage values (i.e. each cell as separate row in the
    # data frame)

    # check whether counts should be weighted or not
    if (is.null(weight.by)) {
      df <- as.data.frame(prop.table(table(variable)))
    } else {
      df <- as.data.frame(prop.table(round(stats::xtabs(weight.by ~ variable), 0)))
    }

    # give columns names
    names(df) <- c("var", "prc")

    # need to be numeric, so percentage values (see below) are
    # correctly assigned, i.e. missing categories are considered
    df$var <- sjlabelled::as_numeric(df$var, keep.labels = F) + diff # if categories start with zero, fix this here

    # Create a vector of zeros
    prc <- rep(0, countlen)

    # Replace the values in prc for those indices which equal df$var
    prc[df$var] <- df$prc

    # create new data frame. We now have a data frame with all
    # variable categories abd their related percentages, including
    # zero counts, but no(!) missings!
    mydf <- data.frame(grp = i, cat = seq_len(countlen), prc)

    # now, append data frames
    mydat <- data.frame(rbind(mydat, mydf))
  }

  # make sure group and count variable
  # are factor values

  mydat$grp <- as.factor(mydat$grp)
  mydat$cat <- as.factor(mydat$cat)

  # add half of Percentage values as new y-position for stacked bars
  mydat <- mydat %>%
    dplyr::group_by(.data$grp) %>%
    dplyr::mutate(ypos = cumsum(prc) - 0.5 * prc) %>%
    dplyr::arrange(.data$grp)

  # Prepare and trim legend labels to appropriate size

  # wrap legend text lines
  legend.labels <- sjmisc::word_wrap(legend.labels, wrap.legend.labels)

  # check whether we have a title for the legend
  # if yes, wrap legend title line
  if (!is.null(legend.title)) legend.title <- sjmisc::word_wrap(legend.title, wrap.legend.title)

  # check length of diagram title and split longer string at into new lines
  # every 50 chars
  if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title)

  # check length of x-axis-labels and split longer strings at into new lines
  # every 10 chars, so labels don't overlap
  if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels)

  # Check if ordering was requested

  if (!is.null(sort.frq)) {
    # order by first cat
    if (sort.frq == "first") {
      facord <- order(mydat$prc[which(mydat$cat == 1)])
    } else {
      # order by last cat
      facord <- order(mydat$prc[which(mydat$cat == countlen)])
    }

    # create dummy vectors from 1 to itemlength
    dummy1 <- dummy2 <- seq_len(length(facord))

    # facords holds the ordered item indices! we now need to
    # change the original item-index with its ordered position index.
    # example:
    # we have 4 items, and they may be ordered like this:
    # 1 3 4 2
    # so the first item is the one with the lowest count , item 3 is on second postion,
    # item 4 is on third position and item 2 is the last item (with highest count)
    # we now need their order as subsequent vector: 1 4 2 3
    # (i.e. item 1 is on first pos, item 2 is on fourth pos, item 3 is on
    # second pos and item 4 is on third pos in order)

    if (reverseOrder) {
      dummy2[rev(facord)] <- dummy1
    } else {
      dummy2[facord] <- dummy1
    }

    # now we have the order of either lowest to highest counts of first
    # or last category of "items". We now need to repeat these values as
    # often as we have answer categories
    orderedrow <- unlist(tapply(dummy2, seq_len(length(dummy2)), function(x) rep(x, countlen)))

    # replace old grp-order by new order
    mydat$grp <- as.factor(orderedrow)

    # reorder axis labels as well
    axis.labels <- axis.labels[order(dummy2)]
  }


  # check if category-oder on x-axis should be reversed
  # change category label order then

  if (reverseOrder && is.null(sort.frq)) axis.labels <- rev(axis.labels)

  # set diagram margins

  if (expand.grid) {
    expgrid <- waiver()
  } else {
    expgrid <- c(0, 0)
  }

  # Set value labels and label digits

  mydat$digits <- digits
  if (show.values) {
    ggvaluelabels <-  geom_text(
      aes(y = .data$ypos, label = sprintf("%.*f%%", .data$digits, 100 * .data$prc)),
      vjust = vjust
    )
  } else {
    ggvaluelabels <-  geom_text(aes(y = .data$ypos), label = "")
  }

  # Set up grid breaks

  if (is.null(grid.breaks)) {
    gridbreaks <- waiver()
  } else {
    gridbreaks <- c(seq(0, 1, by = grid.breaks))
  }

  # check if category-oder on x-axis should be reversed
  # change x axis order then

  if (reverseOrder && is.null(sort.frq)) {
    baseplot <- ggplot(mydat, aes(x = rev(.data$grp), y = .data$prc, fill = .data$cat))
  } else {
    baseplot <- ggplot(mydat, aes(x = .data$grp, y = .data$prc, fill = .data$cat))
  }

  baseplot <- baseplot +
    # plot bar chart
    geom_bar(stat = "identity", position = position_stack(reverse = TRUE), width = geom.size)

  # show/hide percentage values on x axis

  if (show.prc)
    perc.val <- scales::percent
  else
    perc.val <- NULL

  # start plot here

  baseplot <- baseplot +
    # show absolute and percentage value of each bar.
    ggvaluelabels +
    # no additional labels for the x- and y-axis, only diagram title
    labs(title = title, x = axisTitle.x, y = axisTitle.y, fill = legend.title) +
    # print value labels to the x-axis.
    # If parameter "axis.labels" is NULL, the category numbers (1 to ...)
    # appear on the x-axis
    scale_x_discrete(labels = axis.labels) +
    # set Y-axis, depending on the calculated upper y-range.
    # It either corresponds to the maximum amount of cases in the data set
    # (length of var) or to the highest count of var's categories.
    scale_y_continuous(breaks = gridbreaks,
                       limits = c(0, 1),
                       expand = expgrid,
                       labels = perc.val)
  # check whether coordinates should be flipped, i.e.
  # swap x and y axis
  if (coord.flip) baseplot <- baseplot + coord_flip()

  # set geom colors

  sj.setGeomColors(
    baseplot,
    geom.colors,
    length(legend.labels),
    show.legend,
    legend.labels
  )
}
back to top