Raw File
summaryP.s
summaryP <- function(formula, data=NULL,
                     subset=NULL, na.action=na.retain,
                     exclude1=TRUE, sort=TRUE,
                     asna=c('unknown', 'unspecified'), ...) {
  
  formula <- Formula(formula)

  Y <- if(length(subset))
    model.frame(formula, data=data, subset=subset, na.action=na.action)
  else
    model.frame(formula, data=data, na.action=na.action)
  X <- model.part(formula, data=Y, rhs=1)
  Y <- model.part(formula, data=Y, lhs=1)
  nY <- NCOL(Y)
  nX <- NCOL(X)
  namY <- names(Y)
  if(nX == 0) X <- data.frame(x=rep(1, NROW(Y)))
  else {
    ## Remove observations with any values of X NA
    i <- apply(is.na(X), 1, any)
    if(any(i)) {
      X <- X[! i,, drop=FALSE]
      Y <- Y[! i,, drop=FALSE]
    }
  }
  ux <- unique(X)
  Z <- NULL
  n <- nrow(X)
  
  if(sort) {
    ## Compute marginal frequencies of all regular variables so can sort
    mfreq <- list()
    for(ny in namY) {
      y <- Y[[ny]]
      if(!inherits(y, 'ynbind') && !inherits(y, 'pBlock')) {
        if(length(asna) && (is.factor(y) || is.character(y)))
          y[y %in% asna] <- NA
        freq <- table(y)
        counts        <- as.numeric(freq)
        names(counts) <- names(freq)
        mfreq[[ny]]   <- - sort(- counts)
      }
    }
  }
  for(i in 1 : nrow(ux)) {
    j <- rep(TRUE, n)
    if(nX > 0) for(k in 1 : nX) j <- j & (X[[k]] == ux[i, k])
    for(k in 1 : nY) {
      ## y <- yx[[k]] doesn't work as attributes lost by [.data.frame
      y <- Y[[k]]
      y <- if(is.matrix(y)) y[j,, drop=FALSE] else y[j]
#      y <- (Y[[k]])[j,, drop=FALSE]
      if(inherits(y, 'ynbind') || inherits(y, 'pBlock')) {
        overlab <- attr(y, 'label')
        labs    <- attr(y, 'labels')
        z <- NULL
        for(iy in 1 : ncol(y)) {
          tab <- table(y[, iy])
          no <- as.numeric(sum(tab))
          d <- if(inherits(y, 'ynbind'))
            data.frame(var=overlab,
                       val=labs[iy],
                       freq=as.numeric(tab['TRUE']),
                       denom=no)
          else
            data.frame(var=overlab,
                       val=names(tab),  # paste(labs[iy], names(tab)),
                       freq=as.numeric(tab),
                       denom=no)
          z <- rbind(z, d)
        }
      }
      else {  # regular single column
        if(length(asna) && (is.factor(y) || is.character(y)))
          y[y %in% asna] <- NA
        tab <- table(y)
        ny <- namY[k]
        la  <- label(y)
        if(la == '') la <- ny
        lev <- names(tab)
        mf <- mfreq[[ny]]
        no <- as.numeric(sum(tab))
        if(exclude1 && length(mf) == 2) {
          lowest <- names(which.min(mf))
          z <- data.frame(var=la, val=lowest,
                          freq=as.numeric(tab[lowest]),
                          denom=no)
        }
        else {
          if(sort) lev <- reorder(lev, (mfreq[[ny]])[lev])
          z <- data.frame(var=la, val=lev,
                          freq=as.numeric(tab),
                          denom=no)
        }
      }
      ## Add current X subset settings
      if(nX > 0) for(k in 1: nX) z[[names(ux)[k]]] <- ux[i, k]
      Z <- rbind(Z, z)
    }
  }
  structure(Z, class=c('summaryP', 'data.frame'), formula=formula,
            nX=nX, nY=nY)
}

plot.summaryP <-
  function(x, formula=NULL, groups=NULL, xlim=c(-.05, 1.05), text.at=NULL,
           cex.values=0.5,
           key=list(columns=length(groupslevels),
             x=.75, y=-.04, cex=.9,
             col=trellis.par.get('superpose.symbol')$col, corner=c(0,1)),
           outerlabels=TRUE, autoarrange=TRUE, ...)
{
  X <- x
  at   <- attributes(x)
  Form <- at$formula
  nX   <- at$nX
  nY   <- at$nY

  groupslevels <- if(length(groups)) levels(x[[groups]])
  condvar <- setdiff(names(X), c('val', 'freq', 'denom', groups))
  ## Reorder condvar in descending order of number of levels
  numu <- function(x) if(is.factor(x)) length(levels(x))
                       else length(unique(x[! is.na(x)]))

  if(autoarrange && length(condvar) > 1) {
    nlev <- sapply(X[condvar], numu)
    condvar <- condvar[order(nlev)]
  }
  form <- if(length(formula)) formula
  else as.formula(
    paste('val ~ freq',
          paste(condvar, collapse=' * '), sep=' | '))
  
  pan <- function(x, y, subscripts, groups=NULL, ...) {
    y <- as.numeric(y)
    denom <- X$denom[subscripts]
    panel.dotplot(x/denom, y, subscripts=subscripts, groups=groups, ...)
    if(length(cex.values) && cex.values > 0) {
      col <- if(length(groups)) trellis.par.get('superpose.symbol')$col
       else trellis.par.get('dot.symbol')$col

      longest.string <- paste(max(x), max(denom), sep='/  ')
      length.longest <- unit(1, 'strwidth', longest.string)
      xpos <- unit(1, 'npc') - unit(1, 'mm')
      txt <- if(length(groups)) {
        groups <- groups[subscripts]
        tx <- ''
        ig <- 0
        xpos <- xpos - length(levels(groups)) * length.longest
        for(g in levels(groups)) {
          ig <- ig + 1
          i <- groups == g
          fr <- paste(x[i], denom[i], sep='/')
          xpos <- xpos + length.longest
          grid.text(fr, xpos, unit(y, 'native') - unit(1, 'mm'),
                    just=c('right','top'), gp=gpar(cex=cex.values, col=col[ig]))
        }
      }
      else {
        fr <- paste(x, denom, sep='/')
        grid.text(fr, xpos, unit(y, 'native') - unit(1, 'mm'),
                  gp=gpar(cex=cex.values, col=col[1]), just=c('right','top'))
      }
    }
  }

  scal <- list(y='free', rot=0)
  scal$x <- if(length(text.at)) {
    at <- pretty(xlim)
    list(limits=range(c(xlim, text.at)), at=at[at >= -0.0001 & at <= 1.0001])
  } else list(limits=xlim)
  d <- if(!length(groups))
    dotplot(form, data=X, scales=scal, panel=pan,
            xlab='Proportion', ...)
  else eval(parse(text=
                  sprintf("dotplot(form, groups=%s, data=X, scales=scal, panel=pan, auto.key=key, xlab='Proportion', ...)", groups) ))

#  if(outerlabels && ((nX - length(groups) + 1 == 2) ||
#                     length(dim(d)) == 2))  d <- useOuterStrips(d)
  if(length(dim(d)) == 2) d <- useOuterStrips(d)
  ## Avoid wasting space for vertical variables with few levels
  if(condvar[length(condvar)] == 'var') {
    vars <- levels(X$var)
    nv <- length(vars)
    h <- integer(nv)
    for(i in 1 : nv) h[i] <- length(unique((X$val[X$var == vars[i]])))
    d <- resizePanels(d, h = h + 1)
  }
  d
}

ggplot.summaryP <-
  function(data, groups=NULL, xlim=c(0, 1),
           col=NULL, shape=NULL, autoarrange=TRUE, addlayer=NULL, ...)
{
  X <- data
  class(X) <- setdiff(class(X), 'summaryP')
  at   <- attributes(X)
  Form <- at$formula
  nX   <- at$nX
  nY   <- at$nY

  groupslevels <- if(length(groups)) levels(X[[groups]])
  condvar <- setdiff(names(X), c('val', 'freq', 'denom', groups))
  ## Reorder condvar in descending order of number of levels
  numu <- function(x) if(is.factor(x)) length(levels(x))
                       else length(unique(x[! is.na(x)]))

  if(autoarrange && length(condvar) > 1) {
    nlev <- sapply(X[condvar], numu)
    condvar <- condvar[order(nlev)]
  }

  ## Find list of variables that contain only one level but have a
  ## variable name that is longer than 5 characters.  The space devoted
  ## to one-level variables is not tall enough to print the variable name.
  ## Replace the name with (1) (2) ... and put the variable names possibly
  ## in a footnote

  fnvar <- ''
  lvar <- levels(X$var)
  i <- 0
  for(v in lvar) {
    if(nchar(v) > 5) {
      nlev <- length(unique(X$val[X$var == v]))
      if(nlev == 1) {
        i <- i + 1
        w <- paste('(', i, ')', sep='')
        if(i > 1) fnvar <- paste(fnvar, '; ', sep='')
        fnvar <- paste(fnvar, w, ' ', v, sep='')
        levels(X$var)[levels(X$var) == v] <- w
      }
    }
  }
  
  spl <- function(x) {
    u <- levels(x)
    n <- length(u)
    utrans <- character(n); names(utrans) <- u
    for(w in u)
      utrans[w] <- paste(strwrap(w, 10), collapse='\n')
    factor(x, u, utrans)
  }
  X$var <- spl(X$var)
  if(length(condvar) == 2) {
    othvar <- setdiff(condvar, 'var')
    X[[othvar]] <- spl(X[[othvar]])
  }

  k <- 'ggplot(X, aes(x=freq / denom, y=val'
  if(length(groups)) k <- paste(k, sprintf(', color=%s, shape=%s',
                                           groups, groups))
  k <- paste(k, '))')
  p <- eval(parse(text=k)) + geom_point()
  if(length(addlayer)) p <- p + addlayer
  if('var' %nin% condvar) stop('program logic error')
  if(length(condvar) == 1)
    p <- p + facet_grid(var ~ . , scales='free_y', space='free_y')
  else {
    p <- p + facet_grid(as.formula(sprintf('var ~ %s', othvar)),
                        scales='free_y', space='free_y')
  }
  p <- p + xlim(xlim) + xlab('Proportion') + ylab('')
  if(length(col))   p <- p + scale_color_manual(values=col)
  if(length(shape)) p <- p + scale_shape_manual(values=shape)
  
  if(fnvar != '') attr(p, 'fnvar') <- fnvar
  p
}


latex.summaryP <- function(object, groups=NULL, file='', round=3,
                           size=NULL, append=TRUE, ...) {
  class(object) <- 'data.frame'
  if(! append) cat('', file=file)

  p <- ifelse(object$denom == 0, '',
              format(round(object$freq / object$denom, round)))
  object$y <- paste(p, ' {\\scriptsize$\\frac{',
                    format(object$freq), '}{', format(object$denom),
                    '}$}', sep='')
  object$freq <- object$denom <- NULL

  stratvar <- setdiff(names(object), c('var', 'val', 'y', groups))
  svar <- if(! length(stratvar)) as.factor(rep('', nrow(object)))
   else {
     if(length(stratvar) == 1) object[[stratvar]]
      else do.call('interaction', list(object[stratvar], sep=' '))
   }

  object$stratvar <- svar
  object <- object[, c('var', 'val', 'y', groups, 'stratvar')]

  nl <- 0

  slev <- levels(svar)
  nslev <- length(slev)
  for(i in 1 : nslev) {
    
    if(nslev > 1) cat('\n\\vspace{1ex}\n\n\\textbf{', slev[i],
                      '}\n\\vspace{1ex}\n\n', sep='', file=file, append=TRUE)
    x <- object[svar == slev[i], colnames(object) != 'stratvar']
    if(length(groups)) {
      r <- reshape(x, timevar=groups, direction='wide',
                   idvar=c('var', 'val'))
      ## reshape does not respect order of levels of factor variables; reorder
      lev <- levels(x[[groups]])
      r <- r[c('var', 'val', paste('y', lev, sep='.'))]
      
      nl  <- length(lev)
      var <- unique(as.character(r$var))
      w <- latex(r[colnames(r) != 'var'],
                 table.env=FALSE, file=file, append=TRUE,
                 rowlabel='', rowname=rep('', nrow(r)),
                 rgroup=levels(r$var), n.rgroup=as.vector(table(r$var)),
                 size=size,
                 colheads=c(' ', lev),
                 center='none')
    }
    else {
      w <- latex(x[colnames(x) != 'var'],
                 table.env=FALSE, file=file, append=TRUE,
                 rowlabel='', rowname=rep('', nrow(x)),
                 rgroup=levels(x$var), n.rgroup=as.vector(table(x$var)),
                 size=size, colheads=c(' ', ' '), center='none')
    }
  }
  attr(w, 'ngrouplevels') <- nl
  attr(w, 'nstrata') <- nslev
  w
}
back to top