Revision 9ae1e67cb5e13c5bbc731a5fc56a0053182f16b0 authored by Matthias Templ on 11 February 2020, 16:20:02 UTC, committed by cran-robot on 11 February 2020, 16:20:02 UTC
1 parent 775c7e1
Raw File
plot.imp.R
#' Plot method for objects of class imp
#' 
#' This function provides several diagnostic plots for the imputed data set in
#' order to see how the imputated values are distributed in comparison with the
#' original data values.
#' 
#' The first plot (which \eqn{== 1}) is a multiple scatterplot where for the
#' imputed values another plot symbol and color is used in order to highlight
#' them. Currently, the ggpairs functions from the GGally package is used.
#' 
#' Plot 2 is a parallel coordinate plot in which imputed values in certain
#' variables are highlighted.  In parallel coordinate plots, the variables are
#' represented by parallel axes.  Each observation of the scaled data is shown
#' as a line.  If interactive is TRUE, the variables to be used for
#' highlighting can be selected interactively. Observations which includes
#' imputed values in any of the selected variables will be highlighted.  A
#' variable can be added to the selection by clicking on a coordinate axis.  If
#' a variable is already selected, clicking on its coordinate axis will remove
#' it from the selection. Clicking anywhere outside the plot region quits the
#' interactive session.
#' 
#' Plot 3 shows a ternary diagram in which imputed values are highlighted, i.e.
#' those spikes of the chosen plotting symbol are colored in red for which of
#' the values are missing in the unimputed data set.
#' 
#' @param x object of class \sQuote{imp}
#' @param \dots other parameters to be passed through to plotting functions.
#' @param which if a subset of the plots is required, specify a subset of the
#' numbers 1:3.
#' @param ord determines the ordering of the variables
#' @param colcomb if colcomb\eqn{=}\dQuote{missnonmiss}, observations with
#' missings in any variable are highlighted. Otherwise, observations with
#' missings in any of the variables specified by colcomb are highlighted in the
#' parallel coordinate plot.
#' @param plotvars Parameter for the parallel coordinate plot. A vector giving
#' the variables to be plotted.  If NULL (the default), all variables are
#' plotted.
#' @param col a vector of length two giving the colors to be used in the plot.
#' The second color will be used for highlighting.
#' @param alpha a numeric value between 0 and 1 giving the level of
#' transparency of the colors, or NULL. This can be used to prevent
#' overplotting.
#' @param lty a vector of length two giving the line types.  The second line
#' type will be used for the highlighted observations.  If a single value is
#' supplied, it will be used for both non-highlighted and highlighted
#' observations.
#' @param xaxt the x-axis type (see \code{\link{par}}).
#' @param xaxlabels a character vector containing the labels for the x-axis.
#' If NULL, the column names of x will be used.
#' @param las the style of axis labels (see \code{\link{par}}).
#' @param interactive a logical indicating whether the variables to be used for
#' highlighting can be selected interactively (see \sQuote{Details}).
#' @param pch a vector of length two giving the symbol of the plotting points.
#' The symbol will be used for the highlighted observations.  If a single value
#' is supplied, it will be used for both non-highlighted and highlighted
#' observations.
#' @param ask logical; if TRUE, the user is asked before each plot, see
#' \code{\link{par}}(ask=.).
#' @param center logical, indicates if the data should be centered prior
#' plotting the ternary plot.
#' @param scale logical, indicates if the data should be centered prior
#' plotting the ternary plot.
#' @param id reads the position of the graphics pointer when the (first) mouse
#' button is pressed and returns the corresponding index of the observation.
#' (only used by the ternary plot)
#' @param seg.l length of the plotting symbol (spikes) for the ternary plot.
#' @param seg1 if TRUE, the spikes of the plotting symbol are justified.
#' @return None (invisible NULL).
#' @author Matthias Templ
#' @seealso \code{\link{impCoda}}, \code{\link{impKNNa}}
#' @references Aitchison, J. (1986) \emph{The Statistical Analysis of
#' Compositional Data} Monographs on Statistics and Applied Probability.
#' Chapman \& Hall Ltd., London (UK). 416p.
#' 
#' Wegman, E. J. (1990) \emph{Hyperdimensional data analysis using parallel
#' coordinates} Journal of the American Statistical Association 85, 664--675.
#' @keywords aplot hplot
#' @export
#' @importFrom GGally ggpairs
#' @importFrom MASS parcoord
#' @method plot imp
#' @examples
#' 
#' data(expenditures)
#' expenditures[1,3]
#' expenditures[1,3] <- NA
#' xi <- impKNNa(expenditures)
#' xi
#' summary(xi)
#' \dontrun{plot(xi, which=1)}
#' plot(xi, which=2)
#' plot(xi, which=3)
#' plot(xi, which=3, seg1=FALSE)
#' 
plot.imp <-
function (x, ..., which=1, ord=1:ncol(x),
          colcomb = "missnonmiss", plotvars = NULL,
          col = c("skyblue", "red"), alpha = NULL,
          lty = par("lty"), xaxt = "s", 
          xaxlabels = NULL,
          las = 3, interactive = TRUE,  pch = c(1, 3),
#          smooth = FALSE, reg.line = FALSE,
#          legend.plot = FALSE,  
          ask = prod(par("mfcol")) < 
            length(which) && dev.interactive(), 
          center = FALSE, 
          scale=FALSE, id=FALSE,
          seg.l=0.02, seg1=TRUE)
{
  smooth=FALSE
  reg.line=FALSE
  legend.plot=FALSE
  clInfo <- class(x)[1]
	if (clInfo != "imp") 
		stop("use only with objects of class \"imp\" ")
	wind <- x$wind
	x <- x$xImp  
  if (!is.numeric(which) || any(which < 1) || any(which > 3)) 
		stop("'which' must be in 1:3")
  if(length(ord) != ncol(x)) stop(paste("'ord' is a vector of length", length(ord), 
					  "\n but has to be of length", ncol(x)))
  if (ncol(x) < 2) 
	  stop("'x' must be at least 2-dimensional.")
  if (length(colcomb) == 0){ 
	  stop("'colcomb' must have positive length.")
  }	else if(length(colcomb) > ncol(x)){ 
	  stop("'colcomb' is too long.")
  }
  if (length(col) != 2) 
	  stop("'col' must be a vector of length 2.")
  if (length(lty) == 1){
	  lty <- rep(lty, 2)
  } else if(length(lty) != 2){ 
	  stop("'lty' must be a vector of length 1 or 2.")
  }
  if (length(pch) == 1){
	  pch <- rep(pch, 2)
  } else if(length(pch) != 2){ 
	  stop("'pch' must be a vector of length 1 or 2.")
  }
  if(class(seg1) != "logical") stop("'seg1' must be logical")
  if(length(seg1) != 1) stop("'seg1' has to be a vector of length 1") 
  if(length(seg.l) != 1) stop("'seg.l' has to be a vector of length 1") 
  x <- as.data.frame(x)
  x <- x[, ord]
  wind <- wind[, ord]
  show <- rep(FALSE, 3)
  show[which] <- TRUE
  if (ask) {
        oask <- devAskNewPage(TRUE)
        on.exit(devAskNewPage(oask))
  }
  #######################################################################
  ###   tenary plot:
  if(show[3]){
	 if(ncol(x) > 3){ 
		 warning(paste("tenary plot is only visible for 3 variables, you have", 
						 ncol(x),"\n only the first three variables are selected for plotting"))
	     x <- x[,1:3]
		 wind <- wind[,1:3]
	 }
	 if(ncol(x) == 2){ warning("tenary plot is only defined for 3 variables, 
                                \n you have 2")}
	 
	 col <- unclass(col)
	   cn <- colnames(x)
	   s60 <- sin(pi/3)
	   c60 <- cos(pi/3)
	   opar <- par(mar=c(3,2,2,0.5))
       plot(x = c(0, c60, 1, 0), y = c(0, s60, 0, 0), 
			 xlim = c(0, 1), ylim = c(0, 1), type = "n", 
			 xlab = "", ylab = "", axes = FALSE)
		segments(x0 = c(0, 1, c60), y0 = c(0, 0, s60), 
				 x1 = c(1, c60, 0), y1 = c(0, s60, 0))
		mtext(cn[1], side = 1, adj = 0, padj=-2, line = 1.5, cex=1.2)
		mtext(cn[2], side = 1, adj = 1, padj=-2, line = 1.5, cex=1.2)
		text(0.5, s60 * 1.05, cn[3], pos = 3, offset = 0.01, xpd = TRUE, cex=1.2)
		X <- constSum(x, c(1, 2, 3))
		Y <- scale(constSum(X), center = center, scale = scale)
		x2 <- Y[, 2] + Y[, 3] * c60
		y2 <- Y[, 3] * s60
		ind <- apply(wind, 1, any)
		points(x2, y2, col = col[1], pch=pch[1], cex=1)
		points(x2[ind], y2[ind], col = col[2], pch=pch[1], cex=1)
		n <- length(x2)
			ind <- apply(wind, 1, any)
			ind1 <- which(wind[,1])
			ind2 <- which(wind[,2])
			ind3 <- which(wind[,3])
			info <- apply(wind, 2, any)
			## first direction:
			seg.col="red"
			seg.lwd=1
			dir <- function(x,y, k=0.025){
					if(length(x) < 1){ 
						z1s <- z2s <- z3s <- matrix(0, nrow=length(x), ncol=2)
					} else{
						z1s <- z2s <- z3s <- matrix(0, nrow=length(x), ncol=2)
						for(i in 1:length(x)){
						  alpha <- atan(y[i]/x[i])
						  alphaz <- atan(x[i]/y[i])
						  k1s <- k*cos(alpha)
						  k2s <- k*sin(alpha)
						  z1s[i,] <- c(x[i], y[i]) + c(-k*cos(alpha), -k*sin(alpha))
						  z2s[i,] <- c(x[i], y[i]) + c(k*cos(alpha), -k*sin(alpha))
						  z3s[i,] <- if(x[i] < 0.5){ 
							  c(x[i], y[i]) + c(cos(alphaz)*k, sin(alphaz)*k)
						  } else{ 
							  c(x[i], y[i]) + c(-cos(alphaz)*k, sin(alphaz)*k)
						  }
						}
					}
					list(z1=z1s, z2=z2s, z3=z3s)
			}
			dir2 <- function(x,y,k=0.025){
				z1s <- z2s <- z3s <- matrix(0, nrow=length(x), ncol=2)
				z3s <- cbind(x,y) 
				z3s[,2] <- z3s[,2] + k
				z1s <- cbind(x,y) 
				z1s[,1] <- z1s[,1] - k*cos(0.45)
				z1s[,2] <- z1s[,2] - k*sin(0.45)				
				z2s <- cbind(x,y) 
				z2s[,1] <- z2s[,1] + k*cos(0.45)
				z2s[,2] <- z2s[,2] - k*sin(0.45)			
				list(z1=z1s, z2=z2s, z3=z3s)				
			}
			if(seg1) d <- dir(x=x2, y=y2, k=seg.l) else d <- dir2(x=x2, y=y2, k=seg.l)
			
			segments(x1=d$z1[,1], y1=d$z1[,2],
					x0=x2, y0=y2,
					col=col[1], lwd=seg.lwd)	
			segments(x1=d$z2[,1], y1=d$z2[,2],
					x0=x2, y0=y2,
					col=col[1], lwd=seg.lwd)				
			segments(x1=d$z3[,1], y1=d$z3[,2],
							x0=x2, y0=y2,
							col=col[1], lwd=seg.lwd)	
					
			if(info[1]){		
					segments(x1=d$z1[ind1,1], y1=d$z1[ind1,2],
							x0=x2[ind1], y0=y2[ind1],
							col=col[2], lwd=seg.lwd+2)	
				}
			if(info[2]){
					segments(x1=d$z2[ind2,1], y1=d$z2[ind2,2],
							x0=x2[ind2], y0=y2[ind2],
							col=col[2], lwd=seg.lwd+2)	
				}
			if(info[3]){
					segments(x1=d$z3[ind3,1], y1=d$z3[ind3,2],
							x0=x2[ind3], y0=y2[ind3],
							col=col[2], lwd=seg.lwd+2)	
				}
							
		if (id) {
				 if (is.null(idlabs)) 
					 idlabs <- paste(cn[1], "=", round(X[, 1], 2), 
							 ",\n", cn[2], "=", round(X[, 2], 2), ",\n", 
							 cn[3], "=", round(X[, 3], 2))
				 return(identify(x, idlabs, xpd = NA))
		}
		par(opar)
		 return(invisible(NULL))	  
  }

  #######################################################################
  ###   scatterplot matrix
  if(show[1]){
    if (!(inherits(x, c("data.frame", "matrix"))))
        stop("'x' must be a data.frame or matrix.")
    if (ncol(x) < 2)
        stop("'x' must be at least 2-dimensional.")
    if (length(col) != 2)
        stop("'col' must be a vector of length 2.")
    if (length(pch) != 2)
        stop("'pch' must be a vector of length 2.")
    if (!is.null(alpha))
        col <- alphablend(col, alpha)
    x <- as.data.frame(x)
    ind <- which(!unlist(lapply(x, is.numeric)))
    x[, ind] <- sapply(ind, function(i, x) as.numeric(x[, i]),
        x)
    args <- list(smooth = smooth, reg.line = reg.line, legend.plot = legend.plot,
        ...)
    if (!is.null(args$groups)) {
        args$groups <- FALSE
        warning("'groups' will be ignored.")
    }
    if (any(wind)) {
        w <- apply(wind, 2, any)
        #if (length(which(!w)) < 2)
  		#stop("at least two variables without missings are required.")
        args$x <- x
        #navec <- if (colcomb[1] == "missnonmiss")
        navec <- apply(wind, 1, any)
        #else apply(wind[, colcomb, drop = FALSE], 1, any)
        if (all(navec)) {
            args <- c(args, list(col = col, pch = pch[2]))
        }
        else {
            colvec <- ifelse(navec, col[2], col[1])
            args <- c(args, list(col = c(col[1], col), pch = pch,
                groups = as.factor(colvec)))
        }
    }
    else {
        args <- c(list(x = x, col = rep(col[1], 2),
            pch = pch[1]),
            args)
    }
    args$col <- c(args$col[1], args$col[3], args$col[2])
    args$pch <- args$pch[2:1]
    args$diagonal <- "histogram"
    ## new in GGally:
    x <- cbind(x, miss=args$group)
    print(ggpairs(x, colour='miss', alpha=0.4, shape='miss'))
#    do.call(scatterplotMatrix, args)
    invisible()
  }
  #########################################################################
  ###   parallel coordinate plot
  if(show[2]){
    if (!(inherits(x, c("data.frame", "matrix"))))
        stop("'x' must be a data.frame or matrix.")
    if (ncol(x) < 2)
        stop("'x' must be at least 2-dimensional.")
    if (length(colcomb) == 0)
        stop("'colcomb' must have positive length.")
    else if (length(colcomb) > ncol(x))
        stop("'colcomb' is too long.")
    if (length(col) != 2)
        stop("'col' must be a vector of length 2.")
    if (length(lty) == 1)
        lty <- rep(lty, 2)
    else if (length(lty) != 2)
        stop("'lty' must be a vector of length 1 or 2.")
    x <- as.data.frame(x)
    ind <- which(!unlist(lapply(x, is.numeric)))
    x[, ind] <- sapply(ind, function(i, x) as.numeric(x[, i]),
        x)
    if (!is.null(alpha))
        col <- alphablend(col, alpha)
    createPlot <- function() {
        op <- par(xaxt = xaxt, las = las)
        on.exit(par(op))
        z <- if (is.null(plotvars))
            x
        else x[, plotvars]
        if (any(wind) && length(colcomb)) {
            navec <- if (colcomb[1] == "missnonmiss"){
                apply(wind, 1, any)
              } else{ apply(wind[, colcomb, drop = FALSE], 1, any)}
            colvec <- ifelse(navec, col[2], col[1])
            ltyvec <- ifelse(navec, lty[2], lty[1])
            ord <- order(colvec, decreasing = col[2] < col[1])
            z <- z[ord, ]
            cols <- colvec[ord]
            ltys <- ltyvec[ord]
        }
        else {
            cols <- col[1]
            ltys <- lty[1]
        }
        if (!is.null(xaxlabels))
            colnames(z) <- xaxlabels
        parcoord(z, cols, ltys, ...)
    }
    createPlot()
    interactiveDevices <- c("X11", "quartz", "windows")
    dev <- names(dev.cur())
    if (interactive && any(!is.na(charmatch(interactiveDevices,
        dev)))) {
        cat(paste("\nClick on a coordinate axis to highlight",
            "missings of the corresponding variable.\n"))
        cat(paste("To regain use of the VIM GUI and the R console,",
            "click outside the plot region.\n\n"))
        if (is.null(plotvars)) {
            p <- ncol(x)
            plotvars <- colnames(x)
        }
        else {
            p <- length(plotvars)
            plotvars <- colnames(x)[plotvars]
        }
        pt <- locator(1)
        colcomb <- character()
        while (!is.null(pt) && 1 <= pt$x && pt$x < p && 0 <=
            pt$y && pt$y <= 1) {
            i <- round(pt$x)
            colcomb <- if (plotvars[i] %in% colcomb)
                setdiff(colcomb, plotvars[i])
            else c(colcomb, plotvars[i])
            createPlot()
            if (length(colcomb) == 0) {
                cat("No missings highlighted.")
            }
            else if (length(colcomb) == 1) {
                cat(paste("Highlighted imputed values in variable ",
                  colcomb, ".\n", sep = "'"))
            }
            else {
                colcombout <- paste(colcomb, collapse = "', '")
                cat(paste("Highlighted imputed values in variables ",
                  colcombout, ".\n", sep = "'"))
            }
            flush.console()
            pt <- locator(1)
        }
    }
  }
    invisible()
}
back to top