https://github.com/cran/robCompositions
Tip revision: 325444901c2227c36bea9ac4bd62ffcee2b6bc95 authored by Matthias Templ on 06 May 2015, 17:22:25 UTC
version 1.9.1
version 1.9.1
Tip revision: 3254449
missPatterns.R
#' missing or zero pattern structure.
#'
#' Analysis of the missing or zero patterns structure of a data set.
#'
#' Here, one pattern defines those observations that have the same structure
#' regarding their missingness or zeros. For all patterns a summary is
#' calculated.
#'
#' @aliases missPatterns zeroPatterns
#' @param x a data frame or matrix.
#' @return \item{groups }{List of the different patterns and the observation
#' numbers for each pattern} \item{cn }{the names of the patterns, coded as
#' colon-separated strings of 0's and 1's, one digit per variable (1 = missing
#' or zero)} \item{tabcomb}{the pattern structure - all combinations of zeros
#' or missings in the variables, as a logical matrix with one row per pattern
#' and one column per variable; \code{TRUE} marks a variable that is not
#' missing (not zero) in that pattern} \item{tabcombPlus}{the pattern structure
#' as a data frame, including the size of those combinations/patterns in the
#' column \code{csum}, i.e. the number of observations that belong to each
#' pattern.} \item{rsum}{the number of zeros or missing values in each row of
#' the data set}
#' @author Matthias Templ. The code is based on a previous version from Andreas
#' Alfons and Matthias Templ from package VIM
#' @seealso \code{\link[VIM]{aggr}}
#' @keywords multivariate
#' @examples
#'
#' data(expenditures)
#' ## set NA's artificial:
#' expenditures[expenditures < 300] <- NA
#' ## detect the NA structure:
#' missPatterns(expenditures)
#'
missPatterns <- function(x){
  # Identification of the missing pattern structure.
  # Matthias Templ, Oct 10, 2011
  #
  # x: a data frame or matrix (anything with a 'dim' attribute).
  # Returns a list with
  #   groups      - named list; for each pattern the row numbers of x showing it
  #   cn          - pattern names, e.g. "0:1:0" (1 = missing in that variable)
  #   tabcomb     - logical matrix (patterns x variables), TRUE = not missing
  #   tabcombPlus - tabcomb as data frame plus column 'csum' (pattern size)
  #   rsum        - number of missing values in each row of x
  if(is.null(dim(x))) stop("the data set has to be consist of at least two variables")

  w <- is.na(x)
  # 0/1 indicator per cell; 'ifelse' does not omit the 'dim' attribute
  tmp <- ifelse(w, 1, 0)
  # one key per row, e.g. "0:1:0"
  tmpC <- apply(tmp, 1, paste, collapse=":")
  # distinct patterns in the (sorted) order used by table()
  cn <- names(table(tmpC))

  # decode every pattern key back into one row of 0/1 indicators;
  # rbind always yields a (patterns x variables) matrix, also for a single
  # pattern or a single variable
  tabcomb <- do.call(rbind,
                     lapply(strsplit(cn, ":", fixed=TRUE), as.integer))
  tabcomb <- tabcomb == 0

  # split() always returns a list. The former sapply() collapsed the result
  # to a vector/matrix whenever all patterns had the same number of
  # observations, which broke the pattern sizes computed below.
  groups <- split(seq_along(tmpC), factor(tmpC, levels=cn))

  ## Karel's two MUST-have variables ;-):
  csum <- as.numeric(vapply(groups, length, integer(1)))
  amountComb <- cbind(data.frame(tabcomb), csum=csum)
  rsum <- apply(w, 1, sum)
  ## TODO: add an N variable, + 2. row-wise, column-wise
  list(groups=groups, cn=cn, tabcomb=tabcomb, tabcombPlus=amountComb, rsum=rsum)
}
zeroPatterns <- function(x){
  # Identification of the zero pattern structure.
  # Matthias Templ, Oct 10, 2011
  #
  # x: a data frame or matrix (anything with a 'dim' attribute).
  # Returns a list with
  #   groups      - named list; for each pattern the row numbers of x showing it
  #   cn          - pattern names, e.g. "0:1:0" (1 = zero in that variable)
  #   tabcomb     - logical matrix (patterns x variables), TRUE = not zero
  #   tabcombPlus - tabcomb as data frame plus column 'csum' (pattern size)
  #   rsum        - number of zeros in each row of x
  # NOTE(review): NA entries in x are not treated specially; they propagate
  # into the pattern names (as "NA") and into rsum, exactly as before.
  if(is.null(dim(x))) stop("the data set has to be consist of at least two variables")

  w <- x == 0
  # 0/1 indicator per cell; 'ifelse' does not omit the 'dim' attribute
  tmp <- ifelse(w, 1, 0)
  # one key per row, e.g. "0:1:0"
  tmpC <- apply(tmp, 1, paste, collapse=":")
  # distinct patterns in the (sorted) order used by table()
  cn <- names(table(tmpC))

  # decode every pattern key back into one row of 0/1 indicators;
  # rbind always yields a (patterns x variables) matrix, also for a single
  # pattern or a single variable
  tabcomb <- do.call(rbind,
                     lapply(strsplit(cn, ":", fixed=TRUE), as.integer))
  tabcomb <- tabcomb == 0

  # split() always returns a list. The former sapply() collapsed the result
  # to a vector/matrix whenever all patterns had the same number of
  # observations, which broke the pattern sizes computed below.
  groups <- split(seq_along(tmpC), factor(tmpC, levels=cn))

  ## Karel's two MUST-have variables ;-):
  csum <- as.numeric(vapply(groups, length, integer(1)))
  amountComb <- cbind(data.frame(tabcomb), csum=csum)
  rsum <- apply(w, 1, sum)
  ## TODO: add an N variable, + 2. row-wise, column-wise
  list(groups=groups, cn=cn, tabcomb=tabcomb, tabcombPlus=amountComb, rsum=rsum)
}