# https://github.com/cran/robCompositions
# Tip revision: 325444901c2227c36bea9ac4bd62ffcee2b6bc95 authored by Matthias Templ on 06 May 2015, 17:22:25 UTC
# version 1.9.1
# version 1.9.1
# Tip revision: 3254449
# outCoDa.R
#' Outlier detection for compositional data
#'
#' Outlier detection for compositional data using standard and robust
#' statistical methods.
#'
#' The outlier detection procedure is based on (robust) Mahalanobis distances
#' after an isometric logratio transformation of the data (see
#' \code{\link{isomLR}}). Observations whose Mahalanobis distance is strictly
#' greater than the square root of the chosen quantile of the Chi-squared
#' distribution with \code{ncol(x) - 1} degrees of freedom are marked as
#' outliers. This is equivalent to comparing the squared distance with the
#' quantile itself.
#'
#' If method \dQuote{robust} is chosen, the outlier detection is based on the
#' homogeneous majority of the compositional data set (location and scatter
#' are taken from \code{covMcd}). If method \dQuote{standard} is used, the
#' column means and the sample covariance matrix are applied during the
#' outlier detection procedure.
#'
#' @param x compositional data (a matrix or data frame with at least two
#' columns)
#' @param quantile quantile, corresponding to a significance level, is used as
#' a cut-off value for outlier identification: observations with larger
#' (robust) Mahalanobis distance are considered as potential outliers.
#' @param method either \dQuote{robust} (default) or \dQuote{standard};
#' unknown values raise an error.
#' @param h the relative size of the subsets for the robust covariance
#' estimation according to the MCD-estimator for which the determinant is
#' minimized. It is passed on as argument \code{alpha} of \code{covMcd} and
#' must be a single number in [0.5, 1]; the default 1/2 corresponds to subsets
#' of roughly (n+p+1)/2 observations. Values outside this range trigger a
#' warning and the default is used instead. Ignored for method
#' \dQuote{standard}.
#' @return An object of class \dQuote{outCoDa}, returned invisibly, which is a
#' list with the components
#' \item{mahalDist }{resulting Mahalanobis distances (not squared)}
#' \item{limit }{square root of the quantile of the Chi-squared distribution}
#' \item{outlierIndex }{logical vector indicating outliers and non-outliers}
#' \item{method }{method used}
#' @note It is highly recommended to use the robust version of the procedure.
#' @author Matthias Templ, Karel Hron
#' @seealso \code{\link{isomLR}}
#' @references Egozcue J.J., V. Pawlowsky-Glahn, G. Mateu-Figueras and C.
#' Barcel'o-Vidal (2003) Isometric logratio transformations for compositional
#' data analysis. \emph{Mathematical Geology}, \bold{35}(3) 279-300.
#'
#' Filzmoser, P., and Hron, K. (2008) Outlier detection for compositional data
#' using robust methods. \emph{Math. Geosciences}, \bold{40} 233-248.
#'
#' Rousseeuw, P.J., Van Driessen, K. (1999) A fast algorithm for the minimum
#' covariance determinant estimator. \emph{Technometrics}, \bold{41} 212-223.
#' @keywords multivariate
#' @examples
#'
#' data(expenditures)
#' oD <- outCoDa(expenditures)
#' oD
#'
outCoDa <- function(x, quantile=0.975, method="robust", h=1/2){
  # dim() is NULL for plain vectors, so test the number of dimensions first;
  # otherwise the comparison below would yield NA and an unrelated error.
  if (length(dim(x)) < 2L || ncol(x) < 2L) {
    stop("need data with at least 2 variables")
  }
  # Fail early on an unknown method instead of silently producing NULL
  # estimates that only blow up later inside mahalanobis().
  method <- match.arg(method, c("robust", "standard"))

  # Work in isometric logratio coordinates (one column fewer than x).
  z <- isomLR(x)

  if (method == "robust") {
    # covMcd() only accepts alpha in [0.5, 1]; fall back to the default
    # (which was always used before 'h' was wired through) rather than fail.
    if (!is.numeric(h) || length(h) != 1L || is.na(h) || h < 0.5 || h > 1) {
      warning("'h' must be a single fraction in [0.5, 1]; using the default 1/2",
              call. = FALSE)
      h <- 1/2
    }
    fit <- covMcd(z, alpha = h)
    center <- fit$center
    scatter <- fit$cov
  } else {
    center <- colMeans(z, na.rm = TRUE)
    scatter <- cov(z)
  }

  # mahalanobis() returns squared distances; both the distances and the
  # cut-off are reported on the square-root scale.
  dM <- sqrt(mahalanobis(z, center = center, cov = scatter))
  limit <- sqrt(qchisq(p = quantile, df = ncol(x) - 1))

  res <- list(mahalDist = dM, limit = limit,
              outlierIndex = dM > limit, method = method)
  class(res) <- "outCoDa"
  invisible(res)
}