Revision 9ae1e67cb5e13c5bbc731a5fc56a0053182f16b0 authored by Matthias Templ on 11 February 2020, 16:20:02 UTC, committed by cran-robot on 11 February 2020, 16:20:02 UTC
1 parent 775c7e1
Raw File
imputeBDLs.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/imputeBDLs.R
\name{imputeBDLs}
\alias{imputeBDLs}
\alias{print.replaced}
\alias{checkData}
\alias{adjustImputed}
\title{EM-based replacement of rounded zeros in compositional data}
\usage{
imputeBDLs(
  x,
  maxit = 10,
  eps = 0.1,
  method = "subPLS",
  dl = rep(0.05, ncol(x)),
  variation = TRUE,
  nPred = NULL,
  nComp = "boot",
  bruteforce = FALSE,
  noisemethod = "residuals",
  noise = FALSE,
  R = 10,
  correction = "normal",
  verbose = FALSE,
  test = FALSE
)

adjustImputed(xImp, xOrig, wind)

checkData(x, dl)

\method{print}{replaced}(x, ...)
}
\arguments{
\item{x}{data.frame or matrix}

\item{maxit}{maximum number of iterations}

\item{eps}{convergence criterion}

\item{method}{either "lm", "lmrob", "pls" or "subPLS" (the default)}

\item{dl}{Detection limit for each variable. Zero for variables
that have no detection limit problems.}

\item{variation}{if TRUE, those predictors are chosen in each step whose variation is lowest to the predictor.}

\item{nPred}{if determined and variation equals TRUE, it fixes the number of predictors}

\item{nComp}{if determined, it fixes the number of pls components. If
\dQuote{boot}, the number of pls components is estimated using a
bootstrapped cross validation approach.}

\item{bruteforce}{sets imputed values above the detection limit to the
detection limit. Replacements above the detection limit occur only
exceptionally, due to numerical instabilities. The default is FALSE!}

\item{noisemethod}{adding noise to imputed values. Experimental}

\item{noise}{TRUE to activate noise (experimental)}

\item{R}{number of bootstrap samples for the determination of pls
components. Only important for method \dQuote{pls}.}

\item{correction}{normal or density}

\item{verbose}{additional print output during calculations.}

\item{test}{an internal test situation (this parameter will be deleted soon)}

\item{xImp}{imputed data set}

\item{xOrig}{original data set}

\item{wind}{index matrix of rounded zeros}

\item{...}{further arguments passed through the print function}
}
\value{
\item{x}{imputed data}
\item{criteria}{change between last and second last iteration}
\item{iter}{number of iterations}
\item{maxit}{maximum number of iterations}
\item{wind}{index of zeros}
\item{nComp}{number of components for method pls}
\item{method}{chosen method}
}
\description{
Parametric replacement of rounded zeros for compositional data using
classical and robust methods based on ilr coordinates with a special
choice of balances.
}
\details{
Statistical analysis of compositional data including zeros runs into
problems, because log-ratios cannot be applied.  Usually, rounded zeros are
considered as missing values that are missing not at random.

The algorithm iteratively imputes parts with rounded zeros, where in each
step (1) compositional data are expressed in pivot coordinates, (2) tobit
regression is applied, (3) the rounded zeros are replaced by the expected
values, and (4) the corresponding inverse ilr mapping is applied. After all
parts are imputed, the algorithm starts again until the imputations do not
change.
}
\examples{

# Simulate a low-rank data set and introduce artificial rounded zeros.
# Dimensions of the simulation: p columns, n rows, k latent factors.
p <- 10
n <- 50
k <- 2
# n x k matrix of standard normal scores.
# NOTE(review): the name T masks the base R shorthand for TRUE in this session.
T <- matrix(rnorm(n*k), ncol=k)
# p x k matrix of loadings drawn uniformly from (-1, 1).
B <- matrix(runif(p*k,-1,1),ncol=k)
# n x p matrix: scores times transposed loadings.
X <- T \%*\% t(B)
# n x p matrix of normal noise with mean 0 and standard deviation 0.1.
E <-  matrix(rnorm(n*p, 0,0.1), ncol=p)
XE <- X + E
# Presumably maps the pivot coordinates in XE back to compositional parts;
# confirm against the help page of pivotCoordInv.
data <- data.frame(pivotCoordInv(XE))
col <- ncol(data)
# NOTE(review): row is not used again in this example.
row <- nrow(data)
# One detection limit per column, initialised to 0.
DL <- matrix(rep(0),ncol=col,nrow=1)
# Every odd-numbered column gets its 6 percent quantile as detection limit;
# even-numbered columns keep a detection limit of 0.
for(j in seq(1,col,2))
{DL[j] <- quantile(data[,j],probs=0.06,na.rm=FALSE)}

# Replace every value below the detection limit of its column by 0.
for(j in 1:col)        
{data[data[,j]<DL[j],j] <- 0}
\dontrun{
# under dontrun because of long execution time
# Impute the simulated data once with each method and print each result.
imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="subPLS")
imp
imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="pls", variation = FALSE)
imp
imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="lm")
imp
imp <- imputeBDLs(data,dl=DL,maxit=10,eps=0.1,R=10,method="lmrob")
imp

# Second example, based on the mcad data set.
data(mcad)
## generate rounded zeros artificially:
x <- mcad
# Keep the first 25 rows and drop the first column.
x <- x[1:25, 2:ncol(x)]
# Detection limit per column: the 10 percent quantile of that column.
dl <- apply(x, 2, quantile, 0.1)
# Set values below the detection limit to 0, in odd-numbered columns only.
for(i in seq(1, ncol(x), 2)){
  x[x[,i] < dl[i], i] <- 0
} 
# Number of zero cells, then their share of all cells in percent.
ni <- sum(x==0, na.rm=TRUE) 
ni/(ncol(x)*nrow(x)) * 100
# Even-numbered columns received no artificial zeros, so their limit is 0.
dl[seq(2, ncol(x), 2)] <- 0
# Impute with three methods; the x element of the result is the imputed data.
replaced_lm <- imputeBDLs(x, dl=dl, eps=1, method="lm",  
  verbose=FALSE, R=50, variation=TRUE)$x
replaced_lmrob <- imputeBDLs(x, dl=dl, eps=1, method="lmrob",  
  verbose=FALSE, R=50, variation=TRUE)$x
replaced_plsfull <- imputeBDLs(x, dl=dl, eps=1, 
  method="pls", verbose=FALSE, R=50, 
  variation=FALSE)$x 
}



}
\references{
Templ, M., Hron, K., Filzmoser, P., Gardlo, A. (2016). 
Imputation of rounded zeros for high-dimensional compositional data. 
\emph{Chemometrics and Intelligent Laboratory Systems}, 155, 183-190.

Chen, J., Zhang, X., Hron, K., Templ, M., Li, S. (2018). 
Regression imputation with Q-mode clustering for rounded zero replacement in high-dimensional compositional data. 
\emph{Journal of Applied Statistics}, 45 (11), 2067-2080.
}
\seealso{
\code{\link{imputeBDLs}}
}
\author{
Matthias Templ, method subPLS from Jiajia Chen
}
\keyword{manip}
\keyword{multivariate}
back to top