https://github.com/cran/robCompositions
Tip revision: 54cbcf1d6d3ecbca1153e8f00a804e837237f09d authored by Matthias Templ on 07 February 2014, 00:00:00 UTC
version 1.7.0
version 1.7.0
Tip revision: 54cbcf1
impRZilr.R
`impRZilr` <-
function(x, maxit=10, eps=0.1, method="pls",
dl=rep(0.05, ncol(x)), nComp = "boot",
bruteforce=FALSE, noisemethod="residuals",
noise=FALSE, R=10,
verbose=FALSE){
if( is.vector(x) ) stop("x must be a matrix or data frame")
## check if only numeric variables are in x:
cl <- lapply(x, class)
if(!all(cl %in% "numeric")) stop("some of your variables are not of class numeric.")
stopifnot((method %in% c("lm", "MM", "pls")))
if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
if(method=="pls" & ncol(x)<5) stop("too less variables/parts for method pls")
if(!(nComp[1] %in% c("boot","cv"))){
if(length(nComp) != ncol(x)) stop("nComp must be numeric of length ncol(x) or boot or cv")
}
#################
## store rowSums
rs <- rowSums(x, na.rm=TRUE)
#################
## zeros to NA:
# check if values are in (0, dl[i]):
check <- logical(ncol(x))
for(i in 1:ncol(x)){
check[i] <- any(x[,i] < dl[i] & x[,i] != 0)
x[x[,i] < dl[i],i] <- 0
}
if(any(check)){warning("values below detection limit have been set to zero and will be imputed")}
check2 <- any(x < 0)
if(check2){warning("values below 0 set have been set to zero and will be imputed")}
x[x == 0] <- NA
x[x < 0] <- NA
################
## sort variables of x based on
## decreasing number of missings in the variables
cn <- colnames(x)
wcol <- - abs(apply(x, 2, function(x) sum(is.na(x))))
o <- order(wcol)
x <- x[,o]
if(verbose) cat("variables with decreasing number of missings:", o)
#################
## index of missings / non-missings
w <- is.na(x)
wn <- !is.na(x)
## lines with RZ:
# w2 <- apply(x, 1, function(x){ sum(is.na(x)) })
# indNA <- apply(x, 2, function(x){any(is.na(x))})
#################
# wcol <- apply(x, 2, function(x) length(which(is.na(x))))
# indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
# number of RZ in each sorted variable
nwcol <- apply(x, 2, function(x) length(which(is.na(x))))
# indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
## save orig data for later use:
xOrig <- x
################
## initialisation
# cols with RZ's:
indNA <- apply(x, 2, function(x){any(is.na(x))})
# initialize RZ's with 2/3 dl per column
for(i in which(indNA)){
x[w[,i],i] <- dl[i]*2/3 #*runif(sum(ind),1/3,2/3)
}
################
n <- nrow(x)
d <- ncol(x)
### create progress bar
pb <- txtProgressBar(min = 0, max = maxit, style = 3,
title=paste("maximal time for ", maxit, "iterations"))
ii <- 1
### start the iteration
if(verbose) cat("\n start the iteration:")
it <- 1; criteria <- 99999999
if(length(nComp) > 1) nC <- nComp else nC <- integer(length(which(indNA)))
while(it <= maxit & criteria >= eps){
if(verbose) cat("\n iteration", it, "; criteria =", criteria)
## for criteria used:
xold <- x
## inner loop:
for(i in which(indNA)){
if(verbose) cat("\n replacement on part", i)
## ensure that imputed values are not too close to zero:
x[x < 2*.Machine$double.eps] <- 2*.Machine$double.eps
## transformation of the detection limit:
phi <- -isomLR(cbind(rep(dl[i], n), x[,-i,drop=FALSE]))[,1]
## transformation of the data, variable i on first column:
xilr <- data.frame(-isomLR(cbind(x[,i,drop=FALSE], x[,-i,drop=FALSE])))
## ensure that first variable is fixed:
# c1 <- colnames(xilr)[1]
# colnames(xilr)[1] <- "V1"
## response:
response <- as.matrix(xilr[,1,drop=FALSE])
## predictors (everything except response):
predictors <- as.matrix(xilr[,-1,drop=FALSE])
## fit and prediction:
if(method=="lm"){
reg1 <- lm(response ~ predictors)
yhat <- predict(reg1, new.data=data.frame(predictors))
} else if(method=="MM"){
reg1 <- rlm(response ~ predictors, method="MM",maxit = 100)#rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
yhat <- predict(reg1, new.data=data.frame(predictors))
} else if(method=="pls"){
if(it == 1 & nComp[1] =="boot"){
## evaluate ncomp in the first run:
nC[i] <- bootnComp(predictors,response, R, plotting=TRUE)$res
# nC[i] <- bootnComp(xilr[,!(colnames(xilr) == "V1"),drop=FALSE],y=xilr[,"V1"], R, plotting=TRUE)$res2
}
if(it == 1 & nComp[1] == "cv"){
## evaluate ncomp in the first run:
stop("implement method cv")
nC[i] <- mvr(as.matrix(response) ~ as.matrix(predictors),
method="simpls")
}
if(verbose) cat(" ; ncomp:",nC[i])
reg1 <- mvr(as.matrix(response) ~ as.matrix(predictors),
ncomp=nC[i], method="simpls")
yhat <- predict(reg1, new.data=data.frame(predictors),
ncomp=nC[i])
}
# s <- sqrt(sum(reg1$res^2)/abs(nrow(xilr)-ncol(xilr))) ## quick and dirty: abs()
s <- sqrt(sum(reg1$res^2)/nrow(xilr))
ex <- (phi - yhat)/s
if(any(is.na(yhat)) | any(yhat == "NaN")) stop("here NA - yhat")
if(any(is.na(phi)) | any(phi == "NaN")) stop("here NA - phi")
if(any(is.na(ex)) | any(ex == "NaN")) stop("here NA - ex")
yhat2sel <- ifelse(dnorm(ex[w[, i]]) > .Machine$double.eps,
yhat[w[, i]] - s*dnorm(ex[w[, i]])/pnorm(ex[w[, i]]),
yhat[w[, i]])
if(any(is.na(yhat)) || any(yhat=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# check if we are under the DL:
# yhat2seltest <<- yhat2sel
# phitest <<- phi[w[,i]]
if(any(yhat2sel >= phi[w[, i]])){
yhat2sel <- ifelse(yhat2sel > phi[w[, i]], phi[w[, i]], yhat2sel)
}
xilr[w[, i], 1] <- yhat2sel
xinv <- isomLRinv(-xilr)
## reordering to previous order
if(i %in% 2:(d-1)){
xinv <- cbind(xinv[,2:i], xinv[,c(1,(i+1):d)])
}
if(i == d){
xinv <- cbind(xinv[,2:d], xinv[,1])
}
x <- adjust2(xinv, xOrig, w)
# x <- adjust3(xinv, xOrig, w2)
# ## quick and dirty:
# x[!w] <- xOrig[!w]
}
setTxtProgressBar(pb, ii); ii <- ii + 1
it <- it + 1
criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
if(verbose & criteria != 0) cat("\n iteration", it, "; criteria =", criteria)
}
#### add random error ###
if(noise){
for(i in which(indNA)){
if(verbose) cat("\n add noise on variable", i)
# add error terms
inderr <- w[,i]
if(noisemethod == "residuals") {
error <- sample(residuals( reg1 )[inderr],
size=nwcol[i], replace=TRUE)
reg1$res[inderr] <- error
} else {
mu <- median(residuals( reg1 )[inderr])
sigma <- mad(residuals( reg1 )[inderr])
error <- rnorm(nwcol[i], mean=mu, sd=sigma)
reg1$res[inderr] <- error
}
# return realizations
yhat[inderr] <- yhat[inderr] + error
s <- sqrt(sum(reg1$res^2)/nrow(xilr)) ## quick and dirty: abs()
ex <- (phi - yhat)/s
yhat2sel <- ifelse(dnorm(ex[w[, i]]) > .Machine$double.eps,
yhat[w[, i]] - s*dnorm(ex[w[, i]])/pnorm(ex[w[, i]]),
yhat[w[, i]])
if(any(is.na(yhat)) || any(yhat=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# check if we are under the DL:
if(any(yhat2sel >= phi[w[, i]])){
yhat2sel <- ifelse(yhat2sel > phi[w[, i]], phi[w[, i]], yhat2sel)
}
xilr[w[, i], 1] <- yhat2sel
xinv <- isomLRinv(-xilr)
## reordering of xOrig
if(i %in% 2:(d-1)){
xinv <- cbind(xinv[,2:i], xinv[,c(1,(i+1):d)])
}
if(i == d){
xinv <- cbind(xinv[,2:d], xinv[,1])
}
# x <- adjust2(xinv, xOrig, w)
# ## quick and dirty:
# x[!w] <- xOrig[!w]
}
}
### end add random error ###
# x <- adjust3(x, xOrig, w)
if(!all.equal(x[!w], xOrig[!w])) stop("adjust problems - revise algorithm")
x <- x[,order(o)] ## checked: reordering is OK!
colnames(x) <- cn
# ## recover abs values with rs
# xtest <<- x
# w <<- w
# x_0 <- x
# x_0[w] <- 0
# rs_imp <- rowSums(x_0)
# fac <- rs/rs_imp
# for(i in 1:ncol(x)){
# x[,i] <- x[,i] * fac[i]
# }
# # quick and dirty: (preserve absolute values)
# x[!w] <- xorig[!w]
res <- list(x=x, criteria=criteria, iter=it,
maxit=maxit, wind=w, nComp=nC, method=method)
class(res) <- "replaced"
invisible(res)
}
cvnComp <- function(X,y, R=99, plotting=FALSE){
rr <- mvr(as.matrix(y) ~ as.matrix(X),
method="simpls", validation="CV")$validation$PRESS
return(which.max(as.numeric(rr)))
}
bootnComp <- function(X,y, R=99, plotting=FALSE){
ind <- 1:nrow(X)
d <- matrix(, ncol=R, nrow=nrow(X))#nrow(X))
for(i in 1:R){
bootind <- sample(ind, replace=TRUE)
## "paired" bootstrap sample:
ds <- cbind(X[bootind,], as.numeric(y[bootind]))
colnames(ds)[ncol(ds)] <- "V1"
## regression on bootstrap sample, in validation
## the predictions and so called PRESS values are then stored:
reg1 <- mvr(V1~., data=data.frame(ds), method="simpls", validation="none")#, validation="CV")
## prediction error criteria is wrong
## (even the results looks fine), since we
## compare original values of response (y) from predictions
## based on bootstrap samples:
# d[1:reg1$ncomp,i] <- as.numeric(apply(reg1$validation$pred, 3, function(x) sum(((y - x)^2)) ) )
## better? To use the coefficients from reg1 and predict
## based on original data --> prediction error:
ppp <- predict(reg1, X)
d[1:reg1$ncomp,i] <- as.numeric(apply(ppp, 3, function(x) mean(abs(y - x)) ))
}
d <- na.omit(d)
sdev <- apply(d, 1, quantile, probs=0.5, na.rm=TRUE)
sdev2 <- apply(d, 1, quantile, probs=0.25, na.rm=TRUE)
sdevs <- sdev -sdev2
means <- apply(d, 1, median, na.rm=TRUE)
sdev3 <- apply(d, 1, mad)
mi <- which.min(means)
threshold <- means + 2*sdev3
res <- which.min(!(means < threshold[mi]) )
minsd <- means - sdevs > means[mi]
check <- means
check[!minsd] <- 999999999999999
if(plotting) plot(means, type="l")
res2 <- which.min(check)
list(res=res, res2=NULL)
}
bootnCompHD <- function(X,y, R=99, plotting=FALSE){
ind <- 1:nrow(X)
d <- matrix(, ncol=R, nrow=nrow(X))#nrow(X))
for(i in 1:R){
bootind <- sample(ind)
XX <- X
yy <- y
ds <- cbind(X[bootind,], as.numeric(y[bootind]))
colnames(ds)[ncol(ds)] <- "V1"
reg1 <- mvr(V1~., data=data.frame(ds), method="simpls", validation="CV")
d[1:reg1$ncomp,i] <- as.numeric(apply(reg1$validation$pred, 3, function(x) sum(((y - x)^2)) ) )
}
d <- na.omit(d)
sdev <- apply(d, 1, mad, na.rm=TRUE)
means <- apply(d, 1, median, na.rm=TRUE)
mi <- which.min(means)
if(plotting) plot(means, type="l", col="blue", ylab="squared total prediction error", xlab="number of components")
themean <- mean(means)
thesd <- sd(means)
abovethreshold <- themean - sdev > means
check <- means
check[!abovethreshold] <- 9999999999
res <- which.min(check)
# minsd <- means - sdev > means[mi]
# check <- means
# check[!minsd] <- 99999999
# res <- which.min(check)
if(plotting){
abline(v=res, lwd=3)
abline(h=mi, col="red")
# abline(h=means-sdev, lty=3)
}
list(res=res, mean=means)
}
#checkIfValuesUnderDL <- function(x, dl, wind){
# check <- logical(ncol(x))
# for(i in 1:ncol(x)){
# check[i] <- any(x[,i] > dl[i])
# }
# return(check)
#}
## test adjust2:
adjust2 <- function (xImp, xOrig, wind){
xneu = xImp
s1 <- rowSums(xOrig, na.rm = TRUE)
for (i in 1:nrow(xImp)) {
if(any(wind[i,])) s <- sum(xImp[i, !wind[i, ]]) else s <- 1
if(any(wind[i,])) s2 <- sum(xImp[i, wind[i, ]]) else s2 <- 0
fac <- s/(s + s2)
s1[i] <- s1[i]/fac
}
impS <- s1/rowSums(xImp)
for (i in 1:ncol(xImp)) {
xneu[, i] <- xImp[, i] * impS
}
xImp <- xneu
return(xImp)
}
#x <- matrix(1:100, ncol=5)
#x <- x[,c(2,1,3:5)]
#colnames(x) <- c("eins","zwei","drei","vier","fuenf")
#x[x< 10] <- 0
#x[11,3] <- 0
#x[20,3] <- 0
#dl <- rep(10,5)
#
#set.seed(123)
#x <- xorig <- constSum(genVarsSmall(mvrnorm(20, mu=rep(1,3), Sigma=diag(3)), 15)[,1:5],100)
#x <- xorig <- x[order(x[,1]), ]
#dl <- apply(x, 2, quantile, 0.1)
#dl[1] <- 7.32
#for(i in 1:5){
# x[x[,i] < dl[i], i] <- 0
#}
#colnames(x) <- c("eins","zwei","drei","vier","fuenf")
#imp <- impRZilr(x, dl=dl, method="pls", eps=0.00001, maxit=100)
#imp$x
## ausfuerhen des codes der fkt
#y <- x
#x <- xOrig
#adjust2(xinv, xOrig, w) ## OK
#
#
#
#y <- x+rnorm(100, 0, 0.05)
#y[1,3] <- 8
#a <- adjust2(y, x, wind)
#x[1,1]/x[2,2]
#a[1,1]/a[2,2]
#res <- impRZilr(x)
adjust3 <- function(xImp, xOrig, wind){
xOrigSum <- rowSums(xOrig)
# sum imputed without former zeros:
xImpSum <- numeric(ncol(xOrig))
for(i in 1:ncol(xOrig)){
xImpSum[i] <- sum(xImp[i,!wind[i,]])
fac <- xOrigSum[i] / xImpSum[i]
xImp[i,wind[i,]] <- xImp[i,wind[i,]] * fac
}
xImp[!wind] <- xOrig[!wind]
xImp
}
#
### switch function to automatically select methods
#getM <- function(xReg, ndata, type, index,mixedTF,mixedConstant,factors,step,robust,noise,noise.factor=1,force=FALSE, robMethod="MM") {
# switch(type,
# numeric = useLM(xReg, ndata, index, mixedTF,mixedConstant,factors,step,robust,noise,noise.factor,force,robMethod),
# factor = useMN(xReg, ndata, index,factors,step,robust),
# bin = useB(xReg, ndata, index,factors,step,robust),
# count = useGLMcount(xReg, ndata, index, factors, step, robust)
# )
#}
#
#
#
#f <- function(){
## x <- constSum(x)
## dl <- dl/sum(dl)
# ## initialisation:
## x[is.na(x)] <- 0.001
# for(i in 1:length(dl)){
# ind <- is.na(x[,i])
# #PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2
# if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
# }
#
## x <- constSum(x)
#
# ## parameters:
# it=0
# criteria <- 10000000
# error <- rep(0, ncol(x))
# nComp <- normalerror <- numeric(ncol(x))
# if(noisemethod=="residuals") residuals <- matrix(,ncol=ncol(x), nrow=nrow(x))
# nComp[nComp==0] <- NA
# criteria <- 1e+07
# sigma <- mu <- rep(0, ncol(x))
#
# ###########################################
# ### start the iteration
# if(verbose) cat("\n start the iteration:")
# while(it <= maxit & criteria >= eps){
# xold <- x
# it=it+1
# for(i in which(indNA)){
# if(verbose) cat("\n column", i)
# ## change the first column with that one with the highest amount of NAs
# ## in the step
# if(wcol[indM[i]] > 0){
# xNA=x[,indM[i]]
# x1=x[,1]
# x[,1]=xNA
# x[,indM[i]]=x1
#
# if(method == "roundedZero"){
# xilr <- isomLR(x)
# phi <- isomLR(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
# ## --> x hat sich geaendert aber dl nicht.
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = lm(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
# ex <- (phi - yhat2)/s
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# }
# }
# if (method == "pls") {
# xilr = isomLR(x)
# ttt <- cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE])
# phi <- isomLR(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1]
## if(verbose) cat("\n phi", phi, "in iteration", it)
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# if(it == 1){ ## evaluate ncomp.
# test <- xilr2[,!(colnames(xilr2) == "V1")]
# testy <- xilr2[,"V1"]
# X <- xilr2[,!(colnames(xilr2) == "V1")]
# y <- xilr2[,"V1"]
# nComp[i] <- bootnComp(xilr2[,!(colnames(xilr2) == "V1")],y=xilr2[,"V1"], R)
# }
# reg1 <- mvr(V1~.,ncomp=nComp[i], data = xilr2, method="simpls")
# yhat2 <- as.numeric(predict(reg1, new.data=xilr2[,-i], ncomp=nComp[i]) )
## if(noisemethod=="residuals") residuals[,i] <- reg1$residuals[,,nComp[i]]
## if(noisemethod=="normal"){
## mu[i] <- median(residuals(reg1)[,,nComp[i]])
## sigma[i] <- mad(residuals(reg1)[,,nComp[i]]) * noiseeffect
## }
# colnames(xilr2)[1] <- c1
## fit=reg1$fitted.values[,,nComp[i]]
# s <- sqrt(sum(reg1$residuals[,,nComp[i]]^2)/abs(nrow(xilr2)-ncol(xilr2)))
# ex <- as.numeric((phi - yhat2)/s )
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of NaN or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
## if(verbose) cat("\n yhat2sel at iteration", it, "on column", i ,"is", yhat2sel)
# xilr2[w[, indM[i]], 1] <- yhat2sel
#
#
# }
# if(method == "roundedZeroRobust"){
# xilr <- isomLR(x)
# x[x < .Machine$double.eps] <- 0.00000000001 ## TODO: better solution
# phi <- isomLR(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
## reg1 = lmrob(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# s <- reg1$s
# #PF# s <- IQR(reg1$resid)/1.349
# ex <- (phi - yhat2)/s
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# }
# }
#
# xilr <- xilr2
# x <- isomLRinv(xilr)
#
# ## return the order of columns:
# xNA=x[,1]
# x1=x[,indM[i]]
# x[,1]=x1
# x[,indM[i]]=xNA
# x <- adjust2(x, xcheck, w)
# print(x[1:2,1:2])
# if(verbose) cat("\n iteration", it, "column", i, "value", x[2,1])
# }
#
#
# }
#
#
# criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
# colnames(x) <- colnames(xcheck)
#
# }
#
# res <- list(xOrig=xcheck, xImp=x[,order(o)], criteria=criteria, iter=it, nComp=nComp,
# maxit=maxit, w=length(which(w)), wind=w)
# class(res) <- "imp"
## res <- adjust(res)
# invisible(res)
#}
#
#
#
##################################################################################
#
#`impRZilr` <-
#function(x, maxit=10, eps=0.1, method="roundedZero",
# dl=rep(0.05, ncol(x)), bruteforce=FALSE, noise = TRUE, noisemethod="residuals", noiseeffect=1, R=10,
# verbose=FALSE){
#
#
# if( is.vector(x) ) stop("x must be a matrix or data frame")
# stopifnot((method %in% c("ltsReg", "ltsReg2", "classical", "lm", "roundedZero","roundedZeroRobust","pls")))
# if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
#
# #################
# ## zeros to NA:
# x[x==0] <- NA
#
# #################
# ## index of missings / non-missings
# w <- is.na(x)
# wn <- !is.na(x)
# w2 <- apply(x, 1, function(x){
# length(which(is.na(x)))
# })
# indNA <- apply(x, 2, function(x){any(is.na(x))})
#
# #################
# ## sort the columns of the data according to the amount of missings in the variables
# wcol <- apply(x, 2, function(x) length(which(is.na(x))))
# indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
# cn <- colnames(x)
# xcheck <- x
#
# ################
# ## detection limit in ilr-space
## for(i in which(indNA)){
##
## }
#
#
## x <- constSum(x)
## dl <- dl/sum(dl)
# ## initialisation:
## x[is.na(x)] <- 0.001
# for(i in 1:length(dl)){
# ind <- is.na(x[,i])
# #PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2
# if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
# }
#
## x <- constSum(x)
#
# ## parameters:
# it=0
# criteria <- 10000000
# error <- rep(0, ncol(x))
# nComp <- normalerror <- numeric(ncol(x))
# if(noisemethod=="residuals") residuals <- matrix(,ncol=ncol(x), nrow=nrow(x))
# nComp[nComp==0] <- NA
# criteria <- 1e+07
# sigma <- mu <- rep(0, ncol(x))
#
# ###########################################
# ### start the iteration
# if(verbose) cat("\n start the iteration:")
# while(it <= maxit & criteria >= eps){
# xold <- x
# it=it+1
# for(i in which(indNA)){
# if(verbose) cat("\n column", i)
# ## change the first column with that one with the highest amount of NAs
# ## in the step
# if(wcol[indM[i]] > 0){
# xNA=x[,indM[i]]
# x1=x[,1]
# x[,1]=xNA
# x[,indM[i]]=x1
#
# if(method == "roundedZero"){
# xilr <- isomLR(x)
# phi <- isomLR(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
# ## --> x hat sich geaendert aber dl nicht.
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = lm(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
# ex <- (phi - yhat2)/s
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# }
# }
# if (method == "pls") {
# xilr = isomLR(x)
# ttt <- cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE])
# phi <- isomLR(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1]
## if(verbose) cat("\n phi", phi, "in iteration", it)
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# if(it == 1){ ## evaluate ncomp.
# test <- xilr2[,!(colnames(xilr2) == "V1")]
# testy <- xilr2[,"V1"]
# X <- xilr2[,!(colnames(xilr2) == "V1")]
# y <- xilr2[,"V1"]
# nComp[i] <- bootnComp(xilr2[,!(colnames(xilr2) == "V1")],y=xilr2[,"V1"], R)
# }
# reg1 <- mvr(V1~.,ncomp=nComp[i], data = xilr2, method="simpls")
# yhat2 <- as.numeric(predict(reg1, new.data=xilr2[,-i], ncomp=nComp[i]) )
## if(noisemethod=="residuals") residuals[,i] <- reg1$residuals[,,nComp[i]]
## if(noisemethod=="normal"){
## mu[i] <- median(residuals(reg1)[,,nComp[i]])
## sigma[i] <- mad(residuals(reg1)[,,nComp[i]]) * noiseeffect
## }
# colnames(xilr2)[1] <- c1
## fit=reg1$fitted.values[,,nComp[i]]
# s <- sqrt(sum(reg1$residuals[,,nComp[i]]^2)/abs(nrow(xilr2)-ncol(xilr2)))
# ex <- as.numeric((phi - yhat2)/s )
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of NaN or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
## if(verbose) cat("\n yhat2sel at iteration", it, "on column", i ,"is", yhat2sel)
# xilr2[w[, indM[i]], 1] <- yhat2sel
#
#
# }
# if(method == "roundedZeroRobust"){
# xilr <- isomLR(x)
# x[x < .Machine$double.eps] <- 0.00000000001 ## TODO: better solution
# phi <- isomLR(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]))[,1] # TODO: phi auserhalb der Schleife!
# xilr2 <- data.frame(xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
## reg1 = lmrob(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# s <- reg1$s
# #PF# s <- IQR(reg1$resid)/1.349
# ex <- (phi - yhat2)/s
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# }
# }
#
# xilr <- xilr2
# x <- isomLRinv(xilr)
#
# ## return the order of columns:
# xNA=x[,1]
# x1=x[,indM[i]]
# x[,1]=x1
# x[,indM[i]]=xNA
# x <- adjust2(x, xcheck, w)
# print(x[1:2,1:2])
# if(verbose) cat("\n iteration", it, "column", i, "value", x[2,1])
# }
#
#
# }
#
#
# criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
# colnames(x) <- colnames(xcheck)
#
# }
#
# res <- list(xOrig=xcheck, xImp=x, criteria=criteria, iter=it, nComp=nComp,
# maxit=maxit, w=length(which(w)), wind=w)
# class(res) <- "imp"
## res <- adjust(res)
# invisible(res)
#}
# `impRZilr` <-
# function(x, maxit=10, eps=0.1, method="roundedZero",
# dl=rep(0.05, ncol(x)), bruteforce=FALSE){
#
# `ilrM` <-
# function(x, info=TRUE){
# x.ilr=matrix(NA,nrow=nrow(x),ncol=ncol(x)-1)
# D=ncol(x)
# for (i in 1:ncol(x.ilr)){
# x.ilr[,i]=sqrt((D-i)/(D-i+1))*log(((apply(as.matrix(x[,(i+1):D,drop=FALSE]),1,prod))^(1/(D-i)))/(x[,i]))
# }
# # invisible(-x.ilr)
# if(info) {res <- list(xilr=-x.ilr,
# xOrig=x)
# class(res) <- "ilrTransform"
# } else {
# res <- -x.ilr
# }
# res
# }
# `invilrM` <-
# function(x.ilr){
# if(class(x.ilr) =="ilrTransform" ){
# fac <- rowSums(x.ilr$xOrig)
# x.ilr <- x.ilr$xilr
# y <- matrix(0,nrow=nrow(x.ilr),ncol=ncol(x.ilr)+1)
# D=ncol(x.ilr)+1
# y[,1]=-sqrt((D-1)/D)*x.ilr[,1]
# for (i in 2:ncol(y)){
# for (j in 1:(i-1)){
# y[,i]=y[,i]+x.ilr[,j]/sqrt((D-j+1)*(D-j))
# }
# }
# for (i in 2:(ncol(y)-1)){
# y[,i]=y[,i]-sqrt((D-i)/(D-i+1))*x.ilr[,i]
# }
# yexp=exp(-y)
# x.back=yexp/apply(yexp,1,sum) * fac # * rowSums(derOriginaldaten)
# invisible(x.back)
# } else {
# y=matrix(0,nrow=nrow(x.ilr),ncol=ncol(x.ilr)+1)
# D=ncol(x.ilr)+1
# y[,1]=-sqrt((D-1)/D)*x.ilr[,1]
# for (i in 2:ncol(y)){
# for (j in 1:(i-1)){
# y[,i]=y[,i]+x.ilr[,j]/sqrt((D-j+1)*(D-j))
# }
# }
# for (i in 2:(ncol(y)-1)){
# y[,i]=y[,i]-sqrt((D-i)/(D-i+1))*x.ilr[,i]
# }
# yexp=exp(-y)
# x.back=yexp/apply(yexp,1,sum) # * rowSums(derOriginaldaten)
# invisible(x.back)
# #return(yexp)
# }
# x.back
# }
#
#
#
# if( is.vector(x) ) stop("x must be a matrix or data frame")
# stopifnot((method %in% c("ltsReg", "ltsReg2", "classical", "lm", "roundedZero","roundedZeroRobust")))
# if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
#
#
# ## zeros to NA:
# x[x==0] <- NA
#
# ##index of missings / non-missings
# w <- is.na(x)
# wn <- !is.na(x)
# w2 <- apply(x, 1, function(x){
# length(which(is.na(x)))
# })
#
#
# ##sort the columns of the data according to the amount of missings in the variables
# wcol <- apply(x,2,function(x) length(which(is.na(x))))
# indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
# cn <- colnames(x)
# xcheck <- x
#
# ## initialisation:
# # x[is.na(x)] <- 0.001
# for(i in 1:length(dl)){
# ind <- is.na(x[,i])
# #PF# if(length(ind) > 0) x[ind,i] <- dl[i]/3*2
# if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
# }
#
# # x <- constSum(x)
#
# ## parameters:
# it=0
# criteria <- 10000000
# error <- rep(0, ncol(x))
#
# ###########################################
# ### start the iteration
# while(it <= maxit & criteria >= eps){
# xold <- x
# it=it+1
# for(i in 1:ncol(x)){
# ## change the first column with that one with the highest amount of NAs
# ## in the step
# if(wcol[indM[i]] > 0){
# xNA=x[,indM[i]]
# x1=x[,1]
# x[,1]=xNA
# x[,indM[i]]=x1
#
# if(method == "roundedZero"){
# xilr <- ilrM(x)
# phi <- ilrM(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]), info=FALSE)[,1] # TODO: phi auserhalb der Schleife!
# ## --> x hat sich geaendert aber dl nicht.
# xilr2 <- data.frame(xilr$xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = lm(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# # s <- sd(reg1$res, na.rm=TRUE)
# s <- sqrt(sum(reg1$res^2)/(nrow(xilr2)-ncol(xilr2)))
# ex <- (phi - yhat2)/s
# #####################################################
# # CHANGED PF:
# #PF# if(all(dnorm(ex) > 5 * .Machine$double.eps)) yhat2 <- yhat2 - s*dnorm(ex)/pnorm(ex)
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# #####################################################
# }
# }
# if(method == "roundedZeroRobust"){
# xilr <- ilrM(x)
# x[x < .Machine$double.eps] <- 0.00000000001 ## TODO: better solution
# phi <- ilrM(cbind(rep(dl[indM[i]], nrow(x)), x[,-1,drop=FALSE]), info=FALSE)[,1] # TODO: phi auserhalb der Schleife!
# xilr2 <- data.frame(xilr$xilr)
# c1 <- colnames(xilr2)[1]
# colnames(xilr2)[1] <- "V1"
# reg1 = rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
# # reg1 = lmrob(V1 ~ ., data=xilr2)
# yhat2 <- predict(reg1, new.data=xilr2[,-i])
# if(bruteforce){
# xilr2[w[, indM[i]], 1] <- ifelse(yhat2[w[, indM[i]]] <= phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2[w[, indM[i]]] )
# } else {
# # s <- mad(reg1$res, na.rm=TRUE)
# s <- reg1$s
# #PF# s <- IQR(reg1$resid)/1.349
# ex <- (phi - yhat2)/s
# #####################################################
# # CHANGED PF:
# #PF# if(all(dnorm(ex) > 5 * .Machine$double.eps)) yhat2 <- yhat2 - s*dnorm(ex)/pnorm(ex)
# yhat2sel <- ifelse(dnorm(ex[w[, indM[i]]]) > .Machine$double.eps,
# yhat2[w[, indM[i]]] - s*dnorm(ex[w[, indM[i]]])/pnorm(ex[w[, indM[i]]]),
# yhat2[w[, indM[i]]])
# if(any(is.na(yhat2)) || any(yhat2=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, indM[i]]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, indM[i]]], phi[w[, indM[i]]], yhat2sel)
# }
# xilr2[w[, indM[i]], 1] <- yhat2sel
# #####################################################
# }
# }
#
# xilr$xilr <- xilr2
# x=invilrM(xilr)
# ## return the order of columns:
# xNA=x[,1]
# x1=x[,indM[i]]
# x[,1]=x1
# x[,indM[i]]=xNA
# }
#
#
# }
#
#
# criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
# colnames(x) <- colnames(xcheck)
#
# }
#
# res <- list(xOrig=xcheck, xImp=x, criteria=criteria, iter=it,
# maxit=maxit, w=length(which(w)), wind=w)
# class(res) <- "imp"
# invisible(res)
# }
#
#
# `impRZilr2` <-
# function(x, maxit=10, eps=0.1, method="pls",
# dl=rep(0.05, ncol(x)), nComp = NULL,
# bruteforce=FALSE, noisemethod="residuals", noise=TRUE, R=10,
# verbose=FALSE){
#
#
# if( is.vector(x) ) stop("x must be a matrix or data frame")
# stopifnot((method %in% c("lm", "MM", "pls")))
# if( length(dl) < ncol(x)) stop(paste("dl has to be a vector of ", ncol(x)))
# if(method=="pls" & ncol(x)<5) stop("too less variables/parts for method pls")
# if(!is.null(nComp)){
# pre <- TRUE
# if(length(nComp) != ncol(x)) stop("nComp mmmmmust be NULL or of length ncol(x)")
# } else pre <- FALSE
#
# #################
# ## zeros to NA:
# x[x==0] <- NA
#
# ################
# ## sort variables of x based on
# ## decreasing number of missings in the variables
# wcol <- - abs(apply(x, 2, function(x) sum(is.na(x))))
# o <- order(wcol)
# x <- x[,o]
# if(verbose) cat("variables with decreasing number of missings:", o)
#
#
# #################
# ## index of missings / non-missings
# w <- is.na(x)
# wn <- !is.na(x)
# w2 <- apply(x, 1, function(x){ sum(is.na(x)) })
# # indNA <- apply(x, 2, function(x){any(is.na(x))})
#
# #################
# ## sort the columns of the data according to the amount of missings in the variables
# wcol <- apply(x, 2, function(x) length(which(is.na(x))))
# indM <- sort(wcol, index.return=TRUE, decreasing=TRUE)$ix
# cn <- colnames(x)
# xcheck <- x
#
#
# ################
# ## initialisation
# indNA <- apply(x, 2, function(x){any(is.na(x))})
# for(i in 1:length(dl)){
# ind <- is.na(x[,i])
# if(length(ind) > 0) x[ind,i] <- dl[i]*runif(sum(ind),1/3,2/3)
# }
#
# xOrig <- x
#
# ################
# ## detection limit in ilr-space
# n <- nrow(x)
# d <- ncol(x)
# ### start the iteration
# if(verbose) cat("\n start the iteration:")
# it <- 1; criteria <- 99999999
#
# while(it <= maxit & criteria >= eps){
# if(verbose) cat("\n iteration", it, "; criteria =", criteria)
# xold <- x
# for(i in which(indNA)){
# if(verbose) cat("\n replacement on part", i)
# test <<- cbind(rep(dl[i], n), x[,-i,drop=FALSE])
# phi <- -isomLR(cbind(rep(dl[i], n), x[,-i,drop=FALSE]))[,1]
# # part <- cbind(x[,i,drop=FALSE], x[,-i,drop=FALSE])
# x[x < 2*.Machine$double.eps] <- 2*.Machine$double.eps
# xilr <- data.frame(-isomLR(cbind(x[,i,drop=FALSE], x[,-i,drop=FALSE])))
# c1 <- colnames(xilr)[1]
# colnames(xilr)[1] <- "V1"
# xilr <<- xilr
# response <- as.matrix(xilr[,1,drop=FALSE])
# predictors <- as.matrix(xilr[,-1,drop=FALSE])
# if(method=="lm"){
# reg1 <- lm(response ~ predictors)
# yhat <- predict(reg1, new.data=data.fram(predictors))
# } else if(method=="MM"){
# reg1 <- rlm(response ~ predictors, method="MM",maxit = 100)#rlm(V1 ~ ., data=xilr2, method="MM",maxit = 100)
# yhat <- predict(reg1, new.data=data.fram(predictors))
# } else if(method=="pls"){
# if(it == 1 & !pre){ ## evaluate ncomp.
# nComp[i] <- bootnComp(xilr[,!(colnames(xilr) == "V1"),drop=FALSE],y=xilr[,"V1"], R, plotting=TRUE)$res2
# }
# if(verbose) cat(" ; ncomp:",nComp[i])
# reg1 <- mvr(as.matrix(response) ~ as.matrix(predictors), ncomp=nComp[i], method="simpls")
# yhat <- predict(reg1, new.data=data.fram(predictors), ncomp=nComp[i])
# }
#
# # s <- sqrt(sum(reg1$res^2)/abs(nrow(xilr)-ncol(xilr))) ## quick and dirty: abs()
# s <- sqrt(sum(reg1$res^2)/nrow(xilr)) ## quick and dirty: abs()
# ex <- (phi - yhat)/s
# yhat2sel <- ifelse(dnorm(ex[w[, i]]) > .Machine$double.eps,
# yhat[w[, i]] - s*dnorm(ex[w[, i]])/pnorm(ex[w[, i]]),
# yhat[w[, i]])
# if(any(is.na(yhat)) || any(yhat=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, i]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, i]], phi[w[, i]], yhat2sel)
# }
# xilr[w[, i], 1] <- yhat2sel
# xinv <- isomLRinv(-xilr)
# ## reordering of xOrig
# if(i %in% 2:(d-1)){
# xinv <- cbind(xinv[,2:i], xinv[,c(1,(i+1):d)])
# }
# if(i == d){
# xinv <- cbind(xinv[,2:d], xinv[,1])
# }
#
# x <- adjust2(xinv, xOrig, w)
# }
#
# it <- it + 1
# criteria <- sum( ((xold - x)/x)^2, na.rm=TRUE) ## DIRTY: (na.rm=TRUE)
# if(verbose & criteria != 0) cat("\n iteration", it, "; criteria =", criteria)
# }
#
# #### add random error ###
# if(noise){
# for(i in which(indNA)){
# if(verbose) cat("\n add noise on variable", i)
#
# # add error terms
# inderr <<- w[,i]
# if(noisemethod == "residuals") {
# error <- sample(residuals( reg1 )[inderr],
# size=wcol[i], replace=TRUE)
# reg1$res[inderr] <- error
# } else {
# mu <- median(residuals( reg1 )[inderr])
# sigma <- mad(residuals( reg1 )[inderr])
# error <- rnorm(wcol[i], mean=mu, sd=sigma)
# reg1$res[inderr] <- error
# }
# # return realizations
# yhat[inderr] <- yhat[inderr] + error
#
#
# s <- sqrt(sum(reg1$res^2)/nrow(xilr)) ## quick and dirty: abs()
# ex <- (phi - yhat)/s
# yhat2sel <- ifelse(dnorm(ex[w[, i]]) > .Machine$double.eps,
# yhat[w[, i]] - s*dnorm(ex[w[, i]])/pnorm(ex[w[, i]]),
# yhat[w[, i]])
# if(any(is.na(yhat)) || any(yhat=="Inf")) stop("Problems in ilr because of infinite or NA estimates")
# # check if we are under the DL:
# if(any(yhat2sel >= phi[w[, i]])){
# yhat2sel <- ifelse(yhat2sel > phi[w[, i]], phi[w[, i]], yhat2sel)
# }
# xilr[w[, i], 1] <- yhat2sel
# xinv <- isomLRinv(-xilr)
# ## reordering of xOrig
# if(i %in% 2:(d-1)){
# xinv <- cbind(xinv[,2:i], xinv[,c(1,(i+1):d)])
# }
# if(i == d){
# xinv <- cbind(xinv[,2:d], xinv[,1])
# }
#
# x <- adjust2(xinv, xOrig, w)
# }
# }
# ### end add random error ###
#
# x <- x[,order(o)] ## checked: reordering is OK!
# colnames(x) <- colnames(xcheck)
# res <- list(x=x, criteria=criteria, iter=it,
# maxit=maxit, wind=w, nComp=nComp, method=method)
# class(res) <- "replaced"
# invisible(res)
# }
#
# bootnComp <- function(X,y, R=99, plotting=FLASE){
# ind <- 1:nrow(X)
# d <- matrix(, ncol=R, nrow=nrow(X))#nrow(X))
# for(i in 1:R){
# bootind <- sample(ind)
# XX <- X
# yy <- y
# ds <- cbind(X[bootind,], as.numeric(y[bootind]))
# colnames(ds)[ncol(ds)] <- "V1"
# reg1 <- mvr(V1~., data=data.frame(ds), method="simpls", validation="CV")
# d[1:reg1$ncomp,i] <- as.numeric(apply(reg1$validation$pred, 3, function(x) sum(((y - x)^2)) ) )
# }
# d <- na.omit(d)
# sdev <- apply(d, 1, sd, na.rm=TRUE)
# means <- apply(d, 1, mean, na.rm=TRUE)
# mi <- which.min(means)
# r <- round(ncol(X)/20)
# mi2 <- which.min(means[r:length(means)])+r-1
# minsd <- means - sdev > means[mi]
# check <- means
# check[!minsd] <- 99999999
# if(plotting) plot(means, type="l")
# res <- which.min(check)
# list(res=res, res2=mi2)
# }
#
#
# bootnCompHD <- function(X,y, R=99, plotting=FALSE){
# ind <- 1:nrow(X)
# d <- matrix(, ncol=R, nrow=nrow(X))#nrow(X))
# for(i in 1:R){
# bootind <- sample(ind)
# XX <- X
# yy <- y
# ds <- cbind(X[bootind,], as.numeric(y[bootind]))
# colnames(ds)[ncol(ds)] <- "V1"
# reg1 <- mvr(V1~., data=data.frame(ds), method="simpls", validation="CV")
# d[1:reg1$ncomp,i] <- as.numeric(apply(reg1$validation$pred, 3, function(x) sum(((y - x)^2)) ) )
# }
# d <- na.omit(d)
# sdev <- apply(d, 1, mad, na.rm=TRUE)
# means <- apply(d, 1, median, na.rm=TRUE)
# mi <- which.min(means)
# if(plotting) plot(means, type="l", col="blue", ylab="squared total prediction error", xlab="number of components")
# themean <- mean(means)
# thesd <- sd(means)
# abovethreshold <- themean - sdev > means
# check <- means
# check[!abovethreshold] <- 9999999999
# res <- which.min(check)
# # minsd <- means - sdev > means[mi]
# # check <- means
# # check[!minsd] <- 99999999
# # res <- which.min(check)
# if(plotting){
# abline(v=res, lwd=3)
# abline(h=mi, col="red")
# # abline(h=means-sdev, lty=3)
# }
# list(res=res, mean=means)
# }
#
#
#
# ## test adjust2:
#
# adjust2 <- function (xImp, xOrig, wind){
# xneu = xImp
# s1 <- rowSums(xOrig, na.rm = TRUE)
# for (i in 1:nrow(xImp)) {
# if(any(wind[i,])) s <- sum(xImp[i, !wind[i, ]]) else s <- 1
# if(any(wind[i,])) s2 <- sum(xImp[i, wind[i, ]]) else s2 <- 0
# fac <- s/(s + s2)
# s1[i] <- s1[i]/fac
# }
# impS <- s1/rowSums(xImp)
# for (i in 1:ncol(xImp)) {
# xneu[, i] <- xImp[, i] * impS
# }
# xImp <- xneu
# return(xImp)
# }