impKNNa.R
```````impKNNa` <-
function(x, method="knn", k=3,
metric="Aitchison", agg="median", primitive=FALSE, normknn=TRUE, das=FALSE, adj="median"){
### Coda Version 2!
### Nearest Neighbor imputation algorithm of Missing values
### based on distance matrices.
###
### Program by Matthias Templ, Oktober 2008
### R Version: 2.7.1
###

# TODO: help file mit adj ergaenzen

x <- as.matrix(x)
class(x) <- "matrix"

N <- dim(x)[1]
P <- dim(x)[2]

if(any(rowSums(is.na(x)) == P)) stop("One or more observations constist of only NA's")

xOrig <- xmiss <- x

w <- is.na(x)
w2 <- !is.na(x)
if(metric=="Euclidean" & primitive==FALSE){
findknn <- function(x, i, j){
## find knn
m1 <- which(!is.na(x[i,]))
mi <- which(!is.na(x[,j,drop=FALSE]) & !is.na(rowSums(x[,m1,drop=FALSE])) )
d <- rowSums(t((t(x[mi,m1,drop=FALSE]) - x[i,m1])^2))
names(d) <- mi
as.numeric(names(which(d <= quantile(d, k/N))))
}
}
if(metric=="Euclidean" & primitive==TRUE){
findknn <- function(x, i, j){
## find knn
m1 <- which(!is.na(x[i,]))
mi <- which(!is.na(x[,j,drop=FALSE]) )
d <- rowSums(t((t(x[mi,m1,drop=FALSE]) - x[i,m1])^2),na.rm=TRUE)
names(d) <- mi
as.numeric(names(which(d <= quantile(d, k/N))))
}
}

if(metric=="Aitchison" & primitive==FALSE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) & c(!is.na(rowSums(x[,m1,drop=FALSE]))))
xclr <- cenLR(rbind(x[mi, m1, drop=FALSE], x[i, m1]))\$x.clr

d <- rowSums(t(abs(t(xclr[-nrow(xclr),]) - c(xclr[nrow(xclr),]))))
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
### new change for zeros paper:
#			print(is.numeric(xclr))
#if(is.numeric(xclr)) xclr <- matrix(xclr, ncol=length(xclr))
#d <- rowSums(t(abs(t(xclr[-nrow(xclr),,drop=FALSE]) - c(xclr[nrow(xclr),,drop=FALSE]))), na.rm=TRUE)
#names(d) <- mi
#print(d)
#wA <- which(d <= quantile(d, k/length(d)))
### end new change
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Aitchison" & primitive==TRUE & das == TRUE){
findknn <- function(x, i, j){
m1 <-  which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) )
da <- function(x,y){
d <- 0
p <- length(x)
for(i in 1:(p-1)){
for(j in (i+1):p){
d <- d + (log(x[i]/x[j]) - log(y[i]/y[j]))^2
}
}
d=d/p
d
}
ref <- x[i,m1]
d <- apply(x[mi, m1, drop=FALSE], 1, function(z){da(x=z, y=ref)})
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}

if(metric=="Aitchison" & primitive==FALSE & das == TRUE){
findknn <- function(x, i, j){
m1 <-  which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) & c(!is.na(rowSums(x[,m1,drop=FALSE]))))
da <- function(x,y){
d <- 0
p <- length(x)
for(i in 1:(p-1)){
for(j in (i+1):p){
d <- d + (log(x[i]/x[j]) - log(y[i]/y[j]))^2
}
}
d=d/p
d
}
ref <- x[i,m1]
d <- apply(x[mi, m1, drop=FALSE], 1, function(z){da(x=z, y=ref)})
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Aitchison" & primitive==TRUE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
a <- c(!is.na(x[,j,drop=FALSE]))
b <- ifelse(rowSums(is.na(x[,-j])) == 0, TRUE, FALSE)
mi <- which(a & b)
xclr <- cenLR(rbind(x[mi, m1, drop=FALSE], x[i, m1]))\$x.clr
d <- rowSums(t(abs(t(xclr[-nrow(xclr),]) - c(xclr[nrow(xclr),]))), na.rm=TRUE)
#d[d == 0] <- NA   ## dirty
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d), na.rm=TRUE))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}

if(metric=="Euclidean"){
imp <- function(x, i, j){
## workhorse: do the imputation
knn <- findknn(x, i, j)
get(agg)(x[knn,j], na.rm=TRUE) #median
}
}

if(metric=="Aitchison" & normknn == FALSE){
imp <- function(x, i, j){
## workhorse: do the imputation
knn <- findknn(x, i, j)
### median, normalization:
#fac <- knn\$xclriSum/knn\$medKnnSum
#median(x[knn\$knn, j])#*fac  # fac anders berechnen
medSum <- get(adj)(apply(x[knn\$knn, knn\$m1, drop=FALSE], 2, function(x,...){get(agg)(x, na.rm=TRUE)} ), na.rm=TRUE)
xSum <- get(adj)(x[i, knn\$m1, drop=FALSE], na.rm=TRUE)
fac <- xSum/medSum
get(agg)(x[knn\$knn, j], na.rm=TRUE)*fac #median
}
}

if(metric=="Aitchison" & normknn == TRUE){
imp <- function(x, i, j){
knn <- findknn(x, i, j)
##normalization:
#xSum <- sum(x[i, knn\$m1, drop=FALSE], na.rm=TRUE)
xSum <- get(adj)(x[i, knn\$m1, drop=FALSE], na.rm=TRUE)
if( length(knn\$knn) == 1){
#knnSum <- sum(x[knn\$knn, knn\$m1], na.rm=TRUE)
} else{
#knnSum <- rowSums(x[knn\$knn, knn\$m1, drop=FALSE], na.rm=TRUE)
knnSum <- apply(x[knn\$knn, knn\$m1, drop=FALSE], 1, function(x,...){get(agg)(x, na.rm=TRUE)} )
}
fac <- xSum/knnSum
get(agg)(x[knn\$knn, j] * fac, na.rm=TRUE)
## statt Mean das geom. Mittel?
}
}

#if(metric == "Aitchison" & ratios == TRUE){
#    knn <- findknn(x, i, j)
#    p <- ncol(x)
#    y <- apply(apply(x[knn\$knn, , drop = FALSE], 1, function(x){x[2:p]/x[1]}),1,median)
#    x[i,j] <- 1
#    x[i,-j] <- y
#}

#if(metric=="Aitchison" & unweighted == TRUE){
#  imp <- function(x, i, j){
#    knn <- findknn(x, i, j)
#    x[i,j] <- get(agg)(x[knn\$knn, j])
#  }
#}

for(i in 1:N){
for(j in 1:P){
if(is.na(x[i,j])){
x[i,j] <- imp(xmiss, i, j)
}
}
}

w <- is.na(xOrig)
colnames(x) <- colnames(xOrig)
res <- list(xOrig=xOrig, xImp=x, criteria=NULL, iter=NULL, w=length(which(w)), wind=w, metric=metric)
class(res) <- "imp"
res

}

``````