https://github.com/cran/robCompositions
Tip revision: 45c93a18fe3e83941bd865d89680d47d6b1d846e authored by Matthias Templ on 15 May 2012, 00:00:00 UTC
version 1.6.0
version 1.6.0
Tip revision: 45c93a1
impKNNa.R
`impKNNa` <-
function(x, method="knn", k=3,
metric="Aitchison", agg="median", primitive=FALSE, normknn=TRUE, das=FALSE, adj="median"){
### Coda Version 2!
### Nearest Neighbor imputation algorithm of Missing values
### based on distance matrices.
###
### Program by Matthias Templ, Oktober 2008
### R Version: 2.7.1
### Copyright : TU-Wien, 2008
###
# TODO: help file mit adj ergaenzen
x <- as.matrix(x)
class(x) <- "matrix"
N <- dim(x)[1]
P <- dim(x)[2]
if(any(rowSums(is.na(x)) == P)) stop("One or more observations constist of only NA's")
xOrig <- xmiss <- x
w <- is.na(x)
w2 <- !is.na(x)
if(metric=="Euclidean" & primitive==FALSE){
findknn <- function(x, i, j){
## find knn
m1 <- which(!is.na(x[i,]))
mi <- which(!is.na(x[,j,drop=FALSE]) & !is.na(rowSums(x[,m1,drop=FALSE])) )
d <- rowSums(t((t(x[mi,m1,drop=FALSE]) - x[i,m1])^2))
names(d) <- mi
as.numeric(names(which(d <= quantile(d, k/N))))
}
}
if(metric=="Euclidean" & primitive==TRUE){
findknn <- function(x, i, j){
## find knn
m1 <- which(!is.na(x[i,]))
mi <- which(!is.na(x[,j,drop=FALSE]) )
d <- rowSums(t((t(x[mi,m1,drop=FALSE]) - x[i,m1])^2),na.rm=TRUE)
names(d) <- mi
as.numeric(names(which(d <= quantile(d, k/N))))
}
}
if(metric=="Aitchison" & primitive==FALSE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) & c(!is.na(rowSums(x[,m1,drop=FALSE]))))
xclr <- clr(rbind(x[mi, m1, drop=FALSE], x[i, m1]))$x.clr
d <- rowSums(t(abs(t(xclr[-nrow(xclr),]) - c(xclr[nrow(xclr),]))))
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Aitchison" & primitive==TRUE & das == TRUE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) )
da <- function(x,y){
d <- 0
p <- length(x)
for(i in 1:(p-1)){
for(j in (i+1):p){
d <- d + (log(x[i]/x[j]) - log(y[i]/y[j]))^2
}
}
d=d/p
d
}
ref <- x[i,m1]
d <- apply(x[mi, m1, drop=FALSE], 1, function(z){da(x=z, y=ref)})
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Aitchison" & primitive==FALSE & das == TRUE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
mi <- which(c(!is.na(x[,j,drop=FALSE])) & c(!is.na(rowSums(x[,m1,drop=FALSE]))))
da <- function(x,y){
d <- 0
p <- length(x)
for(i in 1:(p-1)){
for(j in (i+1):p){
d <- d + (log(x[i]/x[j]) - log(y[i]/y[j]))^2
}
}
d=d/p
d
}
ref <- x[i,m1]
d <- apply(x[mi, m1, drop=FALSE], 1, function(z){da(x=z, y=ref)})
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d)))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Aitchison" & primitive==TRUE){
findknn <- function(x, i, j){
m1 <- which(!is.na(x[i,]))
a <- c(!is.na(x[,j,drop=FALSE]))
b <- ifelse(rowSums(is.na(x[,-j])) == 0, TRUE, FALSE)
mi <- which(a & b)
xclr <- clr(rbind(x[mi, m1, drop=FALSE], x[i, m1]))$x.clr
d <- rowSums(t(abs(t(xclr[-nrow(xclr),]) - c(xclr[nrow(xclr),]))), na.rm=TRUE)
#d[d == 0] <- NA ## dirty
names(d) <- mi
wA <- which(d <= quantile(d, k/length(d), na.rm=TRUE))
w <- as.numeric(names(wA))
list(knn=w, m1=m1)
}
}
if(metric=="Euclidean"){
imp <- function(x, i, j){
## workhorse: do the imputation
knn <- findknn(x, i, j)
get(agg)(x[knn,j], na.rm=TRUE) #median
}
}
if(metric=="Aitchison" & normknn == FALSE){
imp <- function(x, i, j){
## workhorse: do the imputation
knn <- findknn(x, i, j)
### median, normalization:
#fac <- knn$xclriSum/knn$medKnnSum
#median(x[knn$knn, j])#*fac # fac anders berechnen
medSum <- get(adj)(apply(x[knn$knn, knn$m1, drop=FALSE], 2, function(x,...){get(agg)(x, na.rm=TRUE)} ), na.rm=TRUE)
xSum <- get(adj)(x[i, knn$m1, drop=FALSE], na.rm=TRUE)
fac <- xSum/medSum
get(agg)(x[knn$knn, j], na.rm=TRUE)*fac #median
}
}
if(metric=="Aitchison" & normknn == TRUE){
imp <- function(x, i, j){
knn <- findknn(x, i, j)
##normalization:
#xSum <- sum(x[i, knn$m1, drop=FALSE], na.rm=TRUE)
xSum <- get(adj)(x[i, knn$m1, drop=FALSE], na.rm=TRUE)
if( length(knn$knn) == 1){
#knnSum <- sum(x[knn$knn, knn$m1], na.rm=TRUE)
knnSum <- get(adj)(x[knn$knn, knn$m1], na.rm=TRUE)
} else{
#knnSum <- rowSums(x[knn$knn, knn$m1, drop=FALSE], na.rm=TRUE)
knnSum <- apply(x[knn$knn, knn$m1, drop=FALSE], 1, function(x,...){get(agg)(x, na.rm=TRUE)} )
}
fac <- xSum/knnSum
get(agg)(x[knn$knn, j] * fac, na.rm=TRUE)
## statt Mean das geom. Mittel?
}
}
#if(metric == "Aitchison" & ratios == TRUE){
# knn <- findknn(x, i, j)
# p <- ncol(x)
# y <- apply(apply(x[knn$knn, , drop = FALSE], 1, function(x){x[2:p]/x[1]}),1,median)
# x[i,j] <- 1
# x[i,-j] <- y
#}
#if(metric=="Aitchison" & unweighted == TRUE){
# imp <- function(x, i, j){
# knn <- findknn(x, i, j)
# x[i,j] <- get(agg)(x[knn$knn, j])
# }
#}
for(i in 1:N){
for(j in 1:P){
if(is.na(x[i,j])){
x[i,j] <- imp(xmiss, i, j)
}
}
}
w <- is.na(xOrig)
colnames(x) <- colnames(xOrig)
res <- list(xOrig=xOrig, xImp=x, criteria=NULL, iter=NULL, w=length(which(w)), wind=w, metric=metric)
class(res) <- "imp"
res
}