colpairs.R
#Given an nxp matrix m and a function f,
# returns the pxp matrix got by applying f to all pairs of columns of m.
colpairs <- function(m,f,diag=0,na.omit=FALSE,...){
flocal <- function(i,j)
if (!is.null(diag) && (i == j))
diag
else {
x <- m[,i]
y <- m[,j]
if (na.omit) {
d <- na.omit(cbind(x,y))
x <- d[,1]
y <- d[,2]}
f(x,y,...) }
p <- ncol(m)
m1 <- matrix(rep(1:p,p),nrow=p,ncol=p)
ind <- mapply("c",m1,t(m1))
ans <- apply(ind,2, function(i) flocal(i[1],i[2]))
ans <- matrix(ans,nrow=p,ncol=p)
colnames(ans) <- colnames(m)
rownames(ans) <- colnames(m)
ans
}
km2 <- function(x,y){
x <- x - mean(x)
y <- y - mean(y)
sum(x*x)+ sum(y*y)
}
# Computes the sum of all distances between pairs of
# objects whose coordinates are contained in x and y.
gtot <- function(x,y,...)
2*sum(dist(cbind(x,y),...))
# Computes the average total distance from one object to all other
# objects, where x and y contain the object cordinates.
gave <- function(x,y,...)
2*sum(dist(cbind(x,y),...))/length(x)
# Computes the cluster diameter- the maximum distance between
# objects whose coordinates are contained in x and y.
diameter <- function(x,y,...){
d <- dist(cbind(x,y),...)
max(d)
}
# Computes the cluster star distance- the minimum of the total distance from
# one object to another, where x and y contain the object cordinates.
star <- function(x,y,...){
d <- vec2distm(dist(cbind(x,y),...))
min(apply(d,2,sum))
}
# Computes the silhouette distance of a partition of the objects in
# x and y, where group contains the object memberships.
sil <- function(x,y,groups,...){
# require(cluster)
igroups <- unclass(factor(groups))
d <- dist(cbind(x,y),...)
s <- silhouette(igroups,d)
summary(s)$avg.width
}
# Computes the agglomerative coefficient, from agnes.
ac <- function(x,y,...){
# require(cluster)
ag <- agnes(cbind(x,y),keep.diss=FALSE,keep.data=FALSE,...)
ag$ac
}
# Computes the total line length in a parallel coordinate plot
# of x and y.
pclen <- function(x,y) sum(abs(y-x))
# Computes the average (per object) line length in a parallel coordinate plot
# where each x object is connected to all y objects.
pcglen <- function(x,y)
sum(outer(x,y,function(a,b) abs(a-b)))/length(x)
# Applies the function gfun to each group of x and y values
# and combines the results using the function cfun.
#(...) arguments are passed to gfun.
partition.crit <- function(x,y,groups,gfun= gave,cfun=sum,...){
dgroups <- unique(groups)
gm <- sapply(dgroups,function(g) gfun(x[groups==g],y[groups==g],...))
cfun(gm)
}