https://github.com/cran/PPCI
Tip revision: af9863621ad2e9f8042207ff0463fe04d9485b6e authored by David Hofmeyr on 22 November 2017, 09:14:29 UTC
version 0.1.0
version 0.1.0
Tip revision: af98636
clustereval.R
### function cluster_performance() computes external cluster performance measures: Purity, V-Measure, Normalised Mutual Information and Adjusted Rand Index
## arguments:
# assigned = vector of cluster assignments
# labels = true class labels (same length as assigned)
# beta = weight parameter used in calculation of V-measure. Higher values apply higher weight to homogeneity over completeness.
cluster_performance = function(assigned, labels, beta = 1){
n <- length(labels)
T <- table(assigned, labels)
RS <- rowSums(T)
CS <- colSums(T)
## V-measure
CK <- - sum(apply(T, 1, function(x) return(sum(x[which(x>0)]*log(x[which(x>0)]/sum(x))))))/n
KC <- - sum(apply(T, 2, function(x) return(sum(x[which(x>0)]*log(x[which(x>0)]/sum(x))))))/n
K <- - sum(apply(T, 1, function(x) return(sum(x)*log(sum(x)/n))))/n
C <- - sum(apply(T, 2, function(x) return(sum(x)*log(sum(x)/n))))/n
if(C!=0){
h <- 1 - CK/C
}
else{
h <- 0
}
if(K!=0){
c <- 1 - KC/K
}
else{
c <- 0
}
if(h==0 && c==0) v.measure <- 0
else v.measure <- (1+beta)*h*c/(beta*h+c)
## Purity
purity <- sum(apply(T, 1, function(x) return(max(x))))/n
## Adjusted Rand Index
O <- sum(sapply(T, function(t) choose(t, 2)))
E <- (sum(sapply(RS, function(t) choose(t, 2)))*sum(sapply(CS, function(t) choose(t, 2))))/choose(n, 2)
M <- (sum(sapply(RS, function(t) choose(t, 2))) + sum(sapply(CS, function(t) choose(t, 2))))/2
adj.rand <- (O-E)/(M-E)
## Normalised Mutual Information
prod <- RS%*%t(CS)
Tp <- T
Tp[which(T==0)] <- 1e-10
IXY <- sum(T*log(Tp*n/prod))
HX <- sum(RS*log(RS/n))
HY <- sum(CS*log(CS/n))
NMI <- IXY/sqrt(HX*HY)
c(adj.rand = adj.rand, purity = purity, v.measure = v.measure, nmi = NMI)
}
### function success_ratio() computes the success ratio (Pavlidis et al. 2016) of a binary partition
## arguments:
# assigned = vector of cluster assignments (takes at most two distinct values)
# labels = true class labels (same length as assigned)
success_ratio = function(assigned, labels){
n = length(labels)
#### Find the sizes of the overlaps between assigned and labels
T = table(assigned, labels)
#### determine which of the 2 aggregated classes to assign the classes
split = apply(T, 2, which.max)
#### if an aggregated class is empty, exit. failure to split any classes
if(length(unique(split))==1){
return(0)
}
#### calculate success and error
success = min(sum(T[1,split==1]), sum(T[2,split==2]))
error = sum(apply(T, 2, min))
success/(success+error)
}