# similarity.variables.R
similarity.variables <-
function(data, method=c("associationMeasures", "distcor"), associationFun=association, check.psd=TRUE, make.psd=TRUE){
  # Compute a similarity matrix between the variables (columns) of 'data'.
  #
  # Arguments:
  #   data: data.frame of original data (one column per variable)
  #   method: how similarities are obtained: pairwise association measures
  #     ("associationMeasures") or distance correlation ("distcor")
  #   associationFun: function(x, y) that returns the association measure for one
  #     pair of variables; only used if method="associationMeasures"
  #   check.psd: check if resulting similarity matrix S is positive semi-definite?
  #     (only applies to method="associationMeasures")
  #   make.psd: if S is not p.s.d., shall it be transformed to be p.s.d.?
  #     (only done if also check.psd=TRUE; otherwise a warning is issued)
  #
  # Value:
  #   for method="associationMeasures": symmetric p x p base matrix with unit
  #   diagonal and the column names of 'data' as dimnames;
  #   for method="distcor": the transformed result of dcormat_bc() as a base matrix.
  method <- match.arg(method)
  if(method == "associationMeasures"){
    p <- ncol(data)
    S <- matrix(0, nrow=p, ncol=p)
    # Fill the strict lower triangle only (i > j); the upper triangle is mirrored below.
    # seq_len() keeps both loops empty for p < 2, so data without columns yields an
    # empty matrix instead of an "undefined columns" error caused by 1:0.
    for(i in seq_len(p)[-1]){
      for(j in seq_len(i - 1)){
        # drop=TRUE hands plain vectors to associationFun
        S[i,j] <- associationFun(data[, i, drop=TRUE], data[, j, drop=TRUE])
      }
    }
    dimnames(S) <- list(names(data), names(data))
    # make it symmetric (since only lower triangle was calculated)
    S <- S + t(S)
    # every variable has similarity 1 with itself
    diag(S) <- 1
    # check if S is p.s.d. (skipped for an empty matrix, which eigen() rejects)
    if(check.psd && p > 0){
      # S is symmetric by construction, so eigen() can skip its own symmetry test
      # NOTE(review): the exact comparison with 0 treats eigenvalues that are negative
      # only by rounding error as a violation -- consider a small tolerance.
      psd <- all(eigen(S, symmetric=TRUE, only.values=TRUE)$values >= 0)
      # if S is not p.s.d., get "nearest p.s.d. matrix"
      if(!psd){
        if(!make.psd)
          warning("similarity matrix is not positive semidefinite")
        else{
          # keepDiag=TRUE preserves the unit diagonal of S
          S <- Matrix::nearPD(S, keepDiag=TRUE, conv.norm.type="F")$mat
          #warning("similarity matrix was adjusted to be positive semidefinite")
        }
      }
    }
  }
  else if(method == "distcor"){
    # dcormat_bc() is defined elsewhere; presumably it returns (bias-corrected)
    # squared distance correlations that may be negative -- TODO confirm.
    dcm <- dcormat_bc(data)
    # signed square root: keeps the sign of each entry
    S <- sign(dcm) * sqrt(abs(dcm))
  }
  # nearPD() returns a Matrix-package object; always hand back a base matrix
  return(as.matrix(S))
}