Raw File
similarity.variables <-
function(data, method=c("associationMeasures", "distcor"), associationFun=association, check.psd=TRUE, make.psd=TRUE){
# Compute a similarity matrix between the variables (columns) of `data`.
#
# data: data.frame (or matrix) of original data, one column per variable
# method: how similarities are obtained: pairwise association measures
#         ("associationMeasures") or distance correlation ("distcor")
# associationFun: function(x, y) returning the association of two variables;
#         only used if method="associationMeasures"
# check.psd: check if resulting similarity matrix S is positive semi-definite?
#         (only applies to method="associationMeasures")
# make.psd: if S is not p.s.d., shall it be replaced by the nearest p.s.d.
#         matrix (Matrix::nearPD)? Only done if also check.psd=TRUE; if FALSE,
#         a warning is issued instead and S is returned unchanged.
#
# Returns S as a base matrix. For method="associationMeasures" S is p x p,
# symmetric, has unit diagonal and carries the column names of `data`.

  method <- match.arg(method)

  if(method == "associationMeasures"){
    p <- ncol(data)

    # Extract column k as a plain vector. `[[` is used for data frames so that
    # data-frame subclasses whose `[, k]` keeps a one-column frame also work;
    # for ordinary data frames both forms give the same result.
    get.column <- if(is.data.frame(data)){
      function(k) data[[k]]
    } else {
      function(k) data[, k]
    }

    S <- matrix(0, nrow=p, ncol=p)

    # Fill the strict lower triangle only; seq_len() keeps both loops empty
    # for p = 0 (and the inner one for i = 1), unlike 1:p.
    for(i in seq_len(p)){
      for(j in seq_len(i - 1L)){
        S[i,j] <- associationFun(get.column(i), get.column(j))
      }
    }
    # colnames() works for data frames and matrices alike
    dimnames(S) <- list(colnames(data), colnames(data))

    # make it symmetric (the upper triangle is still all zero here)
    S <- S + t(S)
    # every variable has similarity 1 with itself
    diag(S) <- 1

    # check if S is p.s.d. (eigen() does not accept a 0 x 0 matrix)
    if(check.psd && p > 0){
      psd <- all(eigen(S, symmetric=TRUE, only.values=TRUE)$values >= 0)

      # if S is not p.s.d., get "nearest p.s.d. matrix"
      if(!psd){
        if(!make.psd){
          warning("similarity matrix is not positive semidefinite")
        } else {
          # keepDiag=TRUE preserves the unit diagonal of S
          S <- Matrix::nearPD(S, keepDiag=TRUE, conv.norm.type="F")$mat
        }
      }
    }
  } else if(method == "distcor"){
    # dcormat_bc() is defined outside this function; its entries are mapped
    # to signed square roots.
    dcm <- dcormat_bc(data)
    S <- sign(dcm) * sqrt(abs(dcm))
  }

  # nearPD() returns a Matrix-package object; always hand back a base matrix
  return(as.matrix(S))
}
back to top