We are hiring ! See our job offers.
https://github.com/cran/CluMix
Raw File
Tip revision: dd414a45be07f9c033c980461d041111b898f0a7 authored by Manuela Hummel on 29 December 2016, 10:52:10 UTC
version 1.3.1
Tip revision: dd414a4
dist.subjects.R
# Pairwise distances between the subjects (rows) of a data set.
#
# Arguments:
#   data     data frame with one row per subject; columns may be numeric,
#            factor, ordered or logical.
#   weights  variable weights, passed on unchanged as argument `w` of
#            FD::gowdis(). Only used when at least one column is not
#            numeric; the purely numeric (Euclidean) case ignores it.
#
# Value:
#   The result of stats::dist() if all columns are numeric, otherwise the
#   square root of the dissimilarities returned by FD::gowdis().
#
# TODO: allow also asymmetric binary variables (argument `asym.bin` of
#       FD::gowdis()), e.g. via an additional `type` argument.
dist.subjects <-
function(data, weights){
  # variable classes; vapply() guarantees a character vector even for
  # data without any columns
  dc <- vapply(data, data.class, character(1))

  # if all variables are numeric, use Euclidean distance
  if (all(dc == "numeric")) {
    return(dist(data))
  }

  # if not, use Gower's distance with Podani's extension
  # TODO: depending on type, define asymmetric binary variables for
  #       parameter asym.bin

  # binary variables have to be numeric: recode two-level factors as 0/1.
  # drop = FALSE keeps a data frame even when only one column matches, so
  # the conversion is always applied column by column.
  is_factor <- dc == "factor"
  n_levels <- vapply(data[, is_factor, drop = FALSE], nlevels, integer(1))
  bin <- names(n_levels)[n_levels == 2L]
  if (length(bin) > 0) {
    data[, bin] <- lapply(data[, bin, drop = FALSE],
                          function(x) as.numeric(x) - 1)
  }

  # in case there are logical variables, recode them as 0/1 as well
  is_logical <- dc == "logical"
  if (any(is_logical)) {
    data[, is_logical] <- lapply(data[, is_logical, drop = FALSE], as.numeric)
  }

  D <- FD::gowdis(x = data, w = weights, ord = "metric") # asym.bin=!!

  # gowdis calculates D = 1-S, but we want D = sqrt(1-S)
  sqrt(D)
}
back to top