Revision 12acd76ec1d613505e4bb20b3012e8d8507a310a authored by Lars Kotthoff on 16 May 2018, 20:38:09 UTC, committed by cran-robot on 16 May 2018, 20:38:09 UTC
1 parent 970c4e6
selector.chi.squared.R
### CHI-SQUARED
# classification and regression
# continous and discrete data
chi.squared <- function(formula, data) {
new_data = get.data.frame.from.formula(formula, data)
new_data = discretize.all(formula,new_data)
class_data = new_data[[1]]
new_data = new_data[-1] #new_data without class attr
results = sapply(new_data, function(w) {
cont = table(class_data, w)
row_sums = apply(cont, 1, sum)
col_sums = apply(cont, 2, sum)
all_sum = sum(col_sums)
expected_matrix = t(as.matrix(col_sums) %*% t(as.matrix(row_sums))) / all_sum
chis = sum((cont - expected_matrix) ^ 2 / expected_matrix)
if(chis == 0 || length(col_sums) < 2 || length (row_sums) < 2) {
return(0)
} else {
# phi or Cramer's V
return(sqrt(chis / (all_sum * min(length(col_sums) - 1, length(row_sums) - 1))))
}
})
attr_names = dimnames(new_data)[[2]]
return(data.frame(attr_importance = results, row.names = attr_names))
}
Computing file changes ...