https://github.com/cran/FSelector
Raw File
Tip revision: 12acd76ec1d613505e4bb20b3012e8d8507a310a authored by Lars Kotthoff on 16 May 2018, 20:38:09 UTC
version 0.31
Tip revision: 12acd76
selector.chi.squared.R
### CHI-SQUARED
# classification and regression
# continuous and discrete data
# Chi-squared attribute filter.
#
# Scores every attribute by the strength of its association with the class
# attribute, measured as Cramer's V (which reduces to the phi coefficient for
# a 2 x 2 contingency table).
#
# Arguments:
#   formula  a symbolic description of the model (class ~ attributes)
#   data     the data to process
#
# Returns a data.frame with a single column `attr_importance` and one row per
# attribute (row names are the attribute names). Scores lie in [0, 1]; an
# attribute whose contingency table has fewer than two non-empty rows or
# columns scores 0.
chi.squared <- function(formula, data) {

	new_data <- get.data.frame.from.formula(formula, data)
	new_data <- discretize.all(formula, new_data)

	# The first column is the class attribute; the rest are the predictors.
	class_data <- new_data[[1]]
	new_data <- new_data[-1] # new_data without class attr

	results <- vapply(new_data, function(w) {
			cont <- table(class_data, w)
			row_sums <- rowSums(cont)
			col_sums <- colSums(cont)

			# table() keeps unused factor levels as all-zero rows/columns.
			# Those would yield zero expected counts (0 / 0 = NaN below) and
			# would inflate the degrees of freedom, so drop them first.
			keep_rows <- row_sums > 0
			keep_cols <- col_sums > 0

			# Fewer than two observed levels on either side: no association
			# can be measured (this also covers an empty table).
			if (sum(keep_rows) < 2 || sum(keep_cols) < 2) {
				return(0)
			}

			cont <- cont[keep_rows, keep_cols, drop = FALSE]
			row_sums <- row_sums[keep_rows]
			col_sums <- col_sums[keep_cols]
			all_sum <- sum(cont)

			# Expected counts under independence: row total * column total / n.
			expected_matrix <- outer(row_sums, col_sums) / all_sum
			chis <- sum((cont - expected_matrix) ^ 2 / expected_matrix)

			# phi or Cramer's V
			sqrt(chis / (all_sum * min(length(col_sums) - 1, length(row_sums) - 1)))
		}, numeric(1))

	attr_names <- names(new_data)
	return(data.frame(attr_importance = results, row.names = attr_names))
}
back to top