Revision a6a4107a08051dfddc3c733102d002fd8617ab9e authored by Lars Kotthoff on 25 October 2014, 00:00:00 UTC, committed by Gabor Csardi on 25 October 2014, 00:00:00 UTC
1 parent c389439
selector.info.gain.R
### INFORMATION GAIN BASED ALGORITHMS
# classification and regression
# continuous and discrete data
# Attribute scoring entry point for the "infogain" variant.
#
# Forwards `formula` and `data` unchanged to information.gain.body() with
# type = "infogain" and returns whatever that call returns.
information.gain <- function(formula, data) {
  information.gain.body(formula = formula, data = data, type = "infogain")
}
# Attribute scoring entry point for the "gainratio" variant.
#
# Forwards `formula` and `data` unchanged to information.gain.body() with
# type = "gainratio" and returns whatever that call returns.
gain.ratio <- function(formula, data) {
  information.gain.body(formula = formula, data = data, type = "gainratio")
}
# Attribute scoring entry point for the "symuncert" variant.
#
# Forwards `formula` and `data` unchanged to information.gain.body() with
# type = "symuncert" and returns whatever that call returns.
symmetrical.uncertainty <- function(formula, data) {
  information.gain.body(formula = formula, data = data, type = "symuncert")
}
# Shared implementation behind information.gain(), gain.ratio() and
# symmetrical.uncertainty().
#
# Arguments:
#   formula  forwarded to get.data.frame.from.formula() and discretize.all().
#   data     forwarded to get.data.frame.from.formula().
#   type     score to report: "infogain" (default), "gainratio" or
#            "symuncert"; validated with match.arg().
#
# Returns a data.frame with a single column `attr_importance` and one row per
# column of the prepared data other than the first; row names are those
# column names. The first column of the prepared data is treated as the class.
#
# Scores, with H() being the value returned by entropyHelper():
#   infogain   H(class) + H(attr) - H(class, attr)
#   gainratio  infogain / H(attr), reported as 0 when H(attr) is 0
#   symuncert  2 * infogain / (H(attr) + H(class))
information.gain.body <- function(formula, data, type = c("infogain", "gainratio", "symuncert")) {
  type <- match.arg(type)

  new_data <- get.data.frame.from.formula(formula, data)
  new_data <- discretize.all(formula, new_data)

  # Entropy of each column on its own. vapply() keeps the result a numeric
  # vector even when there are no attribute columns, where sapply() would
  # return an empty list and break the arithmetic below.
  # NOTE(review): assumes entropyHelper() returns a single number -- confirm.
  all_entropies <- vapply(new_data, entropyHelper, numeric(1))
  class_entropy <- all_entropies[[1]]
  attr_entropies <- all_entropies[-1]

  # Entropy of each (class, attribute) pair taken jointly.
  joint_entropies <- vapply(new_data[-1], function(attr_col) {
    entropyHelper(data.frame(cbind(new_data[[1]], attr_col)))
  }, numeric(1))

  results <- class_entropy + attr_entropies - joint_entropies

  if (type == "gainratio") {
    # An attribute with zero entropy carries no information, and dividing by
    # it would give 0 / 0 = NaN; report a gain ratio of 0 for it instead.
    zero_entropy <- which(attr_entropies == 0)
    results <- results / attr_entropies
    results[zero_entropy] <- 0
  } else if (type == "symuncert") {
    results <- 2 * results / (attr_entropies + class_entropy)
  }

  attr_names <- colnames(new_data)[-1]
  data.frame(attr_importance = results, row.names = attr_names)
}
Computing file changes ...