Revision 04f271f14fba8652b1ba48e065b58dbbc8b36bbd authored by Piotr Romanski on 11 April 2009, 00:00:00 UTC, committed by Gabor Csardi on 11 April 2009, 00:00:00 UTC
0 parent
Raw File
selector.info.gain.R
### INFORMATION GAIN BASED ALGORITHMS
# classification and regression
# continous and discrete data

information.gain <- function(formula, data) {
	information.gain.body(formula, data, type = "infogain")
}

gain.ratio <- function(formula, data) {
	information.gain.body(formula, data, type = "gainratio")	
}

symmetrical.uncertainty <- function(formula, data) {
	information.gain.body(formula, data, type = "symuncert")	
}

information.gain.body <- function(formula, data, type = c("infogain", "gainratio", "symuncert")) {
	type = match.arg(type)
	new_data = get.data.frame.from.formula(formula, data)
	new_data = discretize.all(formula, new_data)
	
	attr_entropies = sapply(new_data, entropy)
	class_entropy = attr_entropies[1]
	attr_entropies = attr_entropies[-1]
	joint_entropies = sapply(new_data[-1], function(t) {
			joint.entropy(new_data[[1]], t)
		})
	results = class_entropy + attr_entropies - joint_entropies

	if(type == "gainratio") {
		results = results / attr_entropies
	} else if(type == "symuncert") {
		results = 2 * results / (attr_entropies	+ class_entropy)
	}

	attr_names = dimnames(new_data)[[2]][-1]
	return(data.frame(attr_importance = results, row.names = attr_names))
}
back to top