Content - 06cb50163146365a16149b49971e3ab28ced149f - 70cd59d/R/model.avg.R

visit type:
Tip revision: 69fda28b879d6147f7dda937d28114b28cc8ebc8 authored by Kamil Bartoń on 14 May 2014, 02:44:24 UTC
version 1.10.0
Tip revision: 69fda28
model.avg.R
`model.avg` <-
function (object, ..., revised.var = TRUE) {
	if (isTRUE("method" %in% names(match.call())))
		stop("argument 'method' is no longer accepted")
	UseMethod("model.avg")
}

.coefarr.avg <-
function(cfarr, weight, revised.var, full, alpha) {	
	weight <- weight / sum(weight)
	nCoef <- dim(cfarr)[3L]
	if(full) {
		cfarr[, 1:2, ][is.na(cfarr[, 1:2, ])] <- 0
		if(!all(is.na(cfarr[, 3L, ])))
			cfarr[ ,3L, ][is.na(cfarr[ , 3L, ])] <- Inf
	}
	
	avgcoef <- array(dim = c(nCoef, 5L),
		dimnames = list(dimnames(cfarr)[[3L]], c("Estimate",
		"Std. Error", "Adjusted SE", "Lower CI", "Upper CI")))
	for(i in seq_len(nCoef))
		avgcoef[i, ] <- par.avg(cfarr[, 1L, i], cfarr[, 2L, i], weight,
			df = cfarr[, 3L, i], alpha = alpha, revised.var = revised.var)
		
	avgcoef[is.nan(avgcoef)] <- NA
	return(avgcoef)
}


`model.avg.model.selection` <-
function(object, subset, fit = FALSE, ..., revised.var = TRUE) {

	if(!missing(subset)) {
		cl <- match.call()
		cl[[1L]] <- as.name("subset")
		names(cl)[2L] <- "x"
		object <- eval(cl[1L:3L], parent.frame())
	}
	if(fit || length(list(...))) {
		cl <- match.call()
		cl$fit <- NULL
		arg1 <- names(cl)[-(1L:2L)] %in% names(formals("model.avg.default"))
		cl1 <- cl[c(TRUE, TRUE, !arg1)]
		cl1[[1L]] <- as.name("get.models")
		cl2 <- cl[c(TRUE, TRUE, arg1)]
		cl2[[2L]] <- cl1
		cl2[[1L]] <- as.name("model.avg")
		#message("recreating the model objects")
		return(eval(cl2, parent.frame()))
	}

	ct <- attr(object, "coefTables")

	cfarr <- coefArray(ct)
	coefNames <- c(dimnames(cfarr)[[3L]])
	weight <- object$weight / sum(object$weight)

	avgcoef <- .coefarr.avg(cfarr, weight, revised.var, FALSE, alpha = 0.05)
	missing.par <- is.na(cfarr[, 1L, ])

	coef.shrink <- avgcoef[, 1L] *
		colSums(array(weight * as.double(!missing.par),
					  dim = dim(cfarr)[c(1L, 3L)]))
					  
	#allterms1 <- lapply(attr(object, "calls"), function(x)
		#getAllTerms(as.formula(x[[switch(as.character(x[[1L]]),
			#lme=, lme.formula= "fixed", gls= "model", "formula")]])))
	all.terms <- attr(object, "terms")
	all.vterms <- all.terms[!(all.terms %in% attr(all.terms, "interceptLabel")
		| apply(is.na(object[, all.terms]), 2L, all))]
	#allterms1 <- apply(!is.na(object[, all.vterms, drop = FALSE]), 1L, function(x) all.vterms[x])
	allterms1 <- applyrns(!is.na(object[, all.vterms, drop = FALSE]), function(x) all.vterms[x])
	all.model.names <- .modelNames(allTerms = allterms1, uqTerms = all.vterms)

	mstab <- object[, -(seq_len(ncol(object) - 5L))]
	colnames(mstab)[4L:5L] <- c("Delta", "Weight")
	rownames(mstab) <- all.model.names

	avgcoef[is.nan(avgcoef)] <- NA_real_

	ret <- list(
		summary = as.data.frame(mstab),
		term.codes = attr(all.model.names, "variables"),
		avg.model = avgcoef,
		coef.shrinkage = coef.shrink,
		coefArray = cfarr,
		importance = importance(object),
		beta = attr(object, "beta"),
		term.names = coefNames,
		x = NULL,
		residuals = NULL,
		formula = if(!is.null(attr(object, "global")))
			formula(attr(object, "global")) else NULL,
		call = match.call()
	)

	attr(ret, "beta") <- attr(object, "beta")
	attr(ret, "nobs") <- attr(object, "nobs")
	attr(ret, "revised.var") <- revised.var
	class(ret) <- "averaging"
	return(ret)
}


`model.avg.default` <-
function(object, ..., beta = FALSE,
	rank = NULL, rank.args = NULL, revised.var = TRUE,
	dispersion = NULL, ct.args = NULL) {

	if (inherits(object, "list")) {
		if(length(object) == 0L) stop("'object' is an empty list")
		models <- object
		object <- object[[1L]]
        if (!is.null(rank) || is.null(rank <- attr(models, "rank"))) {
            rank <- .getRank(rank, rank.args = rank.args, object = object)
      	}
	} else {
		models <- list(object, ...)
        rank <- .getRank(rank, rank.args = rank.args, object = object)
	}

	nModels <- length(models)
	if(nModels == 1L) stop("only one model supplied. Nothing to do")
	.checkModels(models)

    alpha <- 0.05
	.fnull <- function(...) return(NULL)
	ICname <- deparse(attr(rank, "call")[[1L]])

	allterms1 <- lapply(models, getAllTerms)
	all.terms <- unique(unlist(allterms1, use.names = FALSE))

	# sort by level (main effects first)
	all.terms <- all.terms[order(vapply(gregexpr(":", all.terms),
		function(x) if(x[1L] == -1L) 0L else length(x), numeric(1L)), all.terms)]

	# all.model.names <- modelNames(models, asNumeric = FALSE,
		# withRandomTerms = FALSE, withFamily = FALSE)
	all.model.names <- .modelNames(allTerms = allterms1, uqTerms = all.terms)

	#if(is.null(names(models))) names(models) <- all.model.names
	
	coefTableCall <- as.call(c(alist(coefTable, models[[i]],
		dispersion = dispersion[i]), ct.args))

	# check if models are unique:
	if(!is.null(dispersion)) dispersion <- rep(dispersion, length.out = nModels)
	coefTables <- vector(nModels, mode = "list")
	for(i in seq_len(nModels)) coefTables[[i]] <-
		eval(coefTableCall)
		#coefTable(models[[i]], dispersion = dispersion[i])
	mcoeffs <- lapply(coefTables, "[", , 1L)

	dup <- unique(sapply(mcoeffs, function(i) which(sapply(mcoeffs, identical, i))))
	dup <- dup[sapply(dup, length) > 1L]
	if (length(dup) > 0L) stop("models are not unique. Duplicates: ",
		prettyEnumStr(sapply(dup, paste, sep = "", collapse = " = "),
			quote = "'"))


	LL <- .getLik(object)
	logLik <- LL$logLik
	lLName <- LL$name

	ic <- vapply(models, rank, numeric(1L))
	logLiks <- lapply(models, logLik)
	delta <- ic - min(ic)
	weight <- exp(-delta / 2) / sum(exp(-delta / 2))
	model.order <- order(weight, decreasing = TRUE)

	# ----!!! From now on, everything MUST BE ORDERED by 'weight' !!!-----------
	mstab <- cbind(df = vapply(logLiks, attr, numeric(1L), "df"),
		logLik = as.numeric(logLiks), IC = ic, Delta = delta, Weight = weight,
		deparse.level = 0L)
	if(!is.null(dispersion)) mstab <- cbind(mstab, Dispersion = dispersion)
	rownames(mstab) <- all.model.names
	mstab <- mstab[model.order, ]
	weight <- mstab[, "Weight"] # has been sorted in table
	models <- models[model.order]
	coefTables <- coefTables[model.order]

	if (beta) {
		response.sd <- sd(model.response(model.frame(object)))
		for(i in seq_along(coefTables))
			coefTables[[i]][, 1L:2L] <-
				coefTables[[i]][, 1L:2L] *
				apply(model.matrix(models[[i]]), 2L, sd) / response.sd
	}

	cfarr <- coefArray(coefTables)
	coefNames <- c(dimnames(cfarr)[[3L]])
	avgcoef <- .coefarr.avg(cfarr, weight, revised.var, FALSE, alpha = 0.05)
	missing.par <- is.na(cfarr[, 1L, ])
	coef.shrink <- avgcoef[, 1L] *
		colSums(array(weight * as.double(!missing.par),
					  dim = dim(cfarr)[c(1L, 3L)]))

    names(all.terms) <- seq_along(all.terms)
	colnames(mstab)[3L] <- ICname

	# Benchmark: 3.7x faster
	#system.time(for(i in 1:10000) t(array(unlist(p), dim=c(length(all.terms),length(models)))))
	#system.time(for(i in 1:10000) do.call("rbind", p))

	vpresent <- do.call("rbind", lapply(models, function(x)
		all.terms %in% getAllTerms(x)))

	importance <- apply(weight * vpresent, 2L, sum)
	names(importance) <- all.terms
	importance <- sort(importance, decreasing = TRUE)
	attr(importance, "n.models") <- structure(colSums(vpresent), names = all.terms)
	class(importance) <- c("importance", "numeric") 

	#global.mm <- model.matrix(fit)
	#cnmmxx2 <- unique(unlist(lapply(gm, function(x) names(coef(x)))))
	#mmx <- gmm[, cnmmxx[match(colnames(gmm), cnmmxx, nomatch = 0)]]

	mmxs <- tryCatch(cbindDataFrameList(lapply(models, model.matrix)),
					 error = .fnull, warning = .fnull)

	# Far less efficient:
	#mmxs <- lapply(models, model.matrix)
	#mx <- mmxs[[1]];
	#for (i in mmxs[-1])
	#	mx <- cbind(mx, i[,!(colnames(i) %in% colnames(mx)), drop=FALSE])

	# residuals averaged (with brute force)
	#rsd <- tryCatch(apply(vapply(models, residuals, residuals(object)), 1L,
		#weighted.mean, w = weight), error = .fnull)
	#rsd <- NULL
	## XXX: how to calc residuals ?
	
	modelClasses <- lapply(models, class)
	frm <-
	if(all(vapply(modelClasses[-1L], identical, logical(1L), modelClasses[[1L]]))) {
		trm <- tryCatch(terms(models[[1L]]),
				error = function(e) terms(formula(models[[1L]])))
		response <- attr(trm, "response")
		m1 <- models[[1L]]
		makeArgs(m1, all.terms, opt = list(
			response = if(response > 0L) attr(trm, "variables")[[response + 1L]] else NULL,
			gmFormulaEnv = environment(formula(m1)),
			intercept = ! identical(unique(unlist(lapply(allterms1, attr, "intercept"))), 0),
			interceptLabel = unique(unlist(lapply(allterms1, attr, "interceptLabel"))),
			#	random = attr(allTerms0, "random"),
			gmCall = getCall(m1),
			gmEnv = parent.frame(),
			allTerms = all.terms,
			random = . ~ .
			))[[1L]]
	} else NA
	

	ret <- list(
		summary = as.data.frame(mstab),
		term.codes = attr(all.model.names, "variables"),
		avg.model = avgcoef,
		coef.shrinkage = coef.shrink,
		coefArray = cfarr,
		importance = importance,
		beta = beta,
		term.names = coefNames,
		x = mmxs,
		residuals = NULL, # no residuals
		formula = frm,
		call = match.call()
	)

	attr(ret, "modelList") <- models
	attr(ret, "beta") <- beta
	attr(ret, "nobs") <- nobs(object)
	attr(ret, "revised.var") <- revised.var
	class(ret) <- "averaging"
	return(ret)
}

`coef.averaging` <-
function(object, full = FALSE, ...) if(full) object$coef.shrinkage else
	object$avg.model[, 1L]


 #TODO: predict p -="response" + average on response scale
 
`predict.averaging` <-
function(object, newdata = NULL, se.fit = FALSE, interval = NULL,
	type = NA, backtransform = FALSE,
	full = TRUE, ...) {

	if (!missing(interval)) .NotYetUsed("interval", error = FALSE)
	
	if(backtransform && !is.na(type) && type == "response")
		warning("back-transforming predictions already on response scale")

	models <- attr(object, "modelList")
	if(is.null(models)) stop("can only predict from 'averaging' object created with a model list")

	# Benchmark: vapply is ~4x faster
	#system.time(for(i in 1:1000) sapply(models, inherits, what="gam")) /
	#system.time(for(i in 1:1000) vapply(models, inherits, logical(1L), what="gam"))

	# If all models inherit from lm:
	if ((missing(se.fit) || !se.fit)
		&& (is.na(type) || type == "link")
		&& all(linherits(models, c(gam = FALSE, lm = TRUE)))
		&& !any(is.na(object$coef.shrinkage))
		) {
		
		coeff <- coef(object, full = full)
		frm <- formula(object)

		tt <- delete.response(terms(frm))
		X <- model.matrix(object)

		if (missing(newdata) || is.null(newdata)) {
			Xnew <- X
		} else {
			xlev <- unlist(unname(lapply(models, "[[", "xlevels")),
						   recursive = FALSE, use.names = TRUE)
			Xnew <- model.matrix(tt, data = newdata, xlev = xlev)
		}

		Xnew <- Xnew[, match(names(coeff), colnames(Xnew), nomatch = 0L)]
		ret <- (Xnew %*% coeff)[, 1L]
		
		Xnew

		#if (se.fit) {
		#	scale <- 1
		#	covmx <- solve(t(X) %*% X)
		#	se <- sqrt(diag(Xnew %*% covmx %*% t(Xnew))) * sqrt(scale)
		#	return(list(fit = y, se.fit = se))
		#}
	} else {
		# otherwise, use brute force:

		if(full == FALSE) warning("argument 'full' ignored")

		cl <- as.list(match.call())
		cl$backtransform <- cl$full <- NULL
		cl[[1L]] <- as.name("predict")
		#if("type" %in% names(cl)) cl$type <- "link"
		if(!missing(newdata)) cl$newdata <- as.name("newdata")
		cl <- as.call(cl)

		#predict.lme <- MuMIn:::predict.lme

		pred <- lapply(models, function(x) {
			cl[[2L]] <- x
			y <- tryCatch({
				y <- eval(cl)
				if(is.numeric(y)) y else structure(as.list(y[c(1L, 2L)]),
					names = c("fit", "se.fit"))
				}, error = function(e) e)
		})

		err <- sapply(pred, inherits, "condition")
		if (any(err)) {
			lapply(pred[err], warning)
			stop(sprintf(ngettext(sum(err), "'predict' for model %s caused error",
				"'predict' for models %s caused errors"),
				prettyEnumStr(names(models[err]), quote = "'")
				))
		}

		.untransform <- function(fit, se.fit = NULL, models) {
			fam <- tryCatch(vapply(models, function(z)
				unlist(family(z)[c("family", "link")]), character(2L)),
							error = function(e) NULL)
			if (!is.null(fam)) {
				if(any(fam[, 1L] != fam[, -1L]))
				stop("cannot calculate prediction on a response scale ",
					 "with models using different families or link functions")
				fam1 <- family(models[[1L]])
				if(is.null(se.fit))
					return(fam1$linkinv(fit))
				else return(list(fit = fam1$linkinv(fit),
					se.fit = se.fit * abs(fam1$mu.eta(fit))
					))
			}
			return(NULL)
		}

		if (all(sapply(pred, is.list))) {
		#if(all(sapply(pred, function(x) c("fit", "se.fit") %in% names(x)))) {
			fit <- do.call("cbind", lapply(pred, "[[", "fit"))
			se.fit <- do.call("cbind", lapply(pred, "[[", "se.fit"))
			revised.var <- attr(object, "revised.var")

			apred <- unname(vapply(seq(nrow(fit)), function(i)
				par.avg(fit[i, ], se.fit[i, ], weight = object$summary$Weight,
					df = NA_integer_, revised.var = revised.var),
					FUN.VALUE = double(5L)))

			# TODO: ase!
			#no.ase <- all(is.na(object$avg.model[,3]))
			# if(no.ase) 2 else 3
			ret <-  if (backtransform)
					.untransform(apred[1L, ], apred[2L, ], models = models) else
						list(fit = apred[1L, ], se.fit = apred[2L, ])
		} else {
			#tryCatch({
			i <- !vapply(pred, is.numeric, FALSE)
			if(any(i)) pred[i] <- lapply(pred[i], "[[", 1L)
			ret <- apply(do.call("cbind", pred), 1L, weighted.mean,
				w = object$summary$Weight)
			if (backtransform)
				ret <- .untransform(ret, models = models)
		}

	}
	return(ret)
}

`fitted.averaging` <-
function (object, ...) predict.averaging(object)

`model.matrix.averaging` <-
function (object, ...) object$x

`summary.averaging` <-
function (object, ...) {
	cf <- object$avg.model
	
	.makecoefmat <- function(cf) {
		no.ase <- all(is.na(cf[, 3L]))
		z <- abs(cf[, 1L] / cf[, if(no.ase) 2L else 3L])
		pval <- 2 * pnorm(z, lower.tail = FALSE)
		cbind(cf[, if(no.ase) 1L:2L else 1L:3L],
			`z value` = z, `Pr(>|z|)` = zapsmall(pval))
	}
		
	object$coefmat <- .makecoefmat(object$avg.model)
	object$coefmat.full <- .makecoefmat(.coefarr.avg(object$coefArray, object$summary$Weight,
		attr(object, "revised.var"), TRUE, 0.05)) 
	object$coef.nmod <- colSums(!is.na(object$coefArray[, 1L,]))
	
	structure(object, class = c("summary.averaging", "averaging"))
}

`confint.averaging` <-
function (object, parm, level = 0.95, full = FALSE, ...) {
    a2 <- 1 - level
    a <- a2 / 2
    cf <- object$coefArray[, 1L, ]
    pnames <- colnames(cf)
    if (missing(parm)) parm <- pnames
		else if (is.numeric(parm)) parm <- pnames[parm]
	missing.par <- is.na(cf)
    se <- object$coefArray[, 2L, ]
    dfs <- object$coefArray[, 3L, ]
	if(full) {
		se[missing.par] <- cf[missing.par] <- 0
		if(!all(is.na(dfs))) dfs[missing.par] <- Inf
	}

    wts <- object$summary$Weight
    ci <- t(sapply(parm, function(i)
		par.avg(cf[,i], se[,i], wts, dfs[, i], alpha = a2)))[, 4L:5L]
    pct <- .xget("stats", "format.perc")(c(a, 1L - a), 3L)

	ci[is.na(object$coef.shrinkage), ] <- NA_real_
    colnames(ci) <- pct
    return(ci)
}

`print.summary.averaging` <-
function (x, digits = max(3L, getOption("digits") - 3L),
    signif.stars = getOption("show.signif.stars"), ...) {

    cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
        "\n\n", sep = "")

    cat("Component models: \n")
	print(round(as.matrix(x$summary), 2L), na.print = "")

	cat("\nTerm codes: \n")
	print.default(x$term.codes, quote = FALSE)

	cat("\nModel-averaged coefficients: \n")
	if (nnotdef <- sum(is.na(x$coefmat[, 1L])))
		 cat("(", nnotdef, " not defined because of singularities in all ",
			"component models) \n", sep = "")

	hasPval <- TRUE
    printCoefmat(x$coefmat, P.values = hasPval, has.Pvalue = hasPval,
		digits = digits, signif.stars = FALSE)

	cat("\nFull model-averaged coefficients (with shrinkage):", "\n")
    printCoefmat(x$coefmat.full, P.values = hasPval, has.Pvalue = hasPval,
		digits = digits, signif.stars = signif.stars)

	#if (no.ase) cat("Confidence intervals are unadjusted \n")
	#printCoefmat(matrix(x$coef.shrinkage, nrow = 1L,
		#dimnames = list("", x$term.names)), P.values = FALSE,
		#has.Pvalue = FALSE, cs.ind = seq_along(x$term.names), tst.ind = NULL)

	cat("\nRelative variable importance: \n")
	print(round(x$importance, 2L))
}

`print.averaging` <-
function(x, ...) {
    cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
        "\n\n", sep = "")
    cat("Component models:", "\n")
	comp.names <- rownames(x$summary)
	comp.names[comp.names == ""] <- "null"
	cat(format(sQuote(comp.names), justify = "l"), fill = TRUE)
	cat("\nCoefficients:", "\n")
	print(rbind(subset = x$avg.model[, 1L], full = x$coef.shrinkage))
    x
}

`vcov.averaging` <- function (object, full = FALSE, ...) {
	models <- attr(object, "modelList")
	if(is.null(models)) stop("can calculate covariance matrix from ",
							 "'averaging' object created with a model list")

	vcovs <- lapply(lapply(models, vcov), as.matrix)
	names.all <- dimnames(object$coefArray)[[3L]]
	nvars <- length(names.all)
	nvarseq <- seq(nvars)
	wts <- object$summary$Weight
	wts <- wts / sum(wts) # normalize just in case

	vcov0 <- matrix(if(full) 0 else NA_real_, nrow = nvars,
		ncol = nvars, dimnames = list(names.all, names.all))

	vcovs2 <- lapply(vcovs, function(v) {
		i <- match(fixCoefNames(dimnames(v)[[1L]]), names.all)
		vcov0[i, i] <- v
		return(vcov0)
	})
	b1 <- object$coefArray[, 1L, ]
	if(full) {
		b1[is.na(b1)] <- 0
		avgb <- object$coef.shrinkage
	} else {
		avgb <- object$avg.model[, 1L]
	}
	
	res <- sapply(nvarseq, function(c1) sapply(nvarseq, function(c2) {
		 weighted.mean(sapply(vcovs2, "[", c1, c2) + (b1[, c1] - avgb[c1]) *
		 (b1[, c2] - avgb[c2]), wts, na.rm = TRUE)
	}))
	dimnames(res) <- list(names.all, names.all)
	return(res)
}

`logLik.averaging` <- function (object, ...) {
	models <- attr(object, "modelList")
	if(is.null(models)) {
		nobs <- attr(object, "nobs")
		apply(object$summary, 1L, function(x) structure(list(x[2L]),
			df = x[1L], nobs = nobs, class = "logLik"))
	} else {
		structure(lapply(attr(object, "modelList"), .getLogLik()),
			names = rownames(object$summary))
	}
}

`coefTable.averaging` <-
function (model, full = FALSE, ...) {
	if(full) {
		summary(model)$coefmat.full[, 1L:2L]
	} else model$avg.model[, 1L:2L]
}
Browse the archive

https://github.com/cran/MuMIn