`model.avg` <-
function (object, ..., revised.var = TRUE) {
    if (isTRUE("method" %in% names(match.call())))
        stop("argument 'method' is no longer accepted")
    UseMethod("model.avg")
}

.coefarr.avg <-
function(cfarr, weight, revised.var, full, alpha) {
    weight <- weight / sum(weight)
    nCoef <- dim(cfarr)[3L]
    if(full) {
        cfarr[, 1:2, ][is.na(cfarr[, 1:2, ])] <- 0
        if(!all(is.na(cfarr[, 3L, ])))
            cfarr[, 3L, ][is.na(cfarr[, 3L, ])] <- Inf
    }
    avgcoef <- array(dim = c(nCoef, 5L),
        dimnames = list(dimnames(cfarr)[[3L]],
            c("Estimate", "Std. Error", "Adjusted SE", "Lower CI", "Upper CI")))
    for(i in seq_len(nCoef))
        avgcoef[i, ] <- par.avg(cfarr[, 1L, i], cfarr[, 2L, i], weight,
            df = cfarr[, 3L, i], alpha = alpha, revised.var = revised.var)
    avgcoef[is.nan(avgcoef)] <- NA
    return(avgcoef)
}
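
# Illustrative sketch (not package code; the numbers are made up): the
# arithmetic that par.avg() is expected to perform for one coefficient, given
# per-model estimates 'b', their standard errors 'se', and normalized
# weights 'w':
#   b <- c(1.20, 0.90, 1.05); se <- c(0.30, 0.25, 0.40); w <- c(0.5, 0.3, 0.2)
#   bbar <- sum(w * b)                          # model-averaged estimate
#   # revised.var = TRUE: unconditional variance (Burnham & Anderson 2004)
#   se.revised <- sqrt(sum(w * (se^2 + (b - bbar)^2)))
#   # revised.var = FALSE: older estimator, weighted mean of component SEs
#   se.classic <- sum(w * sqrt(se^2 + (b - bbar)^2))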

`model.avg.model.selection` <-
function(object, subset, fit = FALSE, ..., revised.var = TRUE) {
    if(!missing(subset)) {
        cl <- match.call()
        cl[[1L]] <- as.name("subset")
        names(cl)[2L] <- "x"
        object <- eval(cl[1L:3L], parent.frame())
    }
    if(fit || length(list(...))) {
        cl <- match.call()
        cl$fit <- NULL
        arg1 <- names(cl)[-(1L:2L)] %in% names(formals("model.avg.default"))
        cl1 <- cl[c(TRUE, TRUE, !arg1)]
        cl1[[1L]] <- as.name("get.models")
        if(is.null(cl1$subset)) cl1$subset <- NA
        cl2 <- cl[c(TRUE, TRUE, arg1)]
        cl2[[2L]] <- cl1
        cl2[[1L]] <- as.name("model.avg")
        #message("recreating the model objects")
        return(eval(cl2, parent.frame()))
    }
    ct <- attr(object, "coefTables")
    cfarr <- coefArray(ct)
    coefNames <- c(dimnames(cfarr)[[3L]])
    weight <- object$weight / sum(object$weight)
    avgcoef <- .coefarr.avg(cfarr, weight, revised.var, FALSE, alpha = 0.05)
    missing.par <- is.na(cfarr[, 1L, ])
    coef.shrink <- avgcoef[, 1L] *
        colSums(array(weight * as.double(!missing.par), dim = dim(cfarr)[c(1L, 3L)]))
    #allterms1 <- lapply(attr(object, "calls"), function(x)
    #    getAllTerms(as.formula(x[[switch(as.character(x[[1L]]),
    #    lme =, lme.formula = "fixed", gls = "model", "formula")]])))
    all.terms <- attr(object, "terms")
    all.vterms <- all.terms[!(all.terms %in% attr(all.terms, "interceptLabel") |
        apply(is.na(object[, all.terms]), 2L, all))]
    #allterms1 <- apply(!is.na(object[, all.vterms, drop = FALSE]), 1L,
    #    function(x) all.vterms[x])
    allterms1 <- applyrns(!is.na(object[, all.vterms, drop = FALSE]),
        function(x) all.vterms[x])
    all.model.names <- .modelNames(allTerms = allterms1, uqTerms = all.vterms)
    mstab <- object[, -(seq_len(ncol(object) - 5L))]
    colnames(mstab)[4L:5L] <- c("Delta", "Weight")
    rownames(mstab) <- all.model.names
    avgcoef[is.nan(avgcoef)] <- NA_real_
    ret <- list(
        summary = as.data.frame(mstab),
        term.codes = attr(all.model.names, "variables"),
        avg.model = avgcoef,
        coef.shrinkage = coef.shrink,
        coefArray = cfarr,
        importance = importance(object),
        beta = attr(object, "beta"),
        term.names = coefNames,
        x = NULL,
        residuals = NULL,
        formula = if(!is.null(attr(object, "global")))
            formula(attr(object, "global")) else NULL,
        call = match.call()
    )
    attr(ret, "beta") <- attr(object, "beta")
    attr(ret, "nobs") <- attr(object, "nobs")
    attr(ret, "revised.var") <- revised.var
    class(ret) <- "averaging"
    return(ret)
}
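
# Usage sketch (illustrative only; 'dat' and the variable names are made up):
# averaging straight from a "model.selection" table reuses its stored
# coefficient tables, whereas 'fit = TRUE' (or any extra arguments) re-fits
# the component models via get.models() and falls through to model.avg.default().
#   fm <- lm(y ~ X1 + X2 + X3, data = dat, na.action = na.fail)
#   ms <- dredge(fm)
#   avg1 <- model.avg(ms, subset = delta < 4)             # uses coefTables
#   avg2 <- model.avg(ms, subset = delta < 4, fit = TRUE) # re-fits the models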

`model.avg.default` <-
function(object, ..., beta = FALSE, rank = NULL, rank.args = NULL,
    revised.var = TRUE, dispersion = NULL, ct.args = NULL) {
    if (inherits(object, "list")) {
        if(length(object) == 0L) stop("'object' is an empty list")
        models <- object
        object <- object[[1L]]
        if (!is.null(rank) || is.null(rank <- attr(models, "rank"))) {
            rank <- .getRank(rank, rank.args = rank.args, object = object)
        }
    } else {
        models <- list(object, ...)
        rank <- .getRank(rank, rank.args = rank.args, object = object)
    }
    nModels <- length(models)
    if(nModels == 1L) stop("only one model supplied. Nothing to do")
    .checkModels(models)
    alpha <- 0.05
    .fnull <- function(...) return(NULL)
    ICname <- deparse(attr(rank, "call")[[1L]])
    allterms1 <- lapply(models, getAllTerms)
    all.terms <- unique(unlist(allterms1, use.names = FALSE))
    # sort by level (main effects first)
    all.terms <- all.terms[order(vapply(gregexpr(":", all.terms),
        function(x) if(x[1L] == -1L) 0L else length(x), numeric(1L)), all.terms)]
    # all.model.names <- modelNames(models, asNumeric = FALSE,
    #    withRandomTerms = FALSE, withFamily = FALSE)
    all.model.names <- .modelNames(allTerms = allterms1, uqTerms = all.terms)
    #if(is.null(names(models))) names(models) <- all.model.names
    coefTableCall <- as.call(c(alist(coefTable, models[[i]],
        dispersion = dispersion[i]), ct.args))
    # check if models are unique:
    if(!is.null(dispersion)) dispersion <- rep(dispersion, length.out = nModels)
    coefTables <- vector(nModels, mode = "list")
    for(i in seq_len(nModels)) coefTables[[i]] <- eval(coefTableCall)
    #coefTable(models[[i]], dispersion = dispersion[i])
    mcoeffs <- lapply(coefTables, "[", , 1L)
    dup <- unique(sapply(mcoeffs, function(i) which(sapply(mcoeffs, identical, i))))
    dup <- dup[sapply(dup, length) > 1L]
    if (length(dup) > 0L)
        stop("models are not unique. Duplicates: ",
            prettyEnumStr(sapply(dup, paste, sep = "", collapse = " = "), quote = "'"))
    LL <- .getLik(object)
    logLik <- LL$logLik
    lLName <- LL$name
    ic <- vapply(models, rank, numeric(1L))
    logLiks <- lapply(models, logLik)
    delta <- ic - min(ic)
    weight <- exp(-delta / 2) / sum(exp(-delta / 2))
    model.order <- order(weight, decreasing = TRUE)
    # ----!!! From now on, everything MUST BE ORDERED by 'weight' !!!-----------
    mstab <- cbind(df = vapply(logLiks, attr, numeric(1L), "df"),
        logLik = as.numeric(logLiks), IC = ic, Delta = delta, Weight = weight,
        deparse.level = 0L)
    if(!is.null(dispersion)) mstab <- cbind(mstab, Dispersion = dispersion)
    rownames(mstab) <- all.model.names
    mstab <- mstab[model.order, ]
    weight <- mstab[, "Weight"] # has been sorted in table
    models <- models[model.order]
    coefTables <- coefTables[model.order]
    if (beta) {
        response.sd <- sd(model.response(model.frame(object)))
        for(i in seq_along(coefTables))
            coefTables[[i]][, 1L:2L] <- coefTables[[i]][, 1L:2L] *
                apply(model.matrix(models[[i]]), 2L, sd) / response.sd
    }
    cfarr <- coefArray(coefTables)
    coefNames <- c(dimnames(cfarr)[[3L]])
    avgcoef <- .coefarr.avg(cfarr, weight, revised.var, FALSE, alpha = 0.05)
    missing.par <- is.na(cfarr[, 1L, ])
    coef.shrink <- avgcoef[, 1L] *
        colSums(array(weight * as.double(!missing.par), dim = dim(cfarr)[c(1L, 3L)]))
    names(all.terms) <- seq_along(all.terms)
    colnames(mstab)[3L] <- ICname
    # Benchmark: 3.7x faster
    #system.time(for(i in 1:10000) t(array(unlist(p), dim = c(length(all.terms), length(models)))))
    #system.time(for(i in 1:10000) do.call("rbind", p))
    vpresent <- do.call("rbind", lapply(models, function(x) all.terms %in% getAllTerms(x)))
    importance <- apply(weight * vpresent, 2L, sum)
    names(importance) <- all.terms
    o <- order(importance, decreasing = TRUE)
    importance <- importance[o]
    attr(importance, "n.models") <- structure(colSums(vpresent)[o], names = all.terms)
    class(importance) <- c("importance", "numeric")
    #global.mm <- model.matrix(fit)
    #cnmmxx2 <- unique(unlist(lapply(gm, function(x) names(coef(x)))))
    #mmx <- gmm[, cnmmxx[match(colnames(gmm), cnmmxx, nomatch = 0)]]
    mmxs <- tryCatch(cbindDataFrameList(lapply(models, model.matrix)),
        error = .fnull, warning = .fnull)
    # Far less efficient:
    #mmxs <- lapply(models, model.matrix)
    #mx <- mmxs[[1]];
    #for (i in mmxs[-1])
    #    mx <- cbind(mx, i[, !(colnames(i) %in% colnames(mx)), drop = FALSE])
    # residuals averaged (with brute force)
    #rsd <- tryCatch(apply(vapply(models, residuals, residuals(object)), 1L,
    #    weighted.mean, w = weight), error = .fnull)
    #rsd <- NULL ## XXX: how to calc residuals ?
    modelClasses <- lapply(models, class)
    frm <- if(all(vapply(modelClasses[-1L], identical, logical(1L), modelClasses[[1L]]))) {
        trm <- tryCatch(terms(models[[1L]]),
            error = function(e) terms(formula(models[[1L]])))
        response <- attr(trm, "response")
        m1 <- models[[1L]]
        makeArgs(m1, all.terms, opt = list(
            response = if(response > 0L) attr(trm, "variables")[[response + 1L]] else NULL,
            gmFormulaEnv = environment(formula(m1)),
            intercept = !identical(unique(unlist(lapply(allterms1, attr, "intercept"))), 0),
            interceptLabel = unique(unlist(lapply(allterms1, attr, "interceptLabel"))),
            # random = attr(allTerms0, "random"),
            gmCall = getCall(m1),
            gmEnv = parent.frame(),
            allTerms = all.terms,
            random = . ~ .
        ))[[1L]]
    } else NA
    ret <- list(
        summary = as.data.frame(mstab),
        term.codes = attr(all.model.names, "variables"),
        avg.model = avgcoef,
        coef.shrinkage = coef.shrink,
        coefArray = cfarr,
        importance = importance,
        beta = beta,
        term.names = coefNames,
        x = mmxs,
        residuals = NULL, # no residuals
        formula = frm,
        call = match.call()
    )
    attr(ret, "modelList") <- models
    attr(ret, "beta") <- beta
    attr(ret, "nobs") <- nobs(object)
    attr(ret, "revised.var") <- revised.var
    class(ret) <- "averaging"
    return(ret)
}

`coef.averaging` <-
function(object, full = FALSE, ...)
    if(full) object$coef.shrinkage else object$avg.model[, 1L]

#TODO: predict p -="response" + average on response scale
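
# Usage sketch (illustrative only; objects are made up): averaging an explicit
# set of fitted models and extracting the two kinds of averaged coefficients
# returned by coef.averaging() above.
#   m1 <- lm(y ~ X1, data = dat)
#   m2 <- lm(y ~ X1 + X2, data = dat)
#   m3 <- lm(y ~ X2, data = dat)
#   avg <- model.avg(m1, m2, m3)
#   coef(avg)               # "subset" (conditional) average
#   coef(avg, full = TRUE)  # full average with shrinkage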

`predict.averaging` <-
function(object, newdata = NULL, se.fit = FALSE, interval = NULL,
    type = NA, backtransform = FALSE, full = TRUE, ...)
{
    if (!missing(interval)) .NotYetUsed("interval", error = FALSE)
    if(backtransform && !is.na(type) && type == "response")
        warning("back-transforming predictions that are already on the response scale")
    models <- attr(object, "modelList")
    if(is.null(models))
        stop("can only predict from 'averaging' object created with a model list")
    # Benchmark: vapply is ~4x faster
    #system.time(for(i in 1:1000) sapply(models, inherits, what = "gam")) /
    #system.time(for(i in 1:1000) vapply(models, inherits, logical(1L), what = "gam"))
    # If all models inherit from lm:
    if ((missing(se.fit) || !se.fit) && (is.na(type) || type == "link") &&
        all(linherits(models, c(gam = FALSE, lm = TRUE))) &&
        !any(is.na(object$coef.shrinkage))) {
        coeff <- coef(object, full = full)
        frm <- formula(object)
        tt <- delete.response(terms(frm))
        X <- model.matrix(object)
        if (missing(newdata) || is.null(newdata)) {
            Xnew <- X
        } else {
            xlev <- unlist(unname(lapply(models, "[[", "xlevels")),
                recursive = FALSE, use.names = TRUE)
            Xnew <- model.matrix(tt, data = newdata, xlev = xlev)
        }
        Xnew <- Xnew[, match(names(coeff), colnames(Xnew), nomatch = 0L)]
        ret <- (Xnew %*% coeff)[, 1L]
        Xnew
        #if (se.fit) {
        #    scale <- 1
        #    covmx <- solve(t(X) %*% X)
        #    se <- sqrt(diag(Xnew %*% covmx %*% t(Xnew))) * sqrt(scale) ## TODO: use matmult
        #    return(list(fit = y, se.fit = se))
        #}
    } else { # otherwise, use brute force:
        if(full == FALSE) warning("argument 'full' ignored")
        cl <- as.list(match.call())
        cl$backtransform <- cl$full <- NULL
        cl[[1L]] <- as.name("predict")
        #if("type" %in% names(cl)) cl$type <- "link"
        if(!missing(newdata)) cl$newdata <- as.name("newdata")
        cl <- as.call(cl)
        #predict.lme <- MuMIn:::predict.lme
        pred <- lapply(models, function(x) {
            cl[[2L]] <- x
            y <- tryCatch({
                y <- eval(cl)
                if(is.numeric(y)) y else
                    structure(as.list(y[c(1L, 2L)]), names = c("fit", "se.fit"))
            }, error = function(e) e)
        })
        err <- sapply(pred, inherits, "condition")
        if (any(err)) {
            lapply(pred[err], warning)
            stop(sprintf(ngettext(sum(err),
                "'predict' for model %s caused error",
                "'predict' for models %s caused errors"),
                prettyEnumStr(names(models[err]), quote = "'")))
        }
        .untransform <- function(fit, se.fit = NULL, models) {
            fam <- tryCatch(vapply(models, function(z)
                unlist(family(z)[c("family", "link")]), character(2L)),
                error = function(e) NULL)
            if (!is.null(fam)) {
                if(any(fam[, 1L] != fam[, -1L]))
                    stop("cannot calculate prediction on the response scale ",
                        "with models using different families or link functions")
                fam1 <- family(models[[1L]])
                if(is.null(se.fit)) return(fam1$linkinv(fit)) else
                    return(list(fit = fam1$linkinv(fit),
                        se.fit = se.fit * abs(fam1$mu.eta(fit))))
            }
            return(NULL)
        }
        if (all(sapply(pred, is.list))) {
            #if(all(sapply(pred, function(x) c("fit", "se.fit") %in% names(x)))) {
            fit <- do.call("cbind", lapply(pred, "[[", "fit"))
            se.fit <- do.call("cbind", lapply(pred, "[[", "se.fit"))
            revised.var <- attr(object, "revised.var")
            apred <- unname(vapply(seq(nrow(fit)), function(i)
                par.avg(fit[i, ], se.fit[i, ], weight = object$summary$Weight,
                    df = NA_integer_, revised.var = revised.var),
                FUN.VALUE = double(5L)))
            # TODO: ase!
            #no.ase <- all(is.na(object$avg.model[, 3])) # if(no.ase) 2 else 3
            ret <- if (backtransform)
                .untransform(apred[1L, ], apred[2L, ], models = models) else
                list(fit = apred[1L, ], se.fit = apred[2L, ])
        } else {
            #tryCatch({
            i <- !vapply(pred, is.numeric, FALSE)
            if(any(i)) pred[i] <- lapply(pred[i], "[[", 1L)
            ret <- apply(do.call("cbind", pred), 1L, weighted.mean,
                w = object$summary$Weight)
            if (backtransform) ret <- .untransform(ret, models = models)
        }
    }
    return(ret)
}
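
# Usage sketch (illustrative only; 'avg' and 'newdat' are made up): for an
# "averaging" object that carries a model list, predictions come either from
# the averaged coefficients (all-lm case, link scale) or from averaging
# per-model predict() calls.
#   predict(avg, newdata = newdat)                 # linear predictor
#   predict(avg, newdata = newdat, se.fit = TRUE)  # per-model route, with SEs
#   predict(avg, newdata = newdat, backtransform = TRUE)  # inverse-link scale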

`fitted.averaging` <-
function (object, ...) predict.averaging(object)

`model.matrix.averaging` <-
function (object, ...) object$x

`summary.averaging` <-
function (object, ...) {
    cf <- object$avg.model
    .makecoefmat <- function(cf) {
        no.ase <- all(is.na(cf[, 3L]))
        z <- abs(cf[, 1L] / cf[, if(no.ase) 2L else 3L])
        pval <- 2 * pnorm(z, lower.tail = FALSE)
        cbind(cf[, if(no.ase) 1L:2L else 1L:3L],
            `z value` = z, `Pr(>|z|)` = zapsmall(pval))
    }
    object$coefmat <- .makecoefmat(object$avg.model)
    object$coefmat.full <- .makecoefmat(.coefarr.avg(object$coefArray,
        object$summary$Weight, attr(object, "revised.var"), TRUE, 0.05))
    object$coef.nmod <- colSums(!is.na(object$coefArray[, 1L, ]))
    structure(object, class = c("summary.averaging", "averaging"))
}

`confint.averaging` <-
function (object, parm, level = 0.95, full = FALSE, ...) {
    a2 <- 1 - level
    a <- a2 / 2
    cf <- object$coefArray[, 1L, ]
    pnames <- colnames(cf)
    if (missing(parm)) parm <- pnames else
        if (is.numeric(parm)) parm <- pnames[parm]
    missing.par <- is.na(cf)
    se <- object$coefArray[, 2L, ]
    dfs <- object$coefArray[, 3L, ]
    if(full) {
        se[missing.par] <- cf[missing.par] <- 0
        if(!all(is.na(dfs))) dfs[missing.par] <- Inf
    }
    wts <- object$summary$Weight
    ci <- t(sapply(parm, function(i)
        par.avg(cf[, i], se[, i], wts, dfs[, i], alpha = a2)))[, 4L:5L]
    pct <- .xget("stats", "format.perc")(c(a, 1L - a), 3L)
    ci[is.na(object$coef.shrinkage), ] <- NA_real_
    colnames(ci) <- pct
    return(ci)
}

`print.summary.averaging` <-
function (x, digits = max(3L, getOption("digits") - 3L),
    signif.stars = getOption("show.signif.stars"), ...) {
    cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
        "\n\n", sep = "")
    cat("Component models: \n")
    print(round(as.matrix(x$summary), 2L), na.print = "")
    cat("\nTerm codes: \n")
    print.default(x$term.codes, quote = FALSE)
    cat("\nModel-averaged coefficients: \n")
    if (nnotdef <- sum(is.na(x$coefmat[, 1L])))
        cat("(", nnotdef, " not defined because of singularities in all ",
            "component models) \n", sep = "")
    hasPval <- TRUE
    printCoefmat(x$coefmat, P.values = hasPval, has.Pvalue = hasPval,
        digits = digits, signif.stars = signif.stars, signif.legend = FALSE)
    cat("\nFull model-averaged coefficients (with shrinkage):", "\n")
    printCoefmat(x$coefmat.full, P.values = hasPval, has.Pvalue = hasPval,
        digits = digits, signif.stars = signif.stars)
    #if (no.ase) cat("Confidence intervals are unadjusted \n")
    #printCoefmat(matrix(x$coef.shrinkage, nrow = 1L,
    #    dimnames = list("", x$term.names)), P.values = FALSE,
    #    has.Pvalue = FALSE, cs.ind = seq_along(x$term.names), tst.ind = NULL)
    cat("\nRelative variable importance: \n")
    print(round(x$importance, 2L))
}
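
# Usage sketch (illustrative only; 'avg' is made up): summary() adds z-tests
# for both the conditional and the full (shrinkage) averages, and confint()
# takes its limits from the par.avg() output.
#   summary(avg)
#   confint(avg)              # conditional average, 95% CI
#   confint(avg, full = TRUE) # full average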

`print.averaging` <-
function(x, ...)
{
    cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
        "\n\n", sep = "")
    cat("Component models:", "\n")
    comp.names <- rownames(x$summary)
    comp.names[comp.names == ""] <- "null"
    cat(format(sQuote(comp.names), justify = "l"), fill = TRUE)
    cat("\nCoefficients:", "\n")
    print(rbind(subset = x$avg.model[, 1L], full = x$coef.shrinkage))
    invisible(x)
}

`vcov.averaging` <-
function (object, full = FALSE, ...) {
    models <- attr(object, "modelList")
    if(is.null(models))
        stop("can calculate covariance matrix only from ",
            "'averaging' object created with a model list")
    vcovs <- lapply(lapply(models, vcov), as.matrix)
    names.all <- dimnames(object$coefArray)[[3L]]
    nvars <- length(names.all)
    nvarseq <- seq(nvars)
    wts <- object$summary$Weight
    wts <- wts / sum(wts) # normalize just in case
    vcov0 <- matrix(if(full) 0 else NA_real_, nrow = nvars, ncol = nvars,
        dimnames = list(names.all, names.all))
    vcovs2 <- lapply(vcovs, function(v) {
        i <- match(fixCoefNames(dimnames(v)[[1L]]), names.all)
        vcov0[i, i] <- v
        return(vcov0)
    })
    b1 <- object$coefArray[, 1L, ]
    if(full) {
        b1[is.na(b1)] <- 0
        avgb <- object$coef.shrinkage
    } else {
        avgb <- object$avg.model[, 1L]
    }
    res <- sapply(nvarseq, function(c1) sapply(nvarseq, function(c2) {
        weighted.mean(sapply(vcovs2, "[", c1, c2) +
            (b1[, c1] - avgb[c1]) * (b1[, c2] - avgb[c2]), wts, na.rm = TRUE)
    }))
    dimnames(res) <- list(names.all, names.all)
    return(res)
}

`logLik.averaging` <-
function (object, ...) {
    models <- attr(object, "modelList")
    if(is.null(models)) {
        nobs <- attr(object, "nobs")
        apply(object$summary, 1L, function(x)
            structure(list(x[2L]), df = x[1L], nobs = nobs, class = "logLik"))
    } else {
        structure(lapply(attr(object, "modelList"), .getLogLik()),
            names = rownames(object$summary))
    }
}

`coefTable.averaging` <-
function (model, full = FALSE, ...) {
    if(full) {
        summary(model)$coefmat.full[, 1L:2L]
    } else model$avg.model[, 1L:2L]
}
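
# Usage sketch (illustrative only; 'avg' is made up and must have been created
# from a model list for vcov()):
#   vcov(avg)                 # averaged covariance matrix (conditional)
#   vcov(avg, full = TRUE)    # full-average covariance matrix
#   logLik(avg)               # per-component log-likelihoods
#   coefTable(avg)            # averaged estimates and standard errors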