swh:1:snp:cf20732a2a91717cec7759cd75c5d1a72600b018
Tip revision: 85d0a0463621c30063752c5577e591be0de5ae60 authored by Dominique Makowski on 26 July 2021, 08:40:08 UTC
version 0.10.5
version 0.10.5
Tip revision: 85d0a04
check_prior.R
#' Check if Prior is Informative
#'
#' Performs a simple test to check whether the prior is informative to the posterior. This idea, and the accompanying heuristics, were discussed in \href{https://statmodeling.stat.columbia.edu/2019/08/10/}{this blogpost}.
#'
#' @param method Can be \code{"gelman"} or \code{"lakeland"}. For the
#' \code{"gelman"} method, if the SD of the posterior is more than 0.1 times
#' the SD of the prior, then the prior is considered as informative. For the
#' \code{"lakeland"} method, the prior is considered as informative if the
#' posterior falls within the 95\% HDI of the prior.
#' @param simulate_priors Should prior distributions be simulated using
#' \code{\link{simulate_prior}} (default; faster) or sampled via
#' \code{\link{unupdate}} (slower, more accurate).
#' @inheritParams effective_sample
#' @inheritParams hdi
#'
#' @return A data frame with two columns: The parameter names and the quality
#' of the prior (which might be \code{"informative"}, \code{"uninformative"})
#' or \code{"not determinable"} if the prior distribution could not be
#' determined).
#'
#' @examples
#' \dontrun{
#' library(bayestestR)
#' if (require("rstanarm")) {
#' model <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1, refresh = 0)
#' check_prior(model, method = "gelman")
#' check_prior(model, method = "lakeland")
#'
#' # An extreme example where both methods diverge:
#' model <- stan_glm(mpg ~ wt,
#' data = mtcars[1:3, ],
#' prior = normal(-3.3, 1, FALSE),
#' prior_intercept = normal(0, 1000, FALSE),
#' refresh = 0
#' )
#' check_prior(model, method = "gelman")
#' check_prior(model, method = "lakeland")
#' plot(si(model)) # can provide visual confirmation to the Lakeland method
#' }
#' }
#' @references https://statmodeling.stat.columbia.edu/2019/08/10/
#' @export
check_prior <- function(model, method = "gelman", simulate_priors = TRUE, ...) {
UseMethod("check_prior")
}
#' @export
check_prior.brmsfit <- function(model,
method = "gelman",
simulate_priors = TRUE,
effects = c("fixed", "random", "all"),
component = c("conditional", "zi", "zero_inflated", "all"),
parameters = NULL,
verbose = TRUE,
...) {
# check arguments
effects <- match.arg(effects)
component <- match.arg(component)
posteriors <- insight::get_parameters(
model,
effects = effects,
component = component,
parameters = parameters
)
if (isTRUE(simulate_priors)) {
priors <- simulate_prior(
model,
effects = effects,
component = component,
parameters = parameters,
verbose = verbose
)
} else {
priors <- unupdate(model, verbose = FALSE)
priors <- insight::get_parameters(
priors,
effects = effects,
component = component,
parameters = parameters
)
}
.check_prior(priors, posteriors, method,
verbose = verbose,
cleaned_parameters = insight::clean_parameters(model)
)
}
#' @export
check_prior.stanreg <- check_prior.brmsfit
#' @export
check_prior.blavaan <- check_prior.brmsfit
#' @keywords internal
.check_prior <- function(priors, posteriors, method = "gelman", verbose = TRUE, cleaned_parameters = NULL) {
# sanity check for matching parameters. Some weird priors like
# rstanarm's R2 prior might cause problems
if (!is.null(cleaned_parameters) && ncol(priors) != ncol(posteriors)) {
## TODO for now only fixed effects
if ("Effects" %in% colnames(cleaned_parameters)) {
cleaned_parameters <- cleaned_parameters[cleaned_parameters$Effects == "fixed", ]
}
# rename cleaned parameters, so they match name of prior parameter column
cp <- cleaned_parameters$Cleaned_Parameter
cp <- gsub("(.*)(\\.|\\[)\\d+(\\.|\\])", "\\1", cp)
cp[cp == "Intercept"] <- "(Intercept)"
cleaned_parameters$Cleaned_Parameter <- cp
colnames(priors)[colnames(priors) == "Intercept"] <- "(Intercept)"
# at this point, the colnames of "posteriors" should match "cp$Parameter",
# while colnames of "priors" should match "cp$Cleaned_Parameter". To ensure
# that ncol of priors is the same as ncol of posteriors, we now duplicate
# prior columns and match them with the posteriors
if (ncol(posteriors) > ncol(priors)) {
matched_columns <- stats::na.omit(match(cleaned_parameters$Cleaned_Parameter, colnames(priors)))
matched_column_names <- stats::na.omit(match(colnames(priors), cleaned_parameters$Cleaned_Parameter))
priors <- priors[matched_columns]
} else {
matched_columns <- stats::na.omit(match(colnames(priors), cleaned_parameters$Cleaned_Parameter))
matched_column_names <- stats::na.omit(match(cleaned_parameters$Cleaned_Parameter, colnames(priors)))
priors <- priors[matched_columns]
}
colnames(priors) <- cleaned_parameters$Parameter[matched_column_names]
}
# still different ncols?
if (ncol(priors) != ncol(posteriors)) {
common_columns <- intersect(colnames(priors), colnames(posteriors))
priors <- priors[common_columns]
posteriors <- posteriors[common_columns]
if (verbose) {
warning("Parameters and priors could not be fully matched. Only returning results for parameters with matching priors.", call. = FALSE)
}
}
# for priors whose distribution cannot be simulated, prior values are
# all NA. Catch those, and warn user
all_missing <- sapply(priors, function(i) {
all(is.na(i))
})
if (any(all_missing) && verbose) {
warning("Some priors could not be simulated.", call. = FALSE)
}
.gelman <- function(prior, posterior) {
if (all(is.na(prior))) {
"not determinable"
} else if (stats::sd(posterior, na.rm = TRUE) > 0.1 * stats::sd(prior, na.rm = TRUE)) {
"informative"
} else {
"uninformative"
}
}
.lakeland <- function(prior, posterior) {
if (all(is.na(prior))) {
"not determinable"
} else {
hdi <- hdi(prior, ci = .95)
r <- rope(posterior, ci = 1, range = c(hdi$CI_low, hdi$CI_high))
if (as.numeric(r) > 0.99) {
"informative"
} else {
"misinformative"
}
}
}
if (method == "gelman") {
result <- mapply(.gelman, priors, posteriors)
} else if (method == "lakeland") {
result <- mapply(.lakeland, priors, posteriors)
} else {
stop("method should be 'gelman' or 'lakeland'.")
}
data.frame(
Parameter = names(posteriors),
Prior_Quality = unname(result),
stringsAsFactors = FALSE
)
}