sns.methods.R
######################################
# #
# Methods for sns objects #
# #
######################################
# convenience function for partitioning the state space
sns.make.part <- function(K, nsubset, method = "naive") {
if (method != "naive") stop("invalid method") #TODO: consider implementing more sophisticated partitioning methods
if (nsubset > K) stop("number of partitions cannot exceed state space dimensionality")
# deterimining number of coordinates per subset (nvec)
nvec <- rep(0, nsubset)
nleft <- K
c <- 1
while (nleft > 0) {
nvec[c] <- nvec[c] + 1
nleft <- nleft - 1
c <- c %% nsubset + 1
}
# assigning coordinates to subsets
ret <- list()
c <- 0
for (n in 1:nsubset) {
ret[[n]] <- as.integer(c + 1:nvec[n])
c <- c + nvec[n]
}
if (sns.check.part(ret, K)) return (ret)
else stop("unexpectedly invalid state space partitioning")
}
# function for checking that state space partitioning is valid
# (mutually-exclusive and collectively-exhaustive)
sns.check.part <- function(part, K) {
return (identical(as.integer(sort(unlist(part))), 1:K))
}
# validating twice-differentiability and concavity of log-density
sns.check.logdensity <- function(x, fghEval
#, numderiv = 0
, numderiv.method = c("Richardson", "complex"), numderiv.args = list()
, blocks = append(list(1:length(x)), as.list(1:length(x)))
, dx = rep(1.0, length(x))
, nevals = 100, negdef.tol = 1e-8
, ...) {
dx <- rep(dx, length(x)) # in case scalar dx is upplied by user
fgh.ini <- fghEval(x, ...)
ret <- list(check.ld.struct = NA, numderiv = NA, check.length.g = NA, check.dim.h = NA
, x.mat = NA, fgh.ini = NA, fgh.num.ini = NA
#, fgh.list = NA, fgh.num.list = NA
, f.vec = NA, g.mat = NA, g.mat.num = NA, h.array = NA, h.array.num = NA
, t.evals = NA, t.num.eval = NA
, is.g.num.finite = NA, is.h.num.finite = NA
, is.g.finite = NA, is.h.finite = NA
, g.diff.max = NA, h.diff.max = NA
, is.negdef.num = NA, is.negdef = NA
)
attr(ret, "class") <- "sns.check.logdensity"
# determine if analytical derivatives are provided
# numderiv = 0: g,h analytical
# numderiv = 1: g analytical
# numderiv = 2: no analytical
is.fgh.list <- is.list(fgh.ini)
if (is.fgh.list) {
fgh.list.names <- names(fgh.ini)
have.fgh <- c("f", "g", "h") %in% fgh.list.names
if (all(have.fgh)) { # no numerical differentiation needed
ret$check.ld.struct <- TRUE
ret$numderiv <- 0
} else if (all(have.fgh[1:2])) { # numerical gradient needed
ret$check.ld.struct <- TRUE
ret$numderiv <- 1
} else {
ret$check.ld.struct <- FALSE
return (ret)
}
} else { # need numerical gradient and Hessian
ret$check.ld.struct <- TRUE
ret$numderiv <- 2
}
# fghEval with numerical differentiation
numderiv.method <- match.arg(numderiv.method)
if (ret$numderiv == 2) {
fghEval.num <- function(x, ...) {
f <- fghEval(x, ...)
g <- grad(func = fghEval, x = x, ..., method = numderiv.method, method.args = numderiv.args)
h <- hessian(func = fghEval, x = x, ..., method = numderiv.method, method.args = numderiv.args)
return (list(f = f, g = g, h = h))
}
} else {
fghEval.num <- function(x, ...) {
f <- fghEval(x, ...)$f
g <- grad(func = function(x, ...) fghEval(x, ...)$f, x = x, ..., method = numderiv.method, method.args = numderiv.args)
h <- hessian(func = function(x, ...) fghEval(x, ...)$f, x = x, ..., method = numderiv.method, method.args = numderiv.args)
return (list(f = f, g = g, h = h))
}
}
fgh.num.ini <- fghEval.num(x, ...)
#return (fgh.num.ini)
# check dimensional consistency of gradient and Hessian
K <- length(x)
if (ret$numderiv == 0) {
ret$check.length.g <- length(fgh.ini$g) == K
ret$check.dim.h <- nrow(fgh.ini$h) == K && ncol(fgh.ini$h) == K
} else if (ret$numderiv == 1) {
ret$check.length.g <- length(fgh.ini$g) == K
ret$check.dim.h <- nrow(fgh.num.ini$h) == K && ncol(fgh.num.ini$h) == K
} else if (ret$numderiv == 2) {
ret$check.length.g <- length(fgh.num.ini$g) == K
ret$check.dim.h <- nrow(fgh.num.ini$h) == K && ncol(fgh.num.ini$h) == K
}
ret$x.mat <- sapply(1:K, function(k) runif(nevals, min = x[k] - 0.5 * dx[k], max = x[k] + 0.5 * dx[k]))
ret$x.mat[1, ] <- x
t <- proc.time()[3]
fgh.list <- lapply(1:nevals, function(n) fghEval(ret$x.mat[n, ], ...))
ret$t.evals <- proc.time()[3] - t
t <- proc.time()[3]
fgh.num.list <- lapply(1:nevals, function(n) fghEval.num(ret$x.mat[n, ], ...))
ret$t.num.evals <- proc.time()[3] - t
# twice-differentiability
f.vec <- sapply(fgh.num.list, function(u) u$f)
index.f.finite <- which(is.finite(f.vec))
n.f.finite <- length(index.f.finite)
ret$f.vec <- f.vec
g.mat.num <- t(sapply(fgh.num.list, function(u) u$g))
ret$g.mat.num <- g.mat.num
ret$is.g.num.finite <- all(is.finite(g.mat.num[index.f.finite, ]))
h.array.num <- array(t(sapply(fgh.num.list, function(u) u$h)), dim = c(nevals, K, K))
ret$h.array.num <- h.array.num
ret$is.h.num.finite <- all(is.finite(h.array.num[index.f.finite, , ]))
# closeness of analytical and numerical results (we need better metrics of difference)
if (ret$numderiv < 2) {
g.mat <- t(sapply(fgh.list, function(u) u$g))
ret$g.mat <- g.mat
ret$is.g.finite <- all(is.finite(g.mat[index.f.finite, ]))
ret$g.diff.max <- max(sapply(1:n.f.finite, function(i)
sqrt(sum((g.mat[index.f.finite[i], ] - g.mat.num[index.f.finite[i], ])^2))/sqrt(sum(g.mat[index.f.finite[i], ]^2))))
}
if (ret$numderiv == 0) {
h.array <- array(t(sapply(fgh.list, function(u) u$h)), dim = c(nevals, K, K))
ret$h.array <- h.array
ret$is.h.finite <- all(is.finite(h.array[index.f.finite, , ]))
ret$h.diff.max <- max(sapply(1:n.f.finite, function(i)
norm(h.array[index.f.finite[i], , ] - h.array.num[index.f.finite[i], , ], type = "F")/norm(h.array[index.f.finite[i], , ], type = "F")))
}
# concavity (negative definiteness)
sns.is.positive.definite <- function(A, tol = 1e-8) all(eigen(A)$values > tol)
ret$is.negdef.num <- sapply(1:length(blocks), function(i) all(sapply(1:n.f.finite, function(u) {
h.tmp <- h.array.num[index.f.finite[u], , ]
sns.is.positive.definite(-h.tmp[blocks[[i]], blocks[[i]], drop = F], tol = negdef.tol)
})))
if (ret$numderiv == 0) {
ret$is.negdef <- sapply(1:length(blocks), function(i) all(sapply(1:n.f.finite, function(u) {
h.tmp <- h.array[index.f.finite[u], , ]
sns.is.positive.definite(-h.tmp[blocks[[i]], blocks[[i]], drop = F], tol = negdef.tol)
})))
}
return (ret)
}
print.sns.check.logdensity <- function(x, ...) {
sns.TF.to.YesNo <- function(x) ifelse(x, "Yes", "No")
if (!x$check.ld.struct) {
cat("log-density output list has invalid names\n")
return (invisible(NULL))
}
numderiv <- x$numderiv
#cat("log-density is valid\n")
cat("number of finite function evals:", length(which(is.finite(x$f.vec))), "(out of ", length(x$f.vec), ")\n")
cat("recommended numderiv value:", numderiv, "\n")
cat("is length of gradient vector correct?", sns.TF.to.YesNo(x$check.length.g), "\n")
cat("are dims of Hessian matrix correct?", sns.TF.to.YesNo(x$check.dim.h), "\n")
cat("is numerical gradient finite?", sns.TF.to.YesNo(x$is.g.num.finite), "\n")
cat("is numerical Hessian finite?", sns.TF.to.YesNo(x$is.h.num.finite), "\n")
if (numderiv < 2) {
cat("is analytical gradient finite?", sns.TF.to.YesNo(x$is.g.finite), "\n")
cat("maximum relative diff in gradient:", x$g.diff.max, "\n")
}
if (numderiv == 0) {
cat("is analytical Hessian finite?", sns.TF.to.YesNo(x$is.h.finite), "\n")
cat("maximum relative diff in Hessian:", x$h.diff.max, "\n")
}
cat("is numeric Hessian (block) negative-definite?", sns.TF.to.YesNo(x$is.negdef.num), "\n")
if (numderiv == 0) {
cat("is analytical Hessian (block) negative-definite?", sns.TF.to.YesNo(x$is.negdef), "\n")
}
return (invisible(NULL))
}
# predict methods
predict.sns <- function(object, fpred
, nburnin = max(nrow(object)/2, attr(object, "nnr"))
, end = nrow(object), thin = 1, ...) {
niter <- nrow(object)
nnr <- attr(object, "nnr")
nmcmc <- niter - nnr
if (nburnin < nnr) warning("it is strongly suggested that burnin period includes NR iterations (which are not valid MCMC iterations)")
myseq <- seq(from = nburnin + 1, to = end, by = thin)
pred <- apply(object[myseq, ], 1, fpred, ...)
class(pred) <- "predict.sns"
return (pred)
}
summary.predict.sns <- function(object, quantiles = c(0.025, 0.5, 0.975)
, ess.method = c("coda", "ise"), ...) {
smp.mean <- rowMeans(object)
smp.sd <- apply(object, 1, sd)
smp.ess <- ess(t(object), method = ess.method[1])
smp.quantiles <- t(apply(object, 1, quantile, probs = quantiles))
ret <- list(mean = smp.mean, sd = smp.sd, ess = smp.ess, quantiles = smp.quantiles, nseq = ncol(object))
class(ret) <- "summary.predict.sns"
return (ret)
}
print.summary.predict.sns <- function(x, ...) {
cat("prediction sample statistics:\n")
cat("\t(nominal sample size: ", x$nseq, ")\n", sep="")
stats <- cbind(x$mean, x$sd, x$ess, x$quantiles)
colnames(stats)[1:3] <- c("mean", "sd", "ess")
rownames(stats) <- c(1:length(x$mean))
printCoefmat(stats[1:min(length(x$mean), 6), ])
if (length(x$mean) > 6) cat("...\n")
}
# print method
print.sns <- function(x, ...) {
cat("Stochastic Newton Sampler (SNS)\n")
cat("state space dimensionality: ", ncol(x), "\n")
if (!is.null(attr(x, "part"))) cat("state space partitioning: ", attr(x, "part"), " subsets\n")
cat("total iterations: ", nrow(x), "\n")
cat("\t(initial) NR iterations:", attr(x, "nnr"), "\n")
cat("\t(final) MCMC iterations:", nrow(x) - attr(x, "nnr"), "\n")
}
# summary methods
# primary output:
# 1) acceptance rate
# 2) mean relative deviation (if available)
# 3) sample statistics (mean, sd, quantiles, ess, pval) (if available)
summary.sns <- function(object, quantiles = c(0.025, 0.5, 0.975)
, pval.ref = 0.0, nburnin = max(nrow(object)/2, attr(object, "nnr"))
, end = nrow(object), thin = 1, ess.method = c("coda", "ise"), ...) {
K <- ncol(object)
nnr <- attr(object, "nnr")
if (nburnin < nnr) warning("it is strongly suggested that burnin period includes NR iterations (which are not valid MCMC iterations)")
# number of subsets in state space partitioning
npart <- max(1, length(attr(object, "part")))
# average relative deviation of function value from quadratic approximation (post-burnin)
if (!is.null(attr(object, "reldev"))) reldev.mean <- mean(attr(object, "reldev"), na.rm = TRUE)
else reldev.mean <- NA
nsmp <- end - nburnin
if (nsmp > 0) {
# average acceptance rate for MH transition proposals
accept.rate <- sum(attr(object, "accept")[nburnin + 1:nsmp, ]) / length(attr(object, "accept")[nburnin + 1:nsmp, ])
myseq <- seq(from = nburnin + 1, to = end, by = thin)
nseq <- length(myseq)
smp.mean <- colMeans(object[myseq, ])
smp.sd <- apply(object[myseq, ], 2, sd)
smp.ess <- ess(object[myseq, ], method = ess.method[1])
smp.quantiles <- t(apply(object[myseq, ], 2, quantile, probs = quantiles))
smp.pval <- apply(object[myseq, ], 2, sns.calc.pval, ref = pval.ref, na.rm = FALSE)
} else {
accept.rate <- NA
nseq <- 0
smp.mean <- NA
smp.sd <- NA
smp.ess <- NA
smp.quantiles <- NA
smp.pval <- NA
}
ret <- list(K = K, nnr = nnr, nburnin = nburnin, end = end, thin = thin
, niter = nrow(object), nsmp = nsmp, nseq = nseq, npart = npart
, accept.rate = accept.rate, reldev.mean = reldev.mean
, pval.ref = pval.ref, ess.method = ess.method
, smp = list(mean = smp.mean, sd = smp.sd, ess = smp.ess, quantiles = smp.quantiles, pval = smp.pval))
class(ret) <- "summary.sns"
return (ret)
}
print.summary.sns <- function(x, ...) {
cat("Stochastic Newton Sampler (SNS)\n")
cat("state space dimensionality: ", x$K, "\n")
if (x$npart > 1) cat("state space partitioning: ", x$npart, " subsets\n")
cat("total iterations: ", x$niter, "\n")
cat("\tNR iterations: ", x$nnr, "\n")
cat("\tburn-in iterations: ", x$nburnin, "\n")
cat("\tend iteration: ", x$end, "\n")
cat("\tthinning interval: ", x$thin, "\n")
cat("\tsampling iterations (before thinning): ", x$nsmp, "\n")
#cat("\tsampling iterations (after thinning): ", x$nseq, "\n")
cat("acceptance rate: ", x$accept.rate, "\n")
if (!is.na(x$reldev.mean)) cat("\tmean relative deviation from quadratic approx:", format(100*x$reldev.mean, digits=3), "% (post-burnin)\n")
if (x$nsmp > 0) {
cat("sample statistics:\n")
cat("\t(nominal sample size: ", x$nseq, ")\n", sep="")
stats <- cbind(x$smp$mean, x$smp$sd, x$smp$ess, x$smp$quantiles, x$smp$pval)
colnames(stats)[c(1:3, 4 + ncol(x$smp$quantiles))] <- c("mean", "sd", "ess", "p-val")
rownames(stats) <- c(1:x$K)
printCoefmat(stats[1:min(x$K, 6), ], P.values = TRUE, has.Pvalue = TRUE)
if (x$K > 6) cat("...\n")
cat("summary of ess:\n")
print(summary(x$smp$ess))
}
}
# plot method
plot.sns <- function(x, nburnin = max(nrow(x)/2, attr(x, "nnr"))
, select = if (length(x) <= 10) 1:5 else 1, ...) {
init <- attr(x, "init")
lp.init <- attr(x, "lp.init")
lp <- attr(x, "lp")
# in all cases, vertical line delineates transition from nr to mcmc mode
K <- ncol(x)
niter <- nrow(x)
nnr <- attr(x, "nnr")
if (nburnin < nnr) warning("it is strongly suggested that burnin period includes NR iterations (which are not valid MCMC iterations)")
# log-probability trace plot
if (1 %in% select) {
plot(0:niter, c(lp.init, lp), type = "l"
, xlab = "iter", ylab = "log-probability", main = "Log-Probability Trace Plot")
if (nnr > 0 && nnr < niter) abline(v = nnr + 0.5, lty = 2, col = "red")
}
# state vector trace plots
if (2 %in% select) {
for (k in 1:K) {
plot(0:niter, c(init[k], x[, k]), type = "l"
, xlab = "iter", ylab = paste("x[", k, "]", sep = ""), main = "State Variable Trace Plot")
if (nnr > 0 && nnr < niter) abline(v = nnr + 0.5, lty = 2, col = "red")
}
}
if (nburnin < niter) {
if (3 %in% select) {
# effective sample size (horizontal line is maximum possible effective sample size)
my.ess <- ess(x[(nburnin + 1):niter, ])
plot(1:K, my.ess, xlab = "k", ylab = "effective sample size", ylim = c(0, niter - nburnin), main = "Effective Sample Size by Coordinate")
abline(h = niter - nburnin, lty = 2, col = "red")
}
if (4 %in% select) {
# state vector (univariate) histograms
K <- ncol(x)
for (k in 1:K) {
hist(x[(nburnin + 1):niter, k], xlab = paste("x[", k, "]", sep = ""), main = "State Variable Histogram (post-burnin)")
abline(v = mean(x[(nburnin + 1):niter, k]), lty = 2, col = "red")
}
}
if (5 %in% select) {
# state vector (univariate) autocorrelation plots
K <- ncol(x)
for (k in 1:K) {
acf(x[(nburnin + 1):niter, k], xlab = paste("x[", k, "]", sep = ""), main = "State Variable Autocorrelation Plot (post-burnin)")
}
}
}
}
sns.calc.pval <- function(x, ref=0.0, na.rm = FALSE) { # add flag for one-sided vs. two-sided
if (na.rm) x <- x[!is.na(x)]
bigger <- median(x)>ref
if (sd(x)<.Machine$double.eps) {
ret <- NA
} else {
ret <- max(1/length(x), 2*length(which(if (bigger) x<ref else x>ref))/length(x)) # TODO: justify minimum value
}
attr(ret, "bigger") <- bigger
return (ret)
}
# convenience function for numerical augmentation of a log-density
sns.fghEval.numaug <- function(fghEval, numderiv = 0
, numderiv.method = c("Richardson", "simple"), numderiv.args = list()) {
numderiv <- as.integer(numderiv)
if (numderiv > 0) {
numderiv.method <- match.arg(numderiv.method)
if (numderiv == 1) { # we need numeric hessian
fghEval.int <- function(x, ...) {
fg <- fghEval(x, ...)
h <- hessian(func = function(x, ...) fghEval(x, ...)$f, x = x, ..., method = numderiv.method, method.args = numderiv.args)
return (list(f = fg$f, g = fg$g, h = h))
}
} else { # we need numeric gradient and hessian
fghEval.int <- function(x, ...) {
f <- fghEval(x, ...)
g <- grad(func = fghEval, x = x, ..., method = numderiv.method, method.args = numderiv.args)
h <- hessian(func = fghEval, x = x, ..., method = numderiv.method, method.args = numderiv.args)
return (list(f = f, g = g, h = h))
}
}
} else {
fghEval.int <- fghEval
}
}