######################################
#                                    #
#      Methods for sns objects       #
#                                    #
######################################

# Convenience function for partitioning the K state space coordinates into
# 'nsubset' consecutive subsets of (nearly) equal size; earlier subsets
# receive the extra coordinates when K is not divisible by nsubset.
# Returns a list of integer vectors covering 1:K.
sns.make.part <- function(K, nsubset, method = "naive") {
  if (method != "naive")
    stop("invalid method")
  #TODO: consider implementing more sophisticated partitioning methods
  if (nsubset < 1)
    stop("number of partitions must be positive")
  if (nsubset > K)
    stop("number of partitions cannot exceed state space dimensionality")

  # determining number of coordinates per subset (nvec) by dealing
  # coordinates out one at a time, round-robin, across subsets
  nvec <- rep(0, nsubset)
  nleft <- K
  c <- 1
  while (nleft > 0) {
    nvec[c] <- nvec[c] + 1
    nleft <- nleft - 1
    c <- c %% nsubset + 1
  }

  # assigning consecutive coordinate indexes to subsets
  ret <- list()
  c <- 0
  for (n in 1:nsubset) {
    ret[[n]] <- as.integer(c + 1:nvec[n])
    c <- c + nvec[n]
  }

  if (sns.check.part(ret, K))
    return (ret)
  else
    stop("unexpectedly invalid state space partitioning")
}

# Function for checking that a state space partitioning is valid, i.e.
# the subsets are mutually exclusive and collectively exhaustive over 1:K.
sns.check.part <- function(part, K) {
  return (identical(as.integer(sort(unlist(part))), 1:K))
}

# Validating twice-differentiability and concavity of a log-density:
# compares analytical gradient/Hessian (when fghEval supplies them) against
# numerical differentiation over 'nevals' points drawn uniformly from a box
# of widths 'dx' centered at 'x', and checks negative-definiteness of the
# Hessian over each coordinate subset in 'blocks'.
# Relies on grad()/hessian() (numDeriv package) being available in the
# package namespace.
sns.check.logdensity <- function(x, fghEval
  #, numderiv = 0
  , numderiv.method = c("Richardson", "complex"), numderiv.args = list()
  , blocks = append(list(1:length(x)), as.list(1:length(x)))
  , dx = rep(1.0, length(x))
  , nevals = 100, negdef.tol = 1e-8
  , ...) {
  # recycle dx to length K in case scalar dx is supplied by user
  # (length.out avoids inflating an already-length-K dx to K^2 elements)
  dx <- rep(dx, length.out = length(x))
  fgh.ini <- fghEval(x, ...)

  # result skeleton; fields remain NA when not applicable to the detected
  # numderiv level
  ret <- list(check.ld.struct = NA, numderiv = NA
    , check.length.g = NA, check.dim.h = NA
    , x.mat = NA, fgh.ini = NA, fgh.num.ini = NA
    #, fgh.list = NA, fgh.num.list = NA
    , f.vec = NA, g.mat = NA, g.mat.num = NA
    , h.array = NA, h.array.num = NA
    # bug fix: field was declared 't.num.eval' but assigned as
    # 'ret$t.num.evals' below; names now agree (matching 't.evals')
    , t.evals = NA, t.num.evals = NA
    , is.g.num.finite = NA, is.h.num.finite = NA
    , is.g.finite = NA, is.h.finite = NA
    , g.diff.max = NA, h.diff.max = NA
    , is.negdef.num = NA, is.negdef = NA
  )
  attr(ret, "class") <- "sns.check.logdensity"

  # determine if analytical derivatives are provided
  # numderiv = 0: g,h analytical
  # numderiv = 1: g analytical
  # numderiv = 2: no analytical
  is.fgh.list <- is.list(fgh.ini)
  if (is.fgh.list) {
    fgh.list.names <- names(fgh.ini)
    have.fgh <- c("f", "g", "h") %in% fgh.list.names
    if (all(have.fgh)) {
      # no numerical differentiation needed
      ret$check.ld.struct <- TRUE
      ret$numderiv <- 0
    } else if (all(have.fgh[1:2])) {
      # numerical Hessian needed (f and g supplied analytically)
      ret$check.ld.struct <- TRUE
      ret$numderiv <- 1
    } else {
      # list lacks the required "f"/"g" elements: cannot proceed
      ret$check.ld.struct <- FALSE
      return (ret)
    }
  } else {
    # bare function value: need numerical gradient and Hessian
    ret$check.ld.struct <- TRUE
    ret$numderiv <- 2
  }

  # fghEval wrapper producing (f, g, h) with numerical differentiation;
  # g and h are always computed numerically here so they can be compared
  # against the analytical values further below
  numderiv.method <- match.arg(numderiv.method)
  if (ret$numderiv == 2) {
    # fghEval returns the bare log-density value
    fghEval.num <- function(x, ...) {
      f <- fghEval(x, ...)
      g <- grad(func = fghEval, x = x, ...
        , method = numderiv.method, method.args = numderiv.args)
      h <- hessian(func = fghEval, x = x, ...
        , method = numderiv.method, method.args = numderiv.args)
      return (list(f = f, g = g, h = h))
    }
  } else {
    # fghEval returns a list; differentiate its "f" component
    fghEval.num <- function(x, ...) {
      f <- fghEval(x, ...)$f
      g <- grad(func = function(x, ...) fghEval(x, ...)$f, x = x, ...
        , method = numderiv.method, method.args = numderiv.args)
      h <- hessian(func = function(x, ...) fghEval(x, ...)$f, x = x, ...
        , method = numderiv.method, method.args = numderiv.args)
      return (list(f = f, g = g, h = h))
    }
  }
  fgh.num.ini <- fghEval.num(x, ...)

  # check dimensional consistency of gradient and Hessian
  K <- length(x)
  if (ret$numderiv == 0) {
    ret$check.length.g <- length(fgh.ini$g) == K
    ret$check.dim.h <- nrow(fgh.ini$h) == K && ncol(fgh.ini$h) == K
  } else if (ret$numderiv == 1) {
    ret$check.length.g <- length(fgh.ini$g) == K
    ret$check.dim.h <- nrow(fgh.num.ini$h) == K && ncol(fgh.num.ini$h) == K
  } else if (ret$numderiv == 2) {
    ret$check.length.g <- length(fgh.num.ini$g) == K
    ret$check.dim.h <- nrow(fgh.num.ini$h) == K && ncol(fgh.num.ini$h) == K
  }

  # evaluation points: uniform draws from a box of widths dx centered at x;
  # first row is pinned to x itself so x is always among the tested points
  ret$x.mat <- sapply(1:K, function(k)
    runif(nevals, min = x[k] - 0.5 * dx[k], max = x[k] + 0.5 * dx[k]))
  ret$x.mat[1, ] <- x

  # time analytical and numerical evaluations separately
  t <- proc.time()[3]
  fgh.list <- lapply(1:nevals, function(n) fghEval(ret$x.mat[n, ], ...))
  ret$t.evals <- proc.time()[3] - t
  t <- proc.time()[3]
  fgh.num.list <- lapply(1:nevals, function(n) fghEval.num(ret$x.mat[n, ], ...))
  ret$t.num.evals <- proc.time()[3] - t

  # twice-differentiability: derivatives must be finite wherever f is finite
  f.vec <- sapply(fgh.num.list, function(u) u$f)
  index.f.finite <- which(is.finite(f.vec))
  n.f.finite <- length(index.f.finite)
  ret$f.vec <- f.vec
  g.mat.num <- t(sapply(fgh.num.list, function(u) u$g))
  ret$g.mat.num <- g.mat.num
  ret$is.g.num.finite <- all(is.finite(g.mat.num[index.f.finite, ]))
  h.array.num <- array(t(sapply(fgh.num.list, function(u) u$h))
    , dim = c(nevals, K, K))
  ret$h.array.num <- h.array.num
  ret$is.h.num.finite <- all(is.finite(h.array.num[index.f.finite, , ]))

  # closeness of analytical and numerical results
  # (we need better metrics of difference)
  if (ret$numderiv < 2) {
    g.mat <- t(sapply(fgh.list, function(u) u$g))
    ret$g.mat <- g.mat
    ret$is.g.finite <- all(is.finite(g.mat[index.f.finite, ]))
    # maximum (over finite-f evals) relative L2 difference of gradients
    ret$g.diff.max <- max(sapply(1:n.f.finite, function(i)
      sqrt(sum((g.mat[index.f.finite[i], ]
        - g.mat.num[index.f.finite[i], ])^2)) /
        sqrt(sum(g.mat[index.f.finite[i], ]^2))))
  }
  if (ret$numderiv == 0) {
    h.array <- array(t(sapply(fgh.list, function(u) u$h))
      , dim = c(nevals, K, K))
    ret$h.array <- h.array
    ret$is.h.finite <- all(is.finite(h.array[index.f.finite, , ]))
    # maximum relative Frobenius-norm difference of Hessians
    ret$h.diff.max <- max(sapply(1:n.f.finite, function(i)
      norm(h.array[index.f.finite[i], , ]
        - h.array.num[index.f.finite[i], , ], type = "F") /
        norm(h.array[index.f.finite[i], , ], type = "F")))
  }

  # concavity: negative-definiteness of each requested Hessian block,
  # tested as positive-definiteness of the negated block
  sns.is.positive.definite <- function(A, tol = 1e-8)
    all(eigen(A)$values > tol)
  ret$is.negdef.num <- sapply(seq_along(blocks), function(i)
    all(sapply(1:n.f.finite, function(u) {
      h.tmp <- h.array.num[index.f.finite[u], , ]
      sns.is.positive.definite(-h.tmp[blocks[[i]], blocks[[i]], drop = FALSE]
        , tol = negdef.tol)
    })))
  if (ret$numderiv == 0) {
    ret$is.negdef <- sapply(seq_along(blocks), function(i)
      all(sapply(1:n.f.finite, function(u) {
        h.tmp <- h.array[index.f.finite[u], , ]
        sns.is.positive.definite(-h.tmp[blocks[[i]], blocks[[i]], drop = FALSE]
          , tol = negdef.tol)
      })))
  }

  return (ret)
}

# print method for the diagnostic object returned by sns.check.logdensity
print.sns.check.logdensity <- function(x, ...) {
  sns.TF.to.YesNo <- function(x) ifelse(x, "Yes", "No")
  if (!x$check.ld.struct) {
    cat("log-density output list has invalid names\n")
    return (invisible(NULL))
  }
  numderiv <- x$numderiv
  cat("number of finite function evals:"
    , length(which(is.finite(x$f.vec))), "(out of ", length(x$f.vec), ")\n")
  cat("recommended numderiv value:", numderiv, "\n")
  cat("is length of gradient vector correct?"
    , sns.TF.to.YesNo(x$check.length.g), "\n")
  cat("are dims of Hessian matrix correct?"
    , sns.TF.to.YesNo(x$check.dim.h), "\n")
  cat("is numerical gradient finite?"
    , sns.TF.to.YesNo(x$is.g.num.finite), "\n")
  cat("is numerical Hessian finite?"
    , sns.TF.to.YesNo(x$is.h.num.finite), "\n")
  # analytical-vs-numerical comparisons exist only when analytical
  # derivatives were supplied
  if (numderiv < 2) {
    cat("is analytical gradient finite?"
      , sns.TF.to.YesNo(x$is.g.finite), "\n")
    cat("maximum relative diff in gradient:", x$g.diff.max, "\n")
  }
  if (numderiv == 0) {
    cat("is analytical Hessian finite?"
      , sns.TF.to.YesNo(x$is.h.finite), "\n")
    cat("maximum relative diff in Hessian:", x$h.diff.max, "\n")
  }
  cat("is numeric Hessian (block) negative-definite?"
    , sns.TF.to.YesNo(x$is.negdef.num), "\n")
  if (numderiv == 0) {
    cat("is analytical Hessian (block) negative-definite?"
      , sns.TF.to.YesNo(x$is.negdef), "\n")
  }
  return (invisible(NULL))
}

# predict method: applies 'fpred' to every retained (post-burnin, thinned)
# row of the sample matrix; result has one column per retained sample
predict.sns <- function(object, fpred
  , nburnin = max(nrow(object)/2, attr(object, "nnr"))
  , end = nrow(object), thin = 1, ...) {
  nnr <- attr(object, "nnr")
  if (nburnin < nnr)
    warning("it is strongly suggested that burnin period includes NR iterations (which are not valid MCMC iterations)")
  myseq <- seq(from = nburnin + 1, to = end, by = thin)
  pred <- apply(object[myseq, ], 1, fpred, ...)
  class(pred) <- "predict.sns"
  return (pred)
}

# summary method for prediction samples: per-prediction mean, sd, effective
# sample size (ess from external package) and quantiles
summary.predict.sns <- function(object, quantiles = c(0.025, 0.5, 0.975)
  , ess.method = c("coda", "ise"), ...) {
  smp.mean <- rowMeans(object)
  smp.sd <- apply(object, 1, sd)
  smp.ess <- ess(t(object), method = ess.method[1])
  smp.quantiles <- t(apply(object, 1, quantile, probs = quantiles))
  ret <- list(mean = smp.mean, sd = smp.sd, ess = smp.ess
    , quantiles = smp.quantiles, nseq = ncol(object))
  class(ret) <- "summary.predict.sns"
  return (ret)
}

# print method for summary.predict.sns: tabulates the first (up to) 6
# prediction statistics
print.summary.predict.sns <- function(x, ...) {
  cat("prediction sample statistics:\n")
  cat("\t(nominal sample size: ", x$nseq, ")\n", sep="")
  stats <- cbind(x$mean, x$sd, x$ess, x$quantiles)
  colnames(stats)[1:3] <- c("mean", "sd", "ess")
  rownames(stats) <- c(1:length(x$mean))
  printCoefmat(stats[1:min(length(x$mean), 6), ])
  if (length(x$mean) > 6)
    cat("...\n")
}

# print method for sns sample objects (matrices with sns attributes)
print.sns <- function(x, ...) {
  cat("Stochastic Newton Sampler (SNS)\n")
  cat("state space dimensionality: ", ncol(x), "\n")
  # NOTE(review): attr "part" is printed directly as a subset count here,
  # while summary.sns uses length(attr(., "part")) -- confirm which is meant
  if (!is.null(attr(x, "part")))
    cat("state space partitioning: ", attr(x, "part"), " subsets\n")
  cat("total iterations: ", nrow(x), "\n")
  cat("\t(initial) NR iterations:", attr(x, "nnr"), "\n")
  cat("\t(final) MCMC iterations:", nrow(x) - attr(x, "nnr"), "\n")
}

# summary method
# primary output:
#   1) acceptance rate
#   2) mean relative deviation (if available)
#   3) sample statistics (mean, sd, quantiles, ess, pval) (if available)
summary.sns <- function(object, quantiles = c(0.025, 0.5, 0.975)
  , pval.ref = 0.0, nburnin = max(nrow(object)/2, attr(object, "nnr"))
  , end = nrow(object), thin = 1, ess.method = c("coda", "ise"), ...) {
  K <- ncol(object)
  nnr <- attr(object, "nnr")
  if (nburnin < nnr)
    warning("it is strongly suggested that burnin period includes NR iterations (which are not valid MCMC iterations)")
  # number of subsets in state space partitioning
  npart <- max(1, length(attr(object, "part")))
  # average relative deviation of function value from quadratic
  # approximation (post-burnin), when the sampler recorded it
  if (!is.null(attr(object, "reldev")))
    reldev.mean <- mean(attr(object, "reldev"), na.rm = TRUE)
  else
    reldev.mean <- NA
  nsmp <- end - nburnin
  if (nsmp > 0) {
    # average acceptance rate for MH transition proposals
    # (mean == sum/length of the same slice in the original)
    accept.rate <- mean(attr(object, "accept")[nburnin + 1:nsmp, ])
    myseq <- seq(from = nburnin + 1, to = end, by = thin)
    nseq <- length(myseq)
    smp.mean <- colMeans(object[myseq, ])
    smp.sd <- apply(object[myseq, ], 2, sd)
    smp.ess <- ess(object[myseq, ], method = ess.method[1])
    smp.quantiles <- t(apply(object[myseq, ], 2, quantile, probs = quantiles))
    smp.pval <- apply(object[myseq, ], 2, sns.calc.pval
      , ref = pval.ref, na.rm = FALSE)
  } else {
    # no post-burnin samples: all statistics undefined
    accept.rate <- NA
    nseq <- 0
    smp.mean <- NA
    smp.sd <- NA
    smp.ess <- NA
    smp.quantiles <- NA
    smp.pval <- NA
  }
  ret <- list(K = K, nnr = nnr, nburnin = nburnin, end = end, thin = thin
    , niter = nrow(object), nsmp = nsmp, nseq = nseq, npart = npart
    , accept.rate = accept.rate, reldev.mean = reldev.mean
    , pval.ref = pval.ref, ess.method = ess.method
    , smp = list(mean = smp.mean, sd = smp.sd, ess = smp.ess
      , quantiles = smp.quantiles, pval = smp.pval))
  class(ret) <- "summary.sns"
  return (ret)
}

# print method for summary.sns: chain bookkeeping followed by a coefficient
# table of per-coordinate sample statistics (first 6 coordinates)
print.summary.sns <- function(x, ...) {
  cat("Stochastic Newton Sampler (SNS)\n")
  cat("state space dimensionality: ", x$K, "\n")
  if (x$npart > 1)
    cat("state space partitioning: ", x$npart, " subsets\n")
  cat("total iterations: ", x$niter, "\n")
  cat("\tNR iterations: ", x$nnr, "\n")
  cat("\tburn-in iterations: ", x$nburnin, "\n")
  cat("\tend iteration: ", x$end, "\n")
  cat("\tthinning interval: ", x$thin, "\n")
  cat("\tsampling iterations (before thinning): ", x$nsmp, "\n")
  cat("acceptance rate: ", x$accept.rate, "\n")
  if (!is.na(x$reldev.mean))
    cat("\tmean relative deviation from quadratic approx:"
      , format(100*x$reldev.mean, digits=3), "% (post-burnin)\n")
  if (x$nsmp > 0) {
    cat("sample statistics:\n")
    cat("\t(nominal sample size: ", x$nseq, ")\n", sep="")
    stats <- cbind(x$smp$mean, x$smp$sd, x$smp$ess, x$smp$quantiles, x$smp$pval)
    # p-value sits after the 3 moments and the quantile columns
    colnames(stats)[c(1:3, 4 + ncol(x$smp$quantiles))] <-
      c("mean", "sd", "ess", "p-val")
    rownames(stats) <- c(1:x$K)
    printCoefmat(stats[1:min(x$K, 6), ], P.values = TRUE, has.Pvalue = TRUE)
    if (x$K > 6)
      cat("...\n")
    cat("summary of ess:\n")
    print(summary(x$smp$ess))
  }
}

# plot method: selectable diagnostic plots
#   1: log-probability trace, 2: per-coordinate traces, 3: ESS by coordinate,
#   4: per-coordinate histograms, 5: per-coordinate autocorrelation
plot.sns <- function(x, nburnin = max(nrow(x)/2, attr(x, "nnr"))
  , select = if (length(x) <= 10) 1:5 else 1, ...) {
  # NOTE(review): length(x) counts all elements of the sample matrix;
  # possibly ncol(x) (state dimensionality) was intended -- confirm
  init <- attr(x, "init")
  lp.init <- attr(x, "lp.init")
  lp <- attr(x, "lp")
  # in all cases, the vertical line delineates transition from NR to MCMC mode
  K <- ncol(x)
  niter <- nrow(x)
  nnr <- attr(x, "nnr")
  if (nburnin < nnr)
    warning("it is strongly suggested that burnin period includes NR iterations (which are not valid MCMC iterations)")
  # log-probability trace plot
  if (1 %in% select) {
    plot(0:niter, c(lp.init, lp), type = "l"
      , xlab = "iter", ylab = "log-probability"
      , main = "Log-Probability Trace Plot")
    if (nnr > 0 && nnr < niter)
      abline(v = nnr + 0.5, lty = 2, col = "red")
  }
  # state vector trace plots
  if (2 %in% select) {
    for (k in 1:K) {
      plot(0:niter, c(init[k], x[, k]), type = "l"
        , xlab = "iter", ylab = paste("x[", k, "]", sep = "")
        , main = "State Variable Trace Plot")
      if (nnr > 0 && nnr < niter)
        abline(v = nnr + 0.5, lty = 2, col = "red")
    }
  }
  if (nburnin < niter) {
    if (3 %in% select) {
      # effective sample size
      # (horizontal line is maximum possible effective sample size)
      my.ess <- ess(x[(nburnin + 1):niter, ])
      plot(1:K, my.ess, xlab = "k", ylab = "effective sample size"
        , ylim = c(0, niter - nburnin)
        , main = "Effective Sample Size by Coordinate")
      abline(h = niter - nburnin, lty = 2, col = "red")
    }
    if (4 %in% select) {
      # state vector (univariate) histograms
      for (k in 1:K) {
        hist(x[(nburnin + 1):niter, k]
          , xlab = paste("x[", k, "]", sep = "")
          , main = "State Variable Histogram (post-burnin)")
        abline(v = mean(x[(nburnin + 1):niter, k]), lty = 2, col = "red")
      }
    }
    if (5 %in% select) {
      # state vector (univariate) autocorrelation plots
      for (k in 1:K) {
        acf(x[(nburnin + 1):niter, k]
          , xlab = paste("x[", k, "]", sep = "")
          , main = "State Variable Autocorrelation Plot (post-burnin)")
      }
    }
  }
}

# Empirical two-sided p-value of sample x against reference value 'ref':
# twice the empirical tail probability on the far side of ref, floored at
# 1/length(x); NA for (near-)constant samples. The "bigger" attribute
# records whether the sample median exceeds ref.
sns.calc.pval <- function(x, ref=0.0, na.rm = FALSE) {
  # add flag for one-sided vs. two-sided
  if (na.rm)
    x <- x[!is.na(x)]
  bigger <- median(x) > ref
  if (sd(x) < .Machine$double.eps) {
    # degenerate (constant) sample: p-value undefined
    ret <- NA
  } else {
    # bug fix: the comparison was garbled to the undefined name 'xref'
    # (text-stripping artifact); reconstructed as the opposite-tail count
    # implied by the 'bigger' flag
    ret <- max(1/length(x)
      , 2*length(which(if (bigger) x < ref else x > ref))/length(x))
    # TODO: justify minimum value
  }
  attr(ret, "bigger") <- bigger
  return (ret)
}

# Convenience function for numerical augmentation of a log-density:
#   numderiv = 0: fghEval already returns f, g, h -> returned unchanged
#   numderiv = 1: fghEval returns f, g -> Hessian added numerically
#   numderiv = 2: fghEval returns bare f -> gradient and Hessian added
# Relies on grad()/hessian() (numDeriv package) at evaluation time.
sns.fghEval.numaug <- function(fghEval, numderiv = 0
  , numderiv.method = c("Richardson", "simple"), numderiv.args = list()) {
  numderiv <- as.integer(numderiv)
  if (numderiv > 0) {
    numderiv.method <- match.arg(numderiv.method)
    if (numderiv == 1) {
      # we need numeric hessian
      fghEval.int <- function(x, ...) {
        fg <- fghEval(x, ...)
        h <- hessian(func = function(x, ...) fghEval(x, ...)$f, x = x, ...
          , method = numderiv.method, method.args = numderiv.args)
        return (list(f = fg$f, g = fg$g, h = h))
      }
    } else {
      # we need numeric gradient and hessian
      fghEval.int <- function(x, ...) {
        f <- fghEval(x, ...)
        g <- grad(func = fghEval, x = x, ...
          , method = numderiv.method, method.args = numderiv.args)
        h <- hessian(func = fghEval, x = x, ...
          , method = numderiv.method, method.args = numderiv.args)
        return (list(f = f, g = g, h = h))
      }
    }
  } else {
    fghEval.int <- fghEval
  }
  # explicit return (original relied on the invisible value of the
  # trailing if/else assignment)
  return (fghEval.int)
}