\name{tgp} \alias{tgp} \title{ Generic interface to treed Gaussian process models } \description{ A generic interface to treed Gaussian process models used by many of the functions of class \code{"tgp"}: \code{\link{bgpllm}}, \code{\link{btlm}}, \code{\link{blm}}, \code{\link{bgp}}, \code{\link{btgpllm}}, \code{\link{btgp}}, and \code{\link{plot.tgp}}, \code{\link{tgp.trees}}. This more complicated interface is provided for a finer control of the model parameterization. } \usage{ tgp(X, Z, XX = NULL, BTE = c(2000, 7000, 2), R = 1, m0r1 = FALSE, linburn = FALSE, params = NULL, pred.n = TRUE, ds2x = FALSE, ego = FALSE, traces = FALSE, verb = 1) } \arguments{ \item{X}{\code{data.frame}, \code{matrix}, or vector of inputs \code{X} } \item{Z}{ Vector of output responses \code{Z} of length equal to the leading dimension (rows) of \code{X} } \item{XX}{ Optional \code{data.frame}, \code{matrix}, or vector of predictive input locations with the same number of columns as \code{X} } \item{BTE}{ 3-vector of Monte Carlo parameters (B)urn in, (T)otal, and (E)very. Predictive samples are saved every E MCMC rounds starting at round B, stopping at T. } \item{R}{ Number of repeats or restarts of BTE MCMC rounds, default \code{R=1} is no restarts } \item{m0r1}{If \code{TRUE} the responses \code{Z} will be scaled to have a mean of zero and a range of 1; default is \code{FALSE}} \item{linburn}{ If \code{TRUE} initializes MCMC with \code{B} (additional) rounds of Bayesian linear CART (\code{bcart}); default is \code{FALSE} } \item{params}{ Generic parameters list which can be provided for a more flexible model. 
See \code{\link{tgp.default.params}} for more details about the parameter list} \item{pred.n}{ \code{TRUE} (default) value results in prediction at the inputs \code{X}; \code{FALSE} skips prediction at \code{X} resulting in a faster implementation} \item{ds2x}{ \code{TRUE} results in ALC (Active Learning--Cohn) computation of expected reduction in uncertainty calculations at the \code{XX} locations, which can be used for adaptive sampling; \code{FALSE} (default) skips this computation, resulting in a faster implementation} \item{ego}{ \code{TRUE} results in EGO (Expected Global Optimization) computation of expected information about the location of the minimum reduction in uncertainty calculations at the \code{XX} locations, which can be used for adaptive sampling; \code{FALSE} (default) skips this computation, resulting in a faster implementation} \item{traces}{ \code{TRUE} results in a saving of samples from the posterior distribution for most of the parameters in the model. The default is \code{FALSE} for speed/storage reasons. See note below } \item{verb}{ Level of verbosity of R-console print statements: from 0 (none); 1 (default) which shows the \dQuote{progress meter}; 2 includes an echo of initialization parameters; up to 3 and 4 (max) with more info about successful tree operations.} } \value{ \code{tgp} returns an object of class \code{"tgp"}. The function \code{\link{plot.tgp}} can be used to help visualize results. An object of type \code{"tgp"} is a list containing at least the following components... The final two (\code{parts} \& \code{trees}) are tree-related outputs unique to the T (tree) class functions-- those which have a positive first (alpha) parameter in \code{params$tree <- c(alpha, beta, minpart)}. Tree viewing is supported by \code{\link{tgp.trees}}. 
\item{state}{unsigned short[3] random number seed to C} \item{X}{Input argument: \code{data.frame} of inputs \code{X}} \item{n}{Number of rows in \code{X}, i.e., \code{dim(X)[1]}} \item{d}{Number of cols in \code{X}, i.e., \code{dim(X)[2]}} \item{Z}{Vector of output responses \code{Z}} \item{XX}{Input argument: \code{data.frame} of predictive locations \code{XX}} \item{nn}{Number of rows in \code{XX}, i.e., \code{dim(XX)[1]}} \item{BTE}{Input argument: Monte Carlo parameters} \item{R}{Input argument: restarts} \item{linburn}{Input argument: initialize MCMC with linear CART} \item{params}{\code{list} of model parameters generated by \code{\link{tgp.default.params}}} \item{dparams}{Double-representation of model input parameters used by C-code} \item{Zp.mean}{Vector of mean predictive estimates at \code{X} locations} \item{Zp.q1}{Vector of 5\% predictive quantiles at \code{X} locations} \item{Zp.q2}{Vector of 95\% predictive quantiles at \code{X} locations} \item{Zp.q}{Vector of quantile norms \code{Zp.q2 - Zp.q1}} \item{ZZ.q1}{Vector of 5\% predictive quantiles at \code{XX} locations} \item{ZZ.q2}{Vector of 95\% predictive quantiles at \code{XX} locations} \item{ZZ.q}{Vector of quantile norms \code{ZZ.q2 - ZZ.q1}, used by the Active Learning--MacKay (ALM) adaptive sampling algorithm} \item{Ds2x}{If argument \code{ds2x=TRUE}, this vector contains ALC statistics for \code{XX} locations} \item{ego}{If argument \code{ego=TRUE}, this vector contains EGO statistics for \code{XX} locations} \item{response}{Name of response \code{Z} if supplied by \code{data.frame} in argument, or \dQuote{z} if none provided} \item{parts}{Internal representation of the regions depicted by partitions of the maximum a posteriori (MAP) tree} \item{trees}{\code{list} of trees (\pkg{maptree} representation) which were MAP as a function of each tree height sampled between MCMC rounds \code{B} and \code{T}} \item{traces}{\code{list} containing traces of most of the model parameters and posterior 
predictive distributions at input locations \code{XX}. See note below} \item{verb}{Input argument: verbosity level} } \references{ Gramacy, R. B., Lee, H. K. H. (2006). \emph{Bayesian treed Gaussian process models.} Available as UCSC Technical Report ams2006-01. Gramacy, R. B., Lee, H. K. H. (2006). \emph{Adaptive design of supercomputer experiments.} Available as UCSC Technical Report ams2006-02. Gramacy, R. B., Lee, H. K. H., \& Macready, W. (2004). \emph{Parameter space exploration with Gaussian process trees.} ICML (pp. 353--360). Omnipress \& ACM Digital Library. Chipman, H., George, E., \& McCulloch, R. (1998). \emph{Bayesian CART model search (with discussion).} Journal of the American Statistical Association, \bold{93}, 935--960. Chipman, H., George, E., \& McCulloch, R. (2002). \emph{Bayesian treed models.} Machine Learning, \bold{48}, 303--324. \url{http://www.ams.ucsc.edu/~rbgramacy/tgp.html} } \author{ Robert B. Gramacy \email{rbgramacy@ams.ucsc.edu}} \seealso{ \code{\link{tgp.default.params}}, \code{\link{bgpllm}}, \code{\link{btlm}}, \code{\link{blm}}, \code{\link{bgp}}, \code{\link{btgpllm}}, \code{\link{btgp}}, \code{\link{plot.tgp}}, \code{\link{tgp.trees}}} \note{ Inputs \code{X, XX, Z} containing \code{NaN, NA, Inf} are discarded with non-fatal warnings. Upon execution, MCMC reports are made every 1,000 rounds to indicate progress. Stationary (non-treed) processes on larger inputs (e.g., \code{X,Z}) of size greater than 500, \emph{might} be slow in execution, especially on older machines. Once the C code starts executing, it can be interrupted in the usual way: either via Ctrl-C (Unix-alikes) or pressing the Stop button in the \R-GUI. When this happens, interrupt messages will indicate which required cleanup measures completed before returning control to \R. Regarding \code{traces=TRUE}: Samples from the posterior will be collected for all parameters in the model, except those of the hierarchical priors, e.g., \code{b0}, etc. 
Traces for some parameters are stored in memory, others in files. GP parameters are collected with reference to the locations in \code{XX}, resulting in \code{nn=dim(XX)[1]} traces of \code{d,g,s2,tau2}, etc. Therefore, it is recommended that \code{nn} is chosen to be a small, representative, set of input locations. Besides GP parameters, traces are saved for the tree partitions, areas under the LLM, log posterior (as a function of tree height), and samples \code{ZZ} from the posterior predictive distribution at \code{XX} } \examples{ ## ## Many of the examples below illustrate the above ## function(s) on random data. Thus it can be fun ## (and informative) to run them several times. ## # # simple linear response # # input and predictive data X <- seq(0,1,length=50) XX <- seq(0,1,length=99) Z <- 1 + 2*X + rnorm(length(X),sd=0.25) # out <- blm(X=X, Z=Z, XX=XX) # try Linear Model with tgp p <- tgp.default.params(2) p$tree <- c(0,0,10) # no tree p$gamma <- c(-1,0.2,0.7) # force llm out <- tgp(X=X,Z=Z,XX=XX,params=p) plot(out) # plot the surface # # 1-d Example # # construct some 1-d nonstationary data X <- seq(0,20,length=100) XX <- seq(0,20,length=99) Z <- (sin(pi*X/5) + 0.2*cos(4*pi*X/5)) * (X <= 9.6) lin <- X>9.6; Z[lin] <- -1 + X[lin]/10 Z <- Z + rnorm(length(Z), sd=0.1) # out <- btlm(X=X, Z=Z, XX=XX) # try Linear CART with tgp p <- tgp.default.params(2) p$gamma <- c(-1,0.2,0.7) # force llm out <- tgp(X=X,Z=Z,XX=XX,params=p) plot(out) # plot the surface tgp.trees(out) # plot the MAP trees # out <- btgp(X=X, Z=Z, XX=XX) # use a treed GP with tgp p <- tgp.default.params(2) p$gamma <- c(0,0.2,0.7) # force no llm out <- tgp(X=X,Z=Z,XX=XX,params=p) plot(out) # plot the surface tgp.trees(out) # plot the MAP trees # # 2-d example # (using the isotropic correlation function) # # construct some 2-d nonstationary data exp2d.data <- exp2d.rand() X <- exp2d.data$X; Z <- exp2d.data$Z XX <- exp2d.data$XX # try a GP with tgp # out <- bgp(X=X, Z=Z, XX=XX, corr="exp") p <- 
tgp.default.params(3) p$tree <- c(0,0,10) # no tree p$gamma <- c(0,0.2,0.7) # no llm p$corr <- "exp" out <- tgp(X=X,Z=Z,XX=XX,params=p) plot(out) # plot the surface # try a treed GP LLM with tgp # out <- btgpllm(X=X,Z=Z,XX=XX,corr="exp") p <- tgp.default.params(3) p$corr <- "exp" out <- tgp(X=X,Z=Z,XX=XX,params=p) plot(out) # plot the surface tgp.trees(out) # plot the MAP trees # # Motorcycle Accident Data # # get the data require(MASS) # try a custom treed GP LLM with tgp, without m0r1 p <- tgp.default.params(2) p$bprior <- "b0" # beta linear prior for common mean p$nug.p <- c(1.0,0.1,10.0,0.1) # mixture nugget prior out <- tgp(X=mcycle[,1], Z=mcycle[,2], params=p, BTE=c(2000,22000,2)) # run mcmc longer plot(out) # plot the surface tgp.trees(out) # plot the MAP trees # for other examples try the demos or the vignette } \keyword{ nonparametric } \keyword{ nonlinear } \keyword{ smooth } \keyword{ models } \keyword{ spatial } \keyword{ tree }