Revision 63d8a43408637eef9a81e05ffd7e6ff3afa51947 authored by Robert B. Gramacy on 20 September 2006, 00:00:00 UTC, committed by Gabor Csardi on 20 September 2006, 00:00:00 UTC
1 parent 622e02d
tgp.Rd
\name{tgp}
\alias{tgp}
\title{ Generic interface to treed Gaussian process models }
\description{
A generic interface to treed Gaussian process models used by
many of the functions of class \code{"tgp"}:
\code{\link{bgpllm}}, \code{\link{btlm}},
\code{\link{blm}}, \code{\link{bgp}}, \code{\link{btgpllm}}, \code{\link{btgp}},
and \code{\link{plot.tgp}}, \code{\link{tgp.trees}}.
This more complicated interface is provided for a finer control of the model
parameterization.
}
\usage{
tgp(X, Z, XX = NULL, BTE = c(2000, 7000, 2), R = 1, m0r1 = FALSE,
linburn = FALSE, params = NULL, pred.n = TRUE,
ds2x = FALSE, ego = FALSE, traces = FALSE, verb = 1)
}
\arguments{
\item{X}{\code{data.frame}, \code{matrix}, or vector of inputs \code{X} }
\item{Z}{ Vector of output responses \code{Z} of length equal to the
leading dimension (rows) of \code{X} }
\item{XX}{ Optional \code{data.frame}, \code{matrix}, or vector of
predictive input locations with the same number of columns as \code{X} }
\item{BTE}{ 3-vector of Monte Carlo parameters (B)urn-in, (T)otal, and
(E)very. Predictive samples are saved every E MCMC rounds starting
at round B, stopping at T. }
\item{R}{ Number of repeats or restarts of BTE MCMC rounds, default
\code{R=1} is no restarts }
\item{m0r1}{If \code{TRUE} the responses \code{Z} will be scaled to have a mean of
zero and a range of 1; default is \code{FALSE}}
\item{linburn}{ If \code{TRUE} initializes MCMC with \code{B} (additional)
rounds of Bayesian linear CART (\code{\link{btlm}}); default is \code{FALSE} }
\item{params}{ Generic parameters list which can be provided for a more flexible model.
See \code{\link{tgp.default.params}} for more details about the parameter list}
\item{pred.n}{ \code{TRUE} (default) value results in prediction at the inputs
\code{X}; \code{FALSE} skips prediction at \code{X} resulting in
a faster implementation}
\item{ds2x}{ \code{TRUE} results in ALC (Active Learning--Cohn) computation of expected
reduction in uncertainty calculations at the \code{XX} locations, which can be used
for adaptive sampling; \code{FALSE} (default) skips this computation, resulting in
a faster implementation}
\item{ego}{ \code{TRUE} results in EGO (Efficient Global Optimization)
computation of expected information about the location of the minimum
at the \code{XX} locations, which can be used
for adaptive sampling; \code{FALSE} (default) skips this computation, resulting in
a faster implementation}
\item{traces}{ \code{TRUE} results in a saving of samples from the
posterior distribution for most of the parameters in the model. The
default is \code{FALSE} for speed/storage reasons. See note below }
\item{verb}{ Level of verbosity of R-console print statements: from 0
(none); 1 (default) which shows the \dQuote{progress meter}; 2
includes an echo of initialization parameters; up to 3 and 4 (max)
with more info about successful tree operations.}
}
\value{
\code{tgp} returns an object of class \code{"tgp"}. The function \code{\link{plot.tgp}}
can be used to help visualize results.
An object of type \code{"tgp"} is a list containing at least the following
components... The final two (\code{parts} \& \code{trees}) are
tree-related outputs unique to the T (tree) class functions-- those which
have a positive first (alpha) parameter in
\code{params$tree <- c(alpha, beta, minpart)}.
Tree viewing is supported by \code{\link{tgp.trees}}.
\item{state}{unsigned short[3] random number seed to C}
\item{X}{Input argument: \code{data.frame} of inputs \code{X}}
\item{n}{Number of rows in \code{X}, i.e., \code{dim(X)[1]}}
\item{d}{Number of cols in \code{X}, i.e., \code{dim(X)[2]}}
\item{Z}{Vector of output responses \code{Z}}
\item{XX}{Input argument: \code{data.frame} of predictive locations \code{XX}}
\item{nn}{Number of rows in \code{XX}, i.e., \code{dim(XX)[1]}}
\item{BTE}{Input argument: Monte Carlo parameters}
\item{R}{Input argument: restarts}
\item{linburn}{Input argument: initialize MCMC with linear CART}
\item{params}{\code{list} of model parameters generated by
\code{\link{tgp.default.params}}}
\item{dparams}{Double-representation of model input parameters used by C-code}
\item{Zp.mean}{Vector of mean predictive estimates at \code{X} locations}
\item{Zp.q1}{Vector of 5\% predictive quantiles at \code{X} locations}
\item{Zp.q2}{Vector of 95\% predictive quantiles at \code{X} locations}
\item{Zp.q}{Vector of quantile norms \code{Zp.q2 - Zp.q1}}
\item{ZZ.q1}{Vector of 5\% predictive quantiles at \code{XX} locations}
\item{ZZ.q2}{Vector of 95\% predictive quantiles at \code{XX} locations}
\item{ZZ.q}{Vector of quantile norms \code{ZZ.q2 - ZZ.q1}, used by the
Active Learning--MacKay (ALM) adaptive sampling algorithm}
\item{Ds2x}{If argument \code{ds2x=TRUE}, this vector contains ALC
statistics for \code{XX} locations}
\item{ego}{If argument \code{ego=TRUE}, this vector contains EGO
statistics for \code{XX} locations}
\item{response}{Name of response \code{Z} if supplied by \code{data.frame}
in argument, or \dQuote{z} if none provided}
\item{parts}{Internal representation of the regions depicted by partitions of
the maximum a posteriori (MAP) tree}
\item{trees}{\code{list} of trees (\pkg{maptree} representation) which
were MAP as a function of each tree height sampled between MCMC
rounds \code{B} and \code{T}}
\item{traces}{\code{list} containing traces of most of the model
parameters and posterior predictive distributions at input locations
\code{XX}. See note below}
\item{verb}{Input argument: verbosity level}
}
\references{
Gramacy, R. B., Lee, H. K. H. (2006).
\emph{Bayesian treed Gaussian process models.}
Available as UCSC Technical Report ams2006-01.
Gramacy, R. B., Lee, H. K. H. (2006).
\emph{Adaptive design of supercomputer experiments.}
Available as UCSC Technical Report ams2006-02.
Gramacy, R. B., Lee, H. K. H., \& Macready, W. (2004).
\emph{Parameter space exploration with Gaussian process trees.}
ICML (pp. 353--360). Omnipress \& ACM Digital Library.
Chipman, H., George, E., \& McCulloch, R. (1998).
\emph{Bayesian CART model search (with discussion).}
Journal of the American Statistical Association, \bold{93},
935--960.
Chipman, H., George, E., \& McCulloch, R. (2002).
\emph{Bayesian treed models.}
Machine Learning, \bold{48}, 303--324.
\url{http://www.ams.ucsc.edu/~rbgramacy/tgp.html}
}
\author{ Robert B. Gramacy \email{rbgramacy@ams.ucsc.edu}}
\seealso{ \code{\link{tgp.default.params}}, \code{\link{bgpllm}}, \code{\link{btlm}},
\code{\link{blm}}, \code{\link{bgp}}, \code{\link{btgpllm}}, \code{\link{btgp}},
\code{\link{plot.tgp}}, \code{\link{tgp.trees}}}
\note{
Inputs \code{X, XX, Z} containing \code{NaN, NA, Inf} are
discarded with non-fatal warnings.
Upon execution, MCMC reports are made every 1,000 rounds to indicate progress.
Stationary (non-treed) processes on larger inputs (e.g., \code{X,Z}) of
size greater than 500 \emph{might} be slow in execution, especially on older
machines. Once the C code starts executing, it can be interrupted in
the usual way: either via Ctrl-C (Unix-alikes) or pressing the Stop
button in the \R-GUI. When this happens, interrupt messages will
indicate which required cleanup measures completed before returning
control to \R.
Regarding \code{traces=TRUE}: Samples from the posterior will be
collected for all parameters in the model, except those of the
hierarchical priors, e.g., \code{b0}, etc. Traces for some parameters
are stored in memory, others in files. GP parameters are collected
with reference to the locations in \code{XX}, resulting in
\code{nn=dim(XX)[1]} traces of \code{d,g,s2,tau2}, etc. Therefore, it
is recommended that \code{XX} is chosen to be a small, representative
set of input locations. Besides GP parameters, traces are saved for
the tree partitions, areas under the LLM, log posterior (as a function
of tree height), and samples \code{ZZ} from the posterior predictive
distribution at \code{XX}.
}
\examples{
##
## Many of the examples below illustrate the above
## function(s) on random data. Thus it can be fun
## (and informative) to run them several times.
##
#
# simple linear response
#
# input and predictive data
X <- seq(0,1,length=50)
XX <- seq(0,1,length=99)
Z <- 1 + 2*X + rnorm(length(X),sd=0.25)
# out <- blm(X=X, Z=Z, XX=XX) # try Linear Model with tgp
p <- tgp.default.params(2)
p$tree <- c(0,0,10) # no tree
p$gamma <- c(-1,0.2,0.7) # force llm
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) # plot the surface
#
# 1-d Example
#
# construct some 1-d nonstationary data
X <- seq(0,20,length=100)
XX <- seq(0,20,length=99)
Z <- (sin(pi*X/5) + 0.2*cos(4*pi*X/5)) * (X <= 9.6)
lin <- X>9.6;
Z[lin] <- -1 + X[lin]/10
Z <- Z + rnorm(length(Z), sd=0.1)
# out <- btlm(X=X, Z=Z, XX=XX) # try Linear CART with tgp
p <- tgp.default.params(2)
p$gamma <- c(-1,0.2,0.7) # force llm
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) # plot the surface
tgp.trees(out) # plot the MAP trees
# out <- btgp(X=X, Z=Z, XX=XX) # use a treed GP with tgp
p <- tgp.default.params(2)
p$gamma <- c(0,0.2,0.7) # force no llm
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) # plot the surface
tgp.trees(out) # plot the MAP trees
#
# 2-d example
# (using the isotropic correlation function)
#
# construct some 2-d nonstationary data
exp2d.data <- exp2d.rand()
X <- exp2d.data$X; Z <- exp2d.data$Z
XX <- exp2d.data$XX
# try a GP with tgp
# out <- bgp(X=X, Z=Z, XX=XX, corr="exp")
p <- tgp.default.params(3)
p$tree <- c(0,0,10) # no tree
p$gamma <- c(0,0.2,0.7) # no llm
p$corr <- "exp"
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) # plot the surface
# try a treed GP LLM with tgp
# out <- btgpllm(X=X,Z=Z,XX=XX,corr="exp")
p <- tgp.default.params(3)
p$corr <- "exp"
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) # plot the surface
tgp.trees(out) # plot the MAP trees
#
# Motorcycle Accident Data
#
# get the data
require(MASS)
# try a custom treed GP LLM with tgp, without m0r1
p <- tgp.default.params(2)
p$bprior <- "b0" # beta linear prior for common mean
p$nug.p <- c(1.0,0.1,10.0,0.1) # mixture nugget prior
out <- tgp(X=mcycle[,1], Z=mcycle[,2], params=p,
BTE=c(2000,22000,2)) # run mcmc longer
plot(out) # plot the surface
tgp.trees(out) # plot the MAP trees
# for other examples try the demos or the vignette
}
\keyword{ nonparametric }
\keyword{ nonlinear }
\keyword{ smooth }
\keyword{ models }
\keyword{ spatial }
\keyword{ tree }
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...