\name{tgp}
\alias{tgp}

\title{ Generic interface to treed Gaussian process models }
\description{
  A generic interface to treed Gaussian process models used by
  many of the functions of class \code{"tgp"}:
	\code{\link{bgpllm}}, \code{\link{btlm}}, 
	\code{\link{blm}}, \code{\link{bgp}}, \code{\link{btgp}},
	and \code{\link{btgpllm}}, as well as \code{\link{plot.tgp}}
	and \code{\link{tgp.trees}}.
  This more complicated interface is provided to allow finer control over the
  model parameterization.
}

\usage{
tgp(X, Z, XX = NULL, BTE = c(2000, 7000, 2), R = 1, m0r1 = FALSE,
    linburn = FALSE, params = NULL, pred.n = TRUE, ds2x = FALSE,
    ego = FALSE, traces = FALSE, verb = 1)
}

\arguments{
  \item{X}{\code{data.frame}, \code{matrix}, or vector of inputs \code{X} }
  \item{Z}{ Vector of output responses \code{Z} of length equal to the
  	leading dimension (rows) of \code{X} }
  \item{XX}{ Optional \code{data.frame}, \code{matrix},  or vector of
    predictive input locations with the same number of columns as \code{X} }
  \item{BTE}{ 3-vector of Monte Carlo parameters (B)urn in, (T)otal, and
    (E)very. Predictive samples are saved every E MCMC rounds starting
    at round B and stopping at T, so, e.g., the default \code{BTE=c(2000,7000,2)}
    saves roughly (7000-2000)/2 = 2500 predictive samples from each of the
    \code{R} repeats }
  \item{R}{ Number of repeats or restarts of BTE MCMC rounds; the default
    \code{R=1} indicates no restarts }
  \item{m0r1}{If \code{TRUE} the responses \code{Z} will be scaled to have a mean of
  	zero and a range of 1; default is \code{FALSE}}
  \item{linburn}{ If \code{TRUE} initializes MCMC with \code{B} (additional) 
  	rounds of Bayesian linear CART (\code{bcart}); default is \code{FALSE} }
  \item{params}{ Generic parameters list which can be provided for a more flexible model.
  	See \code{\link{tgp.default.params}} for more details about the parameter list}
  \item{pred.n}{ \code{TRUE} (default) results in prediction at the inputs 
  	\code{X}; \code{FALSE} skips prediction at \code{X}, resulting in
	a faster implementation}
  \item{ds2x}{ \code{TRUE} results in ALC (Active Learning--Cohn) computation of
  	the expected reduction in uncertainty at the \code{XX} locations, which can be used
	for adaptive sampling; \code{FALSE} (default) skips this computation, resulting in
	a faster implementation}
  \item{ego}{ \code{TRUE} results in EGO (Expected Global Optimization)
    computation of the expected information about the location of the
    minimum at the \code{XX} locations, which can be used
    for adaptive sampling; \code{FALSE} (default) skips this computation, resulting in
    a faster implementation}
  \item{traces}{ \code{TRUE} results in a saving of samples from the
      posterior distribution for most of the parameters in the model.  The
      default is \code{FALSE} for speed/storage reasons. See note below }
  \item{verb}{ Level of verbosity of R-console print statements: from 0
    (none); 1 (default), which shows the \dQuote{progress meter}; 2,
    which also echoes the initialization parameters; up to 3 and 4 (max),
    which give progressively more information about successful tree operations.}
  }


\value{
  \code{tgp} returns an object of class \code{"tgp"}.  The function \code{\link{plot.tgp}}
  can be used to help visualize results.

  An object of type \code{"tgp"} is a list containing at least the following
  components...  The components \code{parts} \& \code{trees} are
  tree-related outputs unique to the T (tree) class functions, i.e., those which
  have a positive first (alpha) parameter in
  \code{params$tree <- c(alpha, beta, minpart)}.
  Tree viewing is supported by \code{\link{tgp.trees}}.

  \item{state}{\code{unsigned short[3]} random number seed passed to the C code}
  \item{X}{Input argument: \code{data.frame} of inputs \code{X}}
  \item{n}{Number of rows in \code{X}, i.e., \code{dim(X)[1]}}
  \item{d}{Number of cols in \code{X}, i.e., \code{dim(X)[2]}}
  \item{Z}{Vector of output responses \code{Z}}
  \item{XX}{Input argument: \code{data.frame} of predictive locations \code{XX}}
  \item{nn}{Number of rows in \code{XX}, i.e., \code{dim(XX)[1]}}
  \item{BTE}{Input argument: Monte Carlo parameters}
  \item{R}{Input argument: restarts}
  \item{linburn}{Input argument: initialize MCMC with linear CART}
  \item{params}{\code{list} of model parameters generated by 
  	\code{\link{tgp.default.params}}}
  \item{dparams}{Double-representation of model input parameters used by C-code}
  \item{Zp.mean}{Vector of mean predictive estimates at \code{X} locations}
  \item{Zp.q1}{Vector of 5\% predictive quantiles at \code{X} locations}
  \item{Zp.q2}{Vector of 95\% predictive quantiles at \code{X} locations}
  \item{Zp.q}{Vector of quantile norms \code{Zp.q2 - Zp.q1}}
  \item{ZZ.q1}{Vector of 5\% predictive quantiles at \code{XX} locations}
  \item{ZZ.q2}{Vector of 95\% predictive quantiles at \code{XX} locations}
  \item{ZZ.q}{Vector of quantile norms \code{ZZ.q2 - ZZ.q1}, used by the 
  	Active Learning--MacKay (ALM) adaptive sampling algorithm}
  \item{Ds2x}{If argument \code{ds2x=TRUE}, this vector contains ALC
    statistics for \code{XX} locations}
  \item{ego}{If argument \code{ego=TRUE}, this vector contains EGO
    statistics for \code{XX} locations}
  \item{response}{Name of response \code{Z} if supplied by \code{data.frame} 
  	in argument, or \dQuote{z} if none provided}

  \item{parts}{Internal representation of the regions depicted by partitions of
  	the maximum a posteriori (MAP) tree}
  \item{trees}{\code{list} of trees (\pkg{maptree} representation) which
    were MAP as a function of each tree height sampled between MCMC
    rounds \code{B} and \code{T}}
  \item{traces}{\code{list} containing traces of most of the model
    parameters and posterior predictive distributions at input locations
    \code{XX}.  See note below}
  \item{verb}{Input argument: verbosity level}
}

\references{
Gramacy, R. B., Lee, H. K. H. (2006).
\emph{Bayesian treed Gaussian process models.}
Available as UCSC Technical Report ams2006-01. 

Gramacy, R. B., Lee, H. K. H. (2006).
\emph{Adaptive design of supercomputer experiments.}
Available as UCSC Technical Report ams2006-02. 

Gramacy, R. B., Lee, H. K. H., \& Macready, W. (2004).
\emph{Parameter space exploration with Gaussian process trees.} 
ICML (pp. 353--360).  Omnipress \& ACM Digital Library.

Chipman, H., George, E., \& McCulloch, R. (1998).
\emph{Bayesian CART model search (with discussion).}
Journal of the American Statistical Association, \bold{93},
935--960.

Chipman, H., George, E., \& McCulloch, R. (2002).
\emph{Bayesian treed models.}
Machine Learning, \bold{48}, 303--324.

\url{http://www.ams.ucsc.edu/~rbgramacy/tgp.html}
}

\author{ Robert B. Gramacy \email{rbgramacy@ams.ucsc.edu}}

\seealso{ \code{\link{tgp.default.params}}, \code{\link{bgpllm}}, \code{\link{btlm}}, 
	\code{\link{blm}}, \code{\link{bgp}}, \code{\link{btgp}}, \code{\link{btgpllm}},
	\code{\link{plot.tgp}}, \code{\link{tgp.trees}}}

\note{
  Inputs \code{X, XX, Z} containing \code{NaN, NA, Inf} are
  discarded with non-fatal warnings.

  Upon execution, MCMC reports are made every 1,000 rounds to indicate progress.

Stationary (non-treed) processes on larger inputs (e.g., \code{X,Z}
with more than 500 rows) \emph{might} be slow to execute, especially on older
machines.  Once the C code starts executing, it can be interrupted in
the usual way: either via Ctrl-C (on Unix-alikes) or by pressing the Stop
button in the \R-GUI.  When this happens, interrupt messages will
indicate which required cleanup measures completed before returning
control to \R.

Regarding \code{traces=TRUE}: Samples from the posterior will be
collected for all parameters in the model, except those of the
hierarchical priors, e.g., \code{b0}, etc.  Traces for some parameters
are stored in memory, others in files. GP parameters are collected
with reference to the locations in \code{XX}, resulting in
\code{nn=dim(XX)[1]} traces of \code{d,g,s2,tau2}, etc.  Therefore, it
is recommended that \code{XX} be chosen as a small, representative
set of input locations.  Besides GP parameters, traces are saved for
the tree partitions, areas under the LLM, log posterior (as a function
of tree height), and samples \code{ZZ} from the posterior predictive
distribution at \code{XX}.
}

\examples{
##
## Many of the examples below illustrate the above 
## function(s) on random data.  Thus it can be fun
## (and informative) to run them several times.
##

# 
# simple linear response
#

# input and predictive data
X <- seq(0,1,length=50)
XX <- seq(0,1,length=99)
Z <- 1 + 2*X + rnorm(length(X),sd=0.25)

# out <- blm(X=X, Z=Z, XX=XX)	# try Linear Model with tgp
p <- tgp.default.params(2)
p$tree <- c(0,0,10)        	# no tree
p$gamma <- c(-1,0.2,0.7)   	# force llm
out <- tgp(X=X,Z=Z,XX=XX,params=p) 
plot(out)			# plot the surface
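
# the ALM (quantile-norm) statistic described in the value section can
# be inspected directly; e.g., find the XX location with the widest
# predictive interval (a sketch of a follow-up, not part of the
# original example)
XX[which.max(out$ZZ.q)]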


#
# 1-d Example
# 

# construct some 1-d nonstationary data
X <- seq(0,20,length=100)
XX <- seq(0,20,length=99)
Z <- (sin(pi*X/5) + 0.2*cos(4*pi*X/5)) * (X <= 9.6)
lin <- X>9.6; 
Z[lin] <- -1 + X[lin]/10
Z <- Z + rnorm(length(Z), sd=0.1)

# out <- btlm(X=X, Z=Z, XX=XX) # try Linear CART with tgp
p <- tgp.default.params(2)
p$gamma <- c(-1,0.2,0.7)    	# force llm
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) 			# plot the surface
tgp.trees(out) 		 	# plot the MAP trees

# out <- btgp(X=X, Z=Z, XX=XX) 	# use a treed GP with tgp
p <- tgp.default.params(2)
p$gamma <- c(0,0.2,0.7)    	# force no llm
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) 			# plot the surface
tgp.trees(out) 		 	# plot the MAP trees
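
# the ALC (ds2x) and traces outputs described in the arguments above
# are skipped by default; a minimal sketch requesting them, re-using
# the treed GP settings from the block above (not one of the original
# examples)
out <- tgp(X=X, Z=Z, XX=XX, params=p, ds2x=TRUE, traces=TRUE)
out$Ds2x        # ALC statistics at the XX locations
str(out$traces) # posterior traces collected at XX (see note)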


#
# 2-d example
# (using the isotropic correlation function)
#

# construct some 2-d nonstationary data
exp2d.data <- exp2d.rand()
X <- exp2d.data$X; Z <- exp2d.data$Z
XX <- exp2d.data$XX

# try a GP with tgp
# out <- bgp(X=X, Z=Z, XX=XX, corr="exp") 	
p <- tgp.default.params(3)
p$tree <- c(0,0,10)        	# no tree
p$gamma <- c(0,0.2,0.7)    	# no llm
p$corr <- "exp"	
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) 			# plot the surface

# try a treed GP LLM with tgp
# out <- btgpllm(X=X,Z=Z,XX=XX,corr="exp") 
p <- tgp.default.params(3)
p$corr <- "exp"	
out <- tgp(X=X,Z=Z,XX=XX,params=p)
plot(out) 			# plot the surface
tgp.trees(out) 		 	# plot the MAP trees
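
# the EGO statistics described in the arguments above are also skipped
# by default; a minimal sketch requesting them with the treed GP LLM
# settings from the block above (not one of the original examples)
out <- tgp(X=X, Z=Z, XX=XX, params=p, ego=TRUE)
out$ego         # EGO statistics at the XX locations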


#
# Motorcycle Accident Data
#

# get the data
require(MASS)

# try a custom treed GP LLM with tgp, without m0r1
p <- tgp.default.params(2)
p$bprior <- "b0" # beta linear prior for common mean
p$nug.p <- c(1.0,0.1,10.0,0.1) # mixture nugget prior
out <- tgp(X=mcycle[,1], Z=mcycle[,2], params=p,
	   BTE=c(2000,22000,2)) # run mcmc longer
plot(out)			# plot the surface
tgp.trees(out)		 	# plot the MAP trees
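
# a sketch of the same fit with m0r1=TRUE, which scales the responses
# to have a mean of zero and a range of one as described in the
# arguments above (an alternative call, not run here)
# out <- tgp(X=mcycle[,1], Z=mcycle[,2], params=p,
#            BTE=c(2000,22000,2), m0r1=TRUE)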

# for other examples try the demos or the vignette
}

\keyword{ nonparametric }
\keyword{ nonlinear }
\keyword{ smooth }
\keyword{ models }
\keyword{ spatial }
\keyword{ tree }