% predict.aster.Rd
\name{predict.aster}
\alias{predict.aster}
\alias{predict.aster.formula}
\concept{regression}
\concept{exponential family}
\concept{graphical model}
\title{Predict Method for Aster Model Fits}
\usage{
\method{predict}{aster}(object, x, root, modmat, amat,
parm.type = c("mean.value", "canonical"),
model.type = c("unconditional", "conditional"),
se.fit = FALSE, info = c("expected", "observed"),
info.tol = sqrt(.Machine$double.eps), newcoef = NULL, \ldots)
\method{predict}{aster.formula}(object, newdata, varvar, idvar, root, amat,
parm.type = c("mean.value", "canonical"),
model.type = c("unconditional", "conditional"),
se.fit = FALSE, info = c("expected", "observed"),
info.tol = sqrt(.Machine$double.eps), newcoef = NULL, \ldots)
}
\arguments{
\item{object}{a fitted object of class inheriting from \code{"aster"}
or \code{"aster.formula"}.}
\item{modmat}{a model matrix to use instead of \code{object$modmat}.
Must have the same structure (three-dimensional array, first index runs
over individuals, second over nodes of the graphical model, third over
covariates). Must have the same second and third dimensions as
\code{object$modmat}. The second and third components of
\code{dimnames(modmat)} and \code{dimnames(object$modmat)} must also be
the same.
May be missing, in which case \code{object$modmat} is used.
\code{predict.aster.formula} constructs such a \code{modmat} from
\code{object$formula}, the data frame \code{newdata}, and the variables
in the environment of the formula. When \code{newdata} is missing, then
\code{object$modmat} is used.}
\item{x}{response. Ignored and may be missing unless
\code{parm.type == "mean.value" && model.type == "conditional"}.
Even then may be missing when \code{modmat} is missing,
in which case \code{object$x} is used. A matrix whose first and
second dimensions and the corresponding dimnames agree with
those of \code{modmat} and \code{object$modmat}.
\code{predict.aster.formula} constructs such an \code{x} from
the response variable name in \code{object$formula},
the data frame \code{newdata},
and the variables in the environment of the formula. When \code{newdata}
is missing, then \code{object$x} is used.}
\item{root}{root data. Ignored and may be missing unless
\code{parm.type == "mean.value"}.
Even then may be missing when \code{modmat} is missing,
in which case \code{object$root} is used. A matrix of the
same form as \code{x}.
\code{predict.aster.formula} looks up the variable supplied as
the argument \code{root} in the data frame \code{newdata} or in
the variables in the environment of the formula and makes it a matrix
of the same form as \code{x}. When \code{newdata}
is missing, then \code{object$root} is used.}
\item{amat}{if \code{zeta} is the requested prediction (mean value
or canonical, unconditional or conditional, depending on \code{parm.type}
and \code{model.type}), then we predict the linear function
\code{t(amat) \%*\% zeta}. May be missing, in which case the identity
linear function is used.
For \code{predict.aster}, a three-dimensional
array with \code{dim(amat)[1:2] == dim(modmat)[1:2]}.
For \code{predict.aster.formula}, a three-dimensional array
of the same dimensions as required for \code{predict.aster}
(even though \code{modmat} is not provided). First dimension
is number of individuals in \code{newdata}, if provided, otherwise
number of individuals in \code{object$data}. Second dimension
is number of variables (\code{length(object$pred)}).
}
\item{parm.type}{the type of parameter to predict. The default is
mean value parameters (the opposite of the default
for \code{\link{predict.glm}}), the expected value of a linear function
of the response under the MLE probability model (also called the
MLE of the mean value parameter). The expectation is unconditional
or conditional depending on \code{model.type}.
The alternative \code{"canonical"} is the value of a linear function
of the MLE of canonical parameters under the MLE probability model.
The canonical parameter is unconditional
or conditional depending on \code{model.type}.
The value of this argument can be abbreviated.
}
\item{model.type}{the type of model in which to predict. The default is
\code{"unconditional"} in which case the parameters (either mean value
or canonical, depending on the value of \code{parm.type}) are those
of an unconditional model.
The alternative is \code{"conditional"} in which case the parameters
are those of a conditional model.
The value of this argument can be abbreviated.
}
\item{se.fit}{logical switch indicating if standard errors are required.}
\item{info}{the type of Fisher information used to compute standard errors.}
\item{info.tol}{tolerance for eigenvalues of Fisher information.
If \code{eval} is the vector of eigenvalues of the information matrix,
then \code{eval < info.tol * max(eval)} are considered zero. Hence the
corresponding eigenvectors are directions of constancy or recession of
the log likelihood.}
\item{newdata}{optionally, a data frame in which to look for variables with
which to predict. If omitted, see \code{modmat} above. See also details
section below.}
\item{varvar}{a variable of length \code{nrow(newdata)}, typically a
variable in \code{newdata}
that is a factor whose levels are character strings
treated as variable names. The number of variable names is \code{nnode}.
Must be of the form \code{rep(vars, each = nind)} where \code{vars} is
a vector of variable names. Not used if \code{newdata} is missing.}
\item{idvar}{a variable of length \code{nrow(newdata)}, typically a
variable in \code{newdata}
that indexes individuals. The number
of individuals is \code{nind}.
Must be of the form \code{rep(inds, times = nnode)} where \code{inds} is
a vector of labels for individuals. Not used if \code{newdata} is missing.}
\item{newcoef}{if not \code{NULL},
a vector of the same length as \code{object$coefficients}, used
in its place when one wants predictions at other than the fitted
coefficient values.}
\item{\dots}{further arguments passed to or from other methods.}
}
\description{
Obtains predictions and optionally estimates standard errors of those
predictions from a fitted Aster model object.
}
\details{
Note that \code{model.type} need have nothing to do with the type
of the fitted aster model, which is \code{object$type}.
Whether the
fitted model is conditional or unconditional, one typically wants
\emph{unconditional} mean value parameters, because conditional mean
value parameters for hypothetical individuals depend on the hypothetical
data \code{x}, which usually makes no scientific sense.
If one does ask for \emph{conditional} mean value parameters, generally
the \dQuote{data} should satisfy \code{all(x == 1)} and \code{all(root == 1)},
so that the mean value parameters
are \dQuote{per unit of predecessor variable}, that is we \dQuote{predict}
\eqn{\psi''(\theta_{i j})}{psi''(theta[i, j])} rather than this multiplied
by \eqn{X_{i p(j)}}{x[i, p(j)]}, where \eqn{p(j)} is the mathematical
function defined by the R expression \code{pred[j]}.
Similarly, if \code{object$type == "conditional"}, then the conditional
canonical parameters are a linear function of the regression coefficients
\eqn{\theta = M \beta}{theta = M beta}, where \eqn{M} is the model matrix,
but one can predict either \eqn{\theta}{theta} or the unconditional
canonical parameters \eqn{\varphi}{phi},
as selected by \code{model.type}.
Similarly, if \code{object$type == "unconditional"},
so \eqn{\varphi = M \beta}{phi = M beta}, one can predict either
\eqn{\theta}{theta} or \eqn{\varphi}{phi}
as selected by \code{model.type}.
The specification of the prediction model is confusing because there
are so many possibilities. First the \dQuote{usual} case.
The fit was done using a formula, found in \code{object$formula}.
A data frame \code{newdata} that has the same variables as \code{object$data},
the data frame used in the fit, but may have different rows (representing
hypothetical individuals) is supplied.
But \code{newdata} must specify \emph{all nodes}
of the graphical model for each (hypothetical, new) individual,
just like \code{object$data} did for real observed individuals.
Hence \code{newdata} is typically constructed using \code{\link{reshape}}.
See also the details section of \code{\link{aster}}.
In this \dQuote{usual} case we need \code{varvar} and \code{idvar} to
tell us what rows of \code{newdata} correspond to which individuals and
nodes (the same role they played in the original fit by \code{\link{aster}}).
If we are predicting canonical parameters, then we do not need \code{root} or
\code{x}.
If we are predicting unconditional mean value parameters, then
we also need \code{root} but not \code{x}.
If we are predicting conditional mean value parameters, then
we also need both \code{root} and \code{x}.
In the \dQuote{usual} case, these are found in \code{newdata} and
not supplied as arguments to \code{predict}. Moreover, \code{x}
is not named \code{"x"} but is the response in \code{object$formula}.
The next case, \code{predict(object)} with no other arguments,
is often used with linear models (\code{\link{predict.lm}}),
but we expect it will be little used for aster models. As for linear
models, this \dQuote{predicts} the observed data. In this case
\code{modmat}, \code{x}, and \code{root} are found in \code{object}
and nothing is supplied as an argument to \code{predict.aster}, except
perhaps \code{amat} if one wants a function of predictions for the observed
data.
The final case, also perhaps little used, is a fail-safe mode for problems
in which the R formula language just cannot be bludgeoned into doing what
you want. This is the same reason \code{\link{aster.default}} exists.
Then a model matrix can be constructed \dQuote{by hand}, and the function
\code{predict.aster} is used instead of \code{predict.aster.formula}.
Note that it is possible to use a \dQuote{constructed by hand}
model matrix even if \code{object} was produced by \code{\link{aster.formula}}.
Simply explicitly call \code{predict.aster} rather than \code{predict}
to override the R method dispatch (which would
call \code{predict.aster.formula} in this case).
}
\value{
If \code{se.fit = FALSE}, a vector of predictions.
If \code{se.fit = TRUE}, a list with components
\item{fit}{Predictions}
\item{se.fit}{Estimated standard errors}
}
\examples{
### see package vignette for explanation ###
library(aster)
data(echinacea)
# Names of the response columns in the wide-format echinacea data; after
# reshaping, each name labels one node of the graph for every individual.
vars <- c("ld02", "ld03", "ld04", "fl02", "fl03", "fl04",
"hdct02", "hdct03", "hdct04")
# Reshape to long format: one row per (individual, node) pair, with the node
# name stored in varb and the response value stored in resp.
redata <- reshape(echinacea, varying = list(vars), direction = "long",
timevar = "varb", times = as.factor(vars), v.names = "resp")
# Root data: the constant 1 on every row.
redata <- data.frame(redata, root = 1)
# pred[j] is the position in vars of the predecessor of node j
# (0 presumably marks the root as predecessor -- confirm in the aster help).
pred <- c(0, 1, 2, 1, 2, 3, 4, 5, 6)
# One family code per node (the meaning of the codes is defined by aster,
# not visible here).
fam <- c(1, 1, 1, 1, 1, 1, 3, 3, 3)
# Build an indicator column hdct: 1 on rows whose varb name contains "hdct",
# 0 on all other rows.
hdct <- grep("hdct", as.character(redata$varb))
hdct <- is.element(seq(along = redata$varb), hdct)
redata <- data.frame(redata, hdct = as.integer(hdct))
# Fit the model; varb and id tell aster which node and which individual each
# row of redata belongs to, and root names the root data column.
aout4 <- aster(resp ~ varb + nsloc + ewloc + pop * hdct - pop,
pred, fam, varb, id, root, data = redata)
# Hypothetical individuals to predict for: one per level of pop, with every
# response column and root set to 1 and both location covariates set to 0.
newdata <- data.frame(pop = levels(echinacea$pop))
for (v in vars)
newdata[[v]] <- 1
newdata$root <- 1
newdata$ewloc <- 0
newdata$nsloc <- 0
# newdata must describe all nodes for each individual, so it is reshaped to
# long format in the same way as the data used for the fit.
renewdata <- reshape(newdata, varying = list(vars),
direction = "long", timevar = "varb", times = as.factor(vars),
v.names = "resp")
# Recreate the hdct indicator column for the new long-format data.
hdct <- grep("hdct", as.character(renewdata$varb))
hdct <- is.element(seq(along = renewdata$varb), hdct)
renewdata <- data.frame(renewdata, hdct = as.integer(hdct))
nind <- nrow(newdata)
nnode <- length(vars)
# amat defines the linear functions of the predictions to report: first index
# is individual, second is node, third is the index of the linear function.
# Linear function i puts weight 1 on each hdct node of individual i and
# weight 0 everywhere else.
amat <- array(0, c(nind, nnode, nind))
for (i in 1:nind)
amat[i , grep("hdct", vars), i] <- 1
# With se.fit = TRUE the result is a list with components fit and se.fit.
# parm.type and model.type are left at their defaults.
foo <- predict(aout4, varvar = varb, idvar = id, root = root,
newdata = renewdata, se.fit = TRUE, amat = amat)
# Tabulate estimate and standard error, one row per level of pop.
bar <- cbind(foo$fit, foo$se.fit)
dimnames(bar) <- list(as.character(newdata$pop), c("Estimate", "Std. Error"))
print(bar)
}
\keyword{models}
\keyword{regression}