# mpl.R # # $Revision: 5.19 $ $Date: 2005/04/12 20:27:05 $ # # mpl.engine() # Fit a point process model to a two-dimensional point pattern # by maximum pseudolikelihood # # mpl.prepare() # set up data for glm procedure # # ------------------------------------------------------------------- # "mpl" <- function(Q, trend = ~1, interaction = NULL, data = NULL, correction="border", rbord = 0, use.gam=FALSE) { .Deprecated("ppm", package="spatstat") ppm(Q, trend, interaction, data, correction, rbord, use.gam, method="mpl") } "mpl.engine" <- function(Q, trend = ~1, interaction = NULL, covariates = NULL, correction="border", rbord = 0, use.gam=FALSE, forcefit=FALSE ) { # # Extract quadrature scheme # if(verifyclass(Q, "ppp", fatal = FALSE)) { # warning("using default quadrature scheme") Q <- quadscheme(Q) } else if(!verifyclass(Q, "quad", fatal=FALSE)) stop("First argument Q should be a quadrature scheme") # # Data points X <- Q$data # # Data and dummy points together P <- union.quad(Q) # # # Interpret the call want.trend <- !is.null(trend) && !identical.formulae(trend, ~1) want.inter <- !is.null(interaction) && !is.null(interaction$family) the.version <- list(major=1, minor=5, release=7, date="$Date: 2005/04/12 20:27:05 $") if(use.gam && exists("is.R") && is.R()) require(mgcv) if(!want.trend && !want.inter && !forcefit) { # the model is the uniform Poisson process # The MPLE (= MLE) can be evaluated directly npts <- X$n volume <- area.owin(X$window) * markspace.integral(X) lambda <- npts/volume theta <- list("log(lambda)"=log(lambda)) maxlogpl <- npts * (log(lambda) - 1) rslt <- list( method = "mpl", theta = theta, coef = theta, trend = NULL, interaction = NULL, Q = Q, maxlogpl = maxlogpl, internal = list(), covariates = NULL, correction = correction, rbord = rbord, version = the.version) class(rslt) <- "ppm" return(rslt) } ################# P r e p a r e D a t a ###################### prep <- mpl.prepare(Q, X, P, trend, interaction, covariates, want.trend, want.inter, correction, rbord) fmla <- prep$fmla glmdata <- prep$glmdata ################# F i t i t #################################### # Fit the generalized linear/additive model. if(want.trend && use.gam) FIT <- gam(fmla, family=quasi(link=log, var=mu), weights=.mpl.W, data=glmdata, subset=(.mpl.SUBSET=="TRUE"), control=gam.control(maxit=50)) else FIT <- glm(fmla, family=quasi(link=log, var=mu), weights=.mpl.W, data=glmdata, subset=(.mpl.SUBSET=="TRUE"), control=glm.control(maxit=50)) ################ I n t e r p r e t f i t ####################### # Fitted coefficients co <- FIT$coef theta <- if(exists("is.R") && is.R()) NULL else dummy.coef(FIT) W <- glmdata$.mpl.W SUBSET <- glmdata$.mpl.SUBSET Z <- is.data(Q) Vnames <- prep$Vnames # attained value of max log pseudolikelihood maxlogpl <- -(deviance(FIT)/2 + sum(log(W[Z & SUBSET])) + sum(Z & SUBSET)) ###################################################################### # Clean up & return rslt <- list( method = "mpl", theta = theta, coef = co, trend = if(want.trend) trend else NULL, interaction = if(want.inter) interaction else NULL, Q = Q, maxlogpl = maxlogpl, internal = list(glmfit=FIT, glmdata=glmdata, Vnames=Vnames), covariates = covariates, correction = correction, rbord = rbord, version = the.version) class(rslt) <- "ppm" return(rslt) } ########################################################################## ### ///////////////////////////////////////////////////////////////////// ########################################################################## mpl.prepare <- function(Q, X, P, trend, interaction, covariates, want.trend, want.inter, correction, rbord, ...) { if(missing(want.trend)) want.trend <- !is.null(trend) && !identical.formulae(trend, ~1) if(missing(want.inter)) want.inter <- !is.null(interaction) && !is.null(interaction$family) # Validate/evaluate covariates if(want.trend && !is.null(covariates)) covariates.df <- mpl.get.covariates(covariates, P, "quadrature points") ################ C o m p u t e d a t a #################### ### Form the weights and the ``response variable''. .mpl <- list() .mpl$W <- w.quad(Q) .mpl$Z <- is.data(Q) .mpl$Y <- .mpl$Z/.mpl$W .mpl$MARKS <- marks.quad(Q) # is NULL for unmarked patterns glmdata <- data.frame(.mpl.W = .mpl$W, .mpl.Y = .mpl$Y) n <- nrow(glmdata) .mpl$SUBSET <- rep(TRUE, n) internal.names <- c(".mpl.W", ".mpl.Y", ".mpl.Z", ".mpl.SUBSET", "SUBSET", ".mpl") reserved.names <- c("x", "y", "marks", internal.names) zeroes <- attr(.mpl$W, "zeroes") if(!is.null(zeroes)) .mpl$SUBSET <- !zeroes ####################### T r e n d ############################## check.clashes <- function(forbidden, offered, where) { name.match <- outer(forbidden, offered, "==") if(any(name.match)) { is.matched <- apply(name.match, 2, any) matched.names <- (offered)[is.matched] if(sum(is.matched) == 1) { return(paste("The variable \"", matched.names, "\" in ", where, " is a reserved name", sep="")) } else { return(paste("The variables \"", paste(matched.names, collapse="\", \""), "\" in ", where, " are reserved names", sep="")) } } return("") } if(want.trend) { # Check for use of internal names in trend cc <- check.clashes(internal.names, termsinformula(trend), "the model formula") if(cc != "") stop(cc) # Default explanatory variables for trend glmdata <- data.frame(glmdata, x=P$x, y=P$y) if(!is.null(.mpl$MARKS)) glmdata <- data.frame(glmdata, marks=.mpl$MARKS) # if(!is.null(covariates)) { # Check for duplication of reserved names cc <- check.clashes(reserved.names, names(covariates), "\'covariates\'") if(cc != "") stop(cc) # Append `covariates.df' to `glmdata' glmdata <- data.frame(glmdata,covariates.df) } } ###################### I n t e r a c t i o n #################### Vnames <- NULL if(want.inter) { verifyclass(interaction, "interact") # Calculations require a matrix (data) x (data + dummy) indicating equality E <- equals.quad(Q) # Form the matrix of "regression variables" V. # The rows of V correspond to the rows of P (quadrature points) # while the column(s) of V are the regression variables (log-potentials) V <- interaction$family$eval(X, P, E, interaction$pot, interaction$par, correction, ...) if(!is.matrix(V)) stop("interaction evaluator did not return a matrix") # Augment data frame by appending the regression variables for interactions. # # If there are no names provided for the columns of V, # call them "Interact.1", "Interact.2", ... if(is.null(dimnames(V)[[2]])) { # default names nc <- ncol(V) dimnames(V) <- list(dimnames(V)[[1]], if(nc == 1) "Interaction" else paste("Interact.", 1:nc, sep="")) } # List of interaction variable names Vnames <- dimnames(V)[[2]] # Check for name clashes between the interaction variables # and the formula cc <- check.clashes(Vnames, termsinformula(trend), "model formula") if(cc != "") stop(cc) # and with the variables in 'covariates' if(!is.null(covariates)) { cc <- check.clashes(Vnames, names(covariates), "\'covariates\'") if(cc != "") stop(cc) } # OK. append variables. glmdata <- data.frame(glmdata, V) # Keep only those quadrature points for which the # conditional intensity is nonzero. #KEEP <- apply(V != -Inf, 1, all) .mpl$KEEP <- matrowall(V != -Inf) .mpl$SUBSET <- .mpl$SUBSET & .mpl$KEEP if(any(.mpl$Z & !.mpl$KEEP)) { howmany <- sum(.mpl$Z & !.mpl$KEEP) warning(paste(howmany, "data point(s) are illegal (zero conditional intensity under the model)")) # browser() } } ################## D a t a f r a m e ################### # Determine the domain of integration for the pseudolikelihood. if(correction == "border") { bd <- bdist.points(P) .mpl$DOMAIN <- (bd >= rbord) .mpl$SUBSET <- .mpl$DOMAIN & .mpl$SUBSET } glmdata <- data.frame(glmdata, .mpl.SUBSET=.mpl$SUBSET) ################# F o r m u l a ################################## if(!want.trend) trend <- ~1 trendpart <- paste(as.character(trend), collapse=" ") rhs <- paste(c(trendpart, Vnames), collapse= "+") fmla <- paste(".mpl.Y ", rhs) fmla <- as.formula(fmla) #### return(list(fmla=fmla, glmdata=glmdata, Vnames=Vnames)) } #################################################################### #################################################################### mpl.get.covariates <- function(covariates, locations, type="") { x <- locations$x y <- locations$y if(is.null(x) || is.null(y)) { xy <- xy.coords(locations) x <- xy$x y <- xy$y } if(is.null(x) || is.null(y)) stop("Can't interpret \`locations\' as x,y coordinates") n <- length(x) if(is.data.frame(covariates)) { if(nrow(covariates) != n) stop(paste("Number of rows in \`covariates\' != number of", type)) return(covariates) } else if(is.list(covariates)) { if(!all(unlist(lapply(covariates, is.im)))) stop("Some entries in the list \`covariates\' are not images") if(any(names(covariates) == "")) stop("Some entries in the list \`covariates\' are un-named") # look up values of each covariate image at the quadrature points values <- lapply(covariates, lookup.im, x=x, y=y, naok=TRUE) return(as.data.frame(values)) } else stop("\`covariates\' must be either a data frame or a list of images") }