# fields, Tools for spatial data # Copyright 2004-2007, Institute for Mathematics Applied Geosciences # University Corporation for Atmospheric Research # Licensed under the GPL -- www.gpl.org/licenses/gpl.html mKrig <- function(x, y, weights = rep(1, nrow(x)), Z=NULL, lambda = 0, cov.function = "stationary.cov", m = 2, chol.args = NULL, cov.args = NULL, find.trA = TRUE, NtrA = 20, iseed = 123, ...) { # grab other arguments for covariance function cov.args <- c(cov.args, list(...)) # see comments in Krig.engine.fixed for algorithmic commentary # # # default values for Cholesky decomposition, these are important # for sparse matrix decompositions # check for duplicate x's. # stop if there are any if (any(duplicated(cat.matrix(x)))) stop("locations are not unique see help(mKrig) ") if (any(is.na(y))) stop("Missing values in y should be removed") if(!is.null(Z)){ Z<- as.matrix(Z)} # create fixed part of model as m-1 order polynomial Tmatrix <- cbind(fields.mkpoly(x, m), Z) # set some dimensions np <- nrow(x) nt<- ncol(Tmatrix) nZ<- ifelse (is.null(Z), 0, ncol(Z)) ind.drift <- c(rep(TRUE, (nt-nZ)), rep( FALSE, nZ)) # as a place holder for reduced rank Kriging, distinguish between # observations locations and locations to evaluate covariance. # (this is will also predict.mKrig to handle a Krig object) knots <- x # covariance matrix at observation locations # NOTE: if cov.function is a sparse constuct then tempM will be sparse. # see e.g. wendland.cov tempM <- do.call(cov.function, c(cov.args, list(x1 = x, x2 = x))) # # decide how to handle the pivoting. # one wants to do pivoting if the matrix is sparse. # if tempM is not a matrix assume that it is in sparse format. # sparse.flag <- !is.matrix(tempM) # # set arguments that are passed to cholesky # if (is.null(chol.args)) { chol.args <- list(pivot = sparse.flag) } else { chol.args <- chol.args } # record sparsity of tempM nzero <- ifelse(sparse.flag, length(tempM@entries), np^2) # add diagonal matrix that is the observation error Variance # NOTE: diag must be a overloaded function to handle sparse format. if (lambda != 0) { diag(tempM) <- (lambda/weights) + diag(tempM) } # At this point tempM is proportional to the covariance matrix of the # observation vector, y. # # cholesky decoposition of tempM # do.call used to supply other arguments to the function # especially for sparse applications. # If chol.args is NULL then this is the same as # Mc<-chol(tempM) Mc <- do.call("chol", c(list(x = tempM), chol.args)) lnDetCov <- 2 * sum(log(diag(Mc))) # Efficent way to multply inverse of Mc times the Tmatrix VT <- forwardsolve(Mc, x = Tmatrix, transpose = TRUE, upper.tri = TRUE) qr.VT <- qr(VT) # start linear algebra to find solution # Note that all these expressions make sense if y is a matrix # of several data sets and one is solving for the coefficients # of all of these at once. In this case d.coef and c.coef are matrices # # now do generalized least squares for d d.coef <- as.matrix(qr.coef(qr.VT, forwardsolve(Mc, transpose = TRUE, y, upper.tri = TRUE))) # and then find c. # find the coefficents for the spatial part. c.coef <- as.matrix(forwardsolve(Mc, transpose = TRUE, y - Tmatrix %*% d.coef, upper.tri = TRUE)) # save intermediate result this is t(y- T d.coef)( M^{-1}) ( y- T d.coef) quad.form <- c(colSums(as.matrix(c.coef^2))) # find c coefficients c.coef <- backsolve(Mc, c.coef) # GLS covariance matrix for fixed part. Rinv <- solve(qr.R(qr.VT)) Omega <- Rinv %*% t(Rinv) # MLE estimate of rho and sigma # rhohat <- c(colSums(as.matrix(c.coef * y)))/(np - nt) rho.MLE <- quad.form/np rhohat<- c(colSums(as.matrix(c.coef * y)))/np shat.MLE <- sigma.MLE <- sqrt(lambda * rho.MLE) # the log profile likehood with rhohat and dhat substituted # leaving a profile for just lambda. # note that this is _not_ -2*loglike just the log and # includes the constants lnProfileLike <- (-np/2 - log(2*pi)*(np/2) - (np/2)*log(rho.MLE) - (1/2) * lnDetCov) # # return coefficients and include lambda as a check because # results are meaningless for other values of lambda # returned list is an 'object' of class mKrig (micro Krig) # also save the matrix decopositions so coefficients can be # recalculated for new y values. out <- list(d = (d.coef), c = (c.coef), nt = nt, np = np, lambda.fixed = lambda, x = x, knots = knots, cov.function.name = cov.function, args = cov.args, m = m, chol.args = chol.args, call = match.call(), nonzero.entries = nzero, shat.MLE = shat.MLE, rho.MLE = rho.MLE, sigma.MLE, rhohat = rhohat, lnProfileLike = lnProfileLike, lnDetCov = lnDetCov,quad.form = quad.form, Omega = Omega, qr.VT = qr.VT, Mc = Mc, Tmatrix = Tmatrix, ind.drift=ind.drift,nZ=nZ) # # find the residuals directly from solution # to avoid a call to predict out$residuals<- lambda*c.coef/weights out$fitted.values <- y - out$residuals # estimate effective degrees of freedom using Monte Carlo trace method. if (find.trA) { out2 <- mKrig.trace(out, iseed, NtrA) out$eff.df <- out2$eff.df out$trA.info <- out2$trA.info out$GCV <- (sum(out$residuals^2)/np)/(1 - out2$eff.df/np)^2 if (NtrA < np) { out$GCV.info <- (sum(out$residuals^2)/np)/(1 - out2$trA.info/np)^2 } else { out$GCV.info <- NA } } else { out$eff.df <- NA out$trA.info <- NA out$GCV <- NA } class(out) <- "mKrig" return(out) } mKrig.trace <- function(object, iseed, NtrA) { set.seed(iseed) # if more tests that number of data points just # find A exactly by np predicts. if (NtrA >= object$np) { Ey <- diag(1, object$np) NtrA <- object$np trA.info <- diag(predict.mKrig(object, ynew = Ey)) trA.est <- sum(trA.info) } else { # if fewer tests then use random trace method # find fitted.values for iid N(0,1) 'data' to calculate the # the Monte Carlo estimate of tr A(lambda) # basically repeat the steps above but take some # short cuts because we only need fitted.values # create random normal 'data' Ey <- matrix(rnorm(object$np * NtrA), nrow = object$np, ncol = NtrA) trA.info <- colSums(Ey * (predict.mKrig(object, ynew = Ey))) trA.est <- mean(trA.info) } return(list(trA.info = trA.info, eff.df = trA.est)) } mKrig.coef <- function(object, y) { # given new data y and the matrix decompositions in the # mKrig object find coefficients d and c. # d are the coefficients for the fixed part # in this case hard coded for a low order polynomial # c are coefficients for the basis functions derived from the # covariance function. # # see mKrig itself for more comments on the linear algebra # # Note that all these expressions make sense if y is a matrix # of several data sets and one is solving for the coefficients # of all of these at once. In this case d.coef and c.coef are matrices # # generalized least squares for d d.coef <- qr.coef(object$qr.VT, forwardsolve(object$Mc, transpose = TRUE, y, upper.tri = TRUE)) # residuals from subtracting off fixed part # of model as m-1 order polynomial resid <- y - object$Tmatrix %*% d.coef # and now find c. c.coef <- forwardsolve(object$Mc, transpose = TRUE, resid, upper.tri = TRUE) c.coef <- backsolve(object$Mc, c.coef) out <- list(d = (d.coef), c = (c.coef)) return(out) } print.mKrig <- function(x, digits = 4, ...) { if( is.matrix(x$residuals)){ n<- nrow(x$residuals) NData<- ncol(x$residuals)} else{ n<- length( x$residuals) NData<-1 } c1 <- "Number of Observations:" c2 <- n if( NData>1){ c1<- c( c1,"Number of data sets fit:") c2<- c( c2, NData)} c1 <- c(c1, "Degree of polynomial null space ( base model):") c2 <- c(c2, x$m - 1) c1 <- c(c1, "Total number of parameters in base model") c2 <- c(c2, x$nt) if( x$nZ>0){ c1 <- c(c1, "Number of additional covariates (Z)") c2 <- c(c2, x$nZ)} if (!is.na(x$eff.df)) { c1 <- c(c1, " Eff. degrees of freedom") c2 <- c(c2, signif(x$eff.df, digits)) if (length(x$trA.info) < x$np) { c1 <- c(c1, " Standard Error of estimate: ") c2 <- c(c2, signif(sd(x$trA.info)/sqrt(length(x$trA.info)), digits)) } } c1 <- c(c1, "Smoothing parameter") c2 <- c(c2, signif(x$lambda.fixed, digits)) if( NData==1){ c1 <- c(c1, "MLE sigma ") c2 <- c(c2, signif(x$shat.MLE, digits)) c1 <- c(c1, "MLE rho") c2 <- c(c2, signif(x$rho.MLE, digits))} c1 <- c(c1, "Nonzero entries in covariance") c2 <- c(c2, x$nonzero.entries) sum <- cbind(c1, c2) dimnames(sum) <- list(rep("", dim(sum)[1]), rep("", dim(sum)[2])) cat("Call:\n") dput(x$call) print(sum, quote = FALSE) cat(" ", fill=TRUE) cat(" Covariance Model:", x$cov.function, fill = TRUE) if (x$cov.function == "stationary.cov") { cat(" Covariance function: ", ifelse(is.null(x$args$Covariance), "Exponential", x$args$Covariance), fill = TRUE) } if (!is.null(x$args)) { cat(" Non-default covariance arguments and their values ", fill = TRUE) nlist <- as.character(names(x$args)) NL <- length(nlist) for (k in 1:NL) { cat(" Argument:", nlist[k]," " ) if(object.size(x$args[[k]]) <= 1024) { cat("has the value(s): ",fill=TRUE) print(x$args[[k]])} else{ cat("too large to print value, size > 1K ...", fill =TRUE)} } } invisible(x) } summary.mKrig<- function(object,...){ print.mKrig( object,...)} predict.mKrig <- function(object, xnew = NULL, ynew = NULL, derivative = 0, Z=NULL, drop.Z=FALSE,just.fixed=FALSE,...) { # the main reason to pass new args to the covariance is to increase # the temp space size for sparse multiplications # other optional arguments from mKrig are passed along in the # list object$args cov.args <- list(...) # predict at observation locations by default if (is.null(xnew)) { xnew <- object$x } if (is.null(Z)) { Z <- object$Tmatrix[,!object$ind.drift] } if (!is.null(ynew)) { coef.hold <- mKrig.coef(object, ynew) c.coef <- coef.hold$c d.coef <- coef.hold$d } else { c.coef <- object$c d.coef <- object$d } # fixed part of the model this a polynomial of degree m-1 # Tmatrix <- fields.mkpoly(xnew, m=object$m) # if (derivative == 0) { if( drop.Z|object$nZ==0){ # just evaluate polynomial and not the Z covariate temp1 <- fields.mkpoly(xnew, m = object$m) %*% d.coef[object$ind.drift,]} else{ temp1<- cbind( fields.mkpoly(xnew, m = object$m),Z) %*% d.coef} } else { if(!drop.Z & object$nZ>0){ stop("derivative not supported with Z covariate included")} temp1 <- fields.derivative.poly(xnew, m = object$m, d.coef[object$ind.drift,]) } if( just.fixed){ return(temp1)} # add nonparametric part. Covariance basis functions # times coefficients. # syntax is the name of the function and then a list with # all the arguments. This allows for different covariance functions # that have been passed as their name. if (derivative == 0) { # argument list are the parameters and other options from mKrig # locations and coefficients, temp2 <- do.call(object$cov.function.name, c(object$args, list(x1 = xnew, x2 = object$knots, C = c.coef), cov.args)) } else { temp2 <- do.call(object$cov.function.name, c(object$args, list(x1 = xnew, x2 = object$knots, C = c.coef, derivative = derivative), cov.args)) } # add two parts together and coerce to vector return((temp1 + temp2)) }