# sreg.family.R
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"gcv.sreg" <- function(out, lambda.grid = NA, cost = 1, 
    nstep.cv = 80, rmse = NA, offset = 0, trmin = NA, trmax = NA, 
    verbose = FALSE, tol = 1e-05) {
    shat.pure.error <- out$shat.pure.error
    pure.ss <- out$pure.ss
    nt <- 2
    np <- out$np
    N <- out$N
    out$cost <- cost
    out$offset <- offset
    # find good end points for lambda coarse grid.
    if (is.na(trmin)) 
        trmin <- 2.05
    if (is.na(trmax)) 
        trmax <- out$np * 0.95
    if (verbose) {
        cat("trmin and trmax", fill = TRUE)
        cat(trmin, trmax, fill = TRUE)
    }
    if (is.na(lambda.grid[1])) {
        l2 <- sreg.df.to.lambda(trmax, out$xM, out$weightsM)
        l1 <- sreg.df.to.lambda(trmin, out$xM, out$weightsM)
        lambda.grid <- exp(seq(log(l2), log(l1), , nstep.cv))
        if (verbose) {
            cat("endpoints of coarse lambda grid", fill = TRUE)
            cat(l1, l2, fill = TRUE)
        }
    }
    # build up table of coarse grid search results for lambda
    # in the matrix gcv.grid
    nl <- length(lambda.grid)
    V <- V.model <- V.one <- trA <- MSE <- RSS.model <- rep(NA, 
        nl)
    #   loop through lambda's and compute various quantities related to
    #   lambda and the fitted spline.
    for (k in 1:nl) {
        temp <- sreg.fit(lambda.grid[k], out, verbose = verbose)
        RSS.model[k] <- temp$rss
        trA[k] <- temp$trace
        V[k] <- temp$gcv
        V.one[k] <- temp$gcv.one
        V.model[k] <- temp$gcv.model
    }
    # adjustments to columns of gcv.grid
    RSS <- RSS.model + pure.ss
    shat <- sqrt(RSS/(N - trA))
    gcv.grid <- cbind(lambda.grid, trA, V, V.one, V.model, shat)
    dimnames(gcv.grid) <- list(NULL, c("lambda", "trA", "GCV", 
        "GCV.one", "GCV.model", "shat"))
    if (verbose) {
        cat("Results of coarse grid search", fill = TRUE)
        print(gcv.grid)
    }
    lambda.est <- matrix(ncol = 4, nrow = 5, dimnames = list(c("GCV", 
        "GCV.model", "GCV.one", "RMSE", "pure error"), c("lambda", 
        "trA", "GCV", "shat")))
    # now do various refinements for the different criteria used to find
    # a good value for the smoothing parameter lambda
    ##### traditional leave-one-out
    lambda.est[1, 1] <- Krig.find.gcvmin(out, lambda.grid, gcv.grid[, 
        "GCV"], sreg.fgcv, tol = tol, verbose = verbose)
    if (verbose) {
        cat("leave one out GCV lambda", fill = TRUE)
        cat(lambda.est[1, 1], fill = TRUE)
    }
    ##### using GCV criterion adjusting for replicates
    if (!is.na(shat.pure.error)) {
        lambda.est[2, 1] <- Krig.find.gcvmin(out, lambda.grid, 
            gcv.grid[, "GCV.model"], sreg.fgcv.model, tol = tol, 
            verbose = verbose)
        if (verbose) {
            cat("results of GCV.model search", fill = TRUE)
            cat(lambda.est[2, 1], fill = TRUE)
        }
    }
    lambda.est[3, 1] <- Krig.find.gcvmin(out, lambda.grid, gcv.grid[, 
        "GCV.one"], sreg.fgcv.one, tol = tol, verbose = verbose)
    ##### matching an external value of RMSE
    lambda.rmse <- NA
    lambda.pure.error <- NA
    if (!is.na(rmse)) {
        # largest lambda in the grid whose implied shat is below the target rmse
        guess <- gcv.grid[, "shat"] < rmse
        guess <- if (any(guess)) max(gcv.grid[guess, "lambda"]) else NA
        if (verbose) {
            cat("trying to match the target RMSE", fill = TRUE)
            cat("rmse", rmse, "guess", guess, fill = TRUE)
        }
        if (!is.na(guess)) {
            lambda.rmse <- find.upcross(sreg.fs2hat, out, upcross.level = rmse^2, 
                guess = guess, tol = tol * rmse^2)
            lambda.est[4, 1] <- lambda.rmse
        }
        else {
            warning("Value of rmse is outside possible range")
        }
    }
    ##### matching  sigma estimated from the replicates.
    if (!is.na(shat.pure.error)) {
        #       all shats smaller than pure error estimate
        guess <- gcv.grid[, "shat"] < shat.pure.error
        #       set guess to NA if no lambda qualifies
        if (any(guess)) {
            guess <- max(gcv.grid[guess, "lambda"])
        }
        else {
            guess <- NA
        }
        if (verbose) {
            cat("#### trying to matching with sigma from pure error", 
                fill = TRUE)
            cat("shat.pure", shat.pure.error, "guess", guess, 
                fill = TRUE)
        }
        if (!is.na(guess)) {
            lambda.pure.error <- find.upcross(sreg.fs2hat, out, 
                upcross.level = shat.pure.error^2, guess = guess, 
                tol = tol * shat.pure.error^2)
            lambda.est[5, 1] <- lambda.pure.error
            if (verbose) {
                cat("results of matching with pure error sigma", 
                  fill = TRUE)
            }
        }
        else {
            warning("Value of pure error estimate\nis outside possible range")
        }
    }
    if (verbose) {
        cat("All forms of estimated lambdas so far", fill = TRUE)
        print(lambda.est)
    }
    for (k in 1:5) {
        lam <- lambda.est[k, 1]
        if (!is.na(lam)) {
            temp <- sreg.fit(lam, out)
            lambda.est[k, 2] <- temp$trace
            if ((k == 1) | (k > 3)) {
                lambda.est[k, 3] <- temp$gcv
            }
            if (k == 2) {
                lambda.est[k, 3] <- temp$gcv.model
            }
            if (k == 3) {
                lambda.est[k, 3] <- temp$gcv.one
            }
            lambda.est[k, 4] <- temp$shat
        }
    }
    list(gcv.grid = gcv.grid, lambda.est = lambda.est)
}
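# NOTE: gcv.sreg is normally called from within sreg(); the two components it
# returns are stored on the fitted object as fit$gcv.grid and fit$lambda.est.
# A small inspection sketch, assuming fit <- sreg(x, y) has already been run:
#   fit$lambda.est                              # lambda, trA, GCV, shat by criterion
#   plot(log(fit$gcv.grid[, "lambda"]),
#        fit$gcv.grid[, "GCV"], type = "l")     # coarse GCV curve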
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg" <- function(x, y, lambda = NA, df = NA, offset = 0, 
    weights = rep(1, length(x)), cost = 1, nstep.cv = 80, tol = 1e-05, 
    find.diagA = TRUE, trmin = 2.01, trmax = NA, lammin = NA, 
    lammax = NA, verbose = FALSE, do.cv = TRUE, method = "GCV", 
    rmse = NA, na.rm = TRUE) {
    call <- match.call()
    out <- list()
    out$call <- match.call()
    class(out) <- c("sreg")
    out$cost <- cost
    out$offset <- offset
    out$method <- method
    #
    # some obscure components so that some of the Krig functions
    # work; nt is the dimension of the spline's 'null space' (linear functions)
    out$nt <- 2
    out$knots <- NULL
    #
    # various checks on x and y including looking for NAs.
    out2 <- Krig.check.xY(x, y, NULL, weights, na.rm, verbose = verbose)
    out <- c(out, out2)
    # find replicate values in the x vector:
    # unique x values are now in out$xM and the means of
    # y at each replicate group are in out$yM.
    out <- c(out, Krig.replicates(out, verbose = verbose))
    # number of unique locations
    out$np <- length(out$yM)
    # now set maximum of trace for upper bound of GCV grid search
    if (is.na(trmax)) {
        trmax <- out$np * 0.99
    }
    if (verbose) {
        print(out)
    }
    #
    # sorted unique values for prediction to make line plotting quick
    xgrid <- sort(out$xM)
    out$trace <- NA
    #
    # figure out if the GCV function should be minimized
    # and which value of lambda should be used for the estimate
    # old code used lam as argument, copy it over from lambda
    lam <- lambda
    if (is.na(lam[1]) & is.na(df[1])) {
        do.cv <- TRUE
    }
    else {
        do.cv <- FALSE
    }
    #
    # find lambda's if df's are given
    if (!is.na(df[1])) {
        lam <- rep(0, length(df))
        for (k in 1:length(df)) {
            lam[k] <- sreg.df.to.lambda(df[k], out$xM, out$weightsM)
        }
    }
    #
    if (verbose) {
        cat("lambda grid", fill = TRUE)
        print(lam)
    }
    if (do.cv) {
        a <- gcv.sreg(out, lambda = lam, cost = cost, offset = offset, 
            nstep.cv = nstep.cv, verbose = verbose, trmin = trmin, 
            trmax = trmax, rmse = rmse)
        # the spline will be evaluated at the selected solution, so
        # replace the lambda grid with the single lambda chosen by
        # the requested method (default "GCV").
        out$gcv.grid <- a$gcv.grid
        out$lambda.est <- a$lambda.est
        #
        # save  GCV estimate if that is what is needed
        lam <- a$lambda.est[method, "lambda"]
        out$shat.GCV <- a$lambda.est[method, "shat"]
    }
    #
    # now evaluate spline at lambda either from specified grid or GCV value.
    b <- list()
    # lam can either be  a grid or just the GCV value
    NL <- length(lam)
    NG <- length(xgrid)
    h <- log(lam)
    residuals <- matrix(NA, ncol = NL, nrow = length(out$y))
    predicted <- matrix(NA, ncol = NL, nrow = NG)
    trace <- rep(NA, NL)
    job <- as.integer(c(0, 3, 0))
    if (find.diagA) {
        diagA <- matrix(NA, ncol = NL, nrow = out$np)
        # add switch to find diag of A.
        job <- as.integer(c(3, 3, 0))
    }
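    # NOTE (inferred from this call and the one in sreg.fit, not from the
    # FORTRAN source): job[1] = 3 appears to switch on returning the hat
    # matrix diagonal in 'diag', and job[2] = 3 requests evaluation of the
    # spline at the ngrid points in xg; sreg.fit uses c(3, 0, 0) with ngrid = 0.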
    for (k in 1:NL) {
        #
        # call cubic spline FORTRAN, this is nasty looking but fast.
        # note lambda is passed in log scale.
        # what the routine does is controlled by array job
        # spline solution evaluated at xgrid
        #
        b <- .Fortran("css", h = as.double(h[k]), npoint = as.integer(out$np), 
            x = as.double(out$xM), y = as.double(out$yM), wt = as.double(1/sqrt(out$weightsM)), 
            sy = as.double(rep(0, out$np)), trace = as.double(0), 
            diag = as.double(c(cost, offset, rep(0, (out$np - 
                2)))), cv = as.double(0), ngrid = as.integer(NG), 
            xg = as.double(xgrid), yg = as.double(rep(0, NG)), 
            job = as.integer(job), ideriv = as.integer(0), ierr = as.integer(0)) 
        if (find.diagA) {
            diagA[, k] <- b$diag
        }
        # note distinction between yM and y, xM and x
        # these are residuals at all data point locations not just the
        # unique set.
        trace[k] <- b$trace
        residuals[, k] <- out$y - splint(out$xM, b$sy, out$x)
        predicted[, k] <- b$yg
    }
    out$call <- call
    out$lambda <- lam
    out$do.cv <- do.cv
    out$residuals <- residuals
    out$trace <- trace
    out$fitted.values <- out$y - residuals
    out$predicted <- list(x = xgrid, y = predicted)
    if (length(lam) == 1) {
        out$eff.df <- out$trace[1]
    }
    if (find.diagA) {
        out$diagA <- diagA
    }
    class(out) <- "sreg"
    return(out)
}
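# A minimal usage sketch for sreg(), assuming the fields package (with its
# rat.diet example data) is attached; kept as comments so nothing runs when
# this file is sourced:
#   fit <- sreg(rat.diet$t, rat.diet$trt)            # lambda chosen by GCV
#   fit$lambda.est                                   # table of lambda estimates
#   fit2 <- sreg(rat.diet$t, rat.diet$trt, df = 6)   # fix effective df instead
#   plot(fit$predicted$x, fit$predicted$y, type = "l")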
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg.df.to.lambda" <- function(df, x, wt, guess = 1, 
    tol = 1e-05) {
    if (is.na(df)) 
        return(NA)
    n <- length(unique(x))
    info <- list(x = x, wt = wt, df = df)
    if (df > n) {
        warning(" df too large to match a lambda value")
        return(NA)
    }
    h1 <- log(guess)
    ########## find upper lambda
    for (k in 1:25) {
        tr <- sreg.trace(h1, info)
        if (tr <= df) 
            break
        h1 <- h1 + 1.5
    }
    ########## find lower lambda
    h2 <- log(guess)
    for (k in 1:25) {
        tr <- sreg.trace(h2, info)
        if (tr >= df) 
            break
        h2 <- h2 - 1.5
    }
    out <- bisection.search(h1, h2, sreg.fdf, tol = tol, f.extra = info)$x
    exp(out)
}
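# Example (sketch): for unweighted data x, the lambda giving roughly 6
# effective degrees of freedom would be found by
#   sreg.df.to.lambda(6, x, rep(1, length(x)))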
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg.fdf" <- function(h, info) {
    sreg.trace(h, info) - info$df
}
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg.fgcv" <- function(lam, obj) {
    sreg.fit(lam, obj)$gcv
}
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg.fgcv.model" <- function(lam, obj) {
    sreg.fit(lam, obj)$gcv.model
}
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg.fgcv.one" <- function(lam, obj) {
    sreg.fit(lam, obj)$gcv.one
}
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg.fit" <- function(lam, obj, verbose = FALSE) {
    np <- obj$np
    N <- obj$N
    nt <- 2
    if (is.null(obj$cost)) {
        cost <- 1
    }
    else {
        cost <- obj$cost
    }
    if (is.null(obj$offset)) {
        offset <- 0
    }
    else {
        offset <- obj$offset
    }
    if (is.null(obj$shat.pure.error)) {
        shat.pure.error <- 0
    }
    else {
        shat.pure.error <- obj$shat.pure.error
    }
    if (is.null(obj$pure.ss)) {
        pure.ss <- 0
    }
    else {
        pure.ss <- obj$pure.ss
    }
    # print(np)
    # NOTE: h <- log(lam)
    temp <- .Fortran("css", h = as.double(log(lam)), npoint = as.integer(np), 
        x = as.double(obj$xM), y = as.double(obj$yM), wt = as.double(sqrt(1/obj$weightsM)), 
        sy = as.double(rep(0, np)), trace = as.double(0), diag = as.double(rep(0, 
            np)), cv = as.double(0), ngrid = as.integer(0), xg = as.double(0), 
        yg = as.double(0), job = as.integer(c(3, 0, 0)), ideriv = as.integer(0), 
        ierr = as.integer(0))
    rss <- sum((temp$sy - obj$yM)^2 * obj$weightsM)
    MSE <- rss/np
    if ((N - np) > 0) {
        MSE <- MSE + pure.ss/(N - np)
    }
    trA <- temp$trace
    den <- (1 - (cost * (trA - nt - offset) + nt)/np)
    den1 <- (1 - (cost * (trA - nt - offset) + nt)/N)
    # If the denominator is negative then flag this as a bogus case
    # by making the GCV function 'infinity'
    #
    shat <- sqrt((rss + pure.ss)/(N - trA))
    GCV <- ifelse(den > 0, MSE/den^2, NA)
    gcv.model <- ifelse((den > 0) & ((N - np) > 0), pure.ss/(N - 
        np) + (rss/np)/(den^2), NA)
    gcv.one <- ifelse(den > 0, ((pure.ss + rss)/N)/(den1^2), 
        NA)
    list(trace = trA, gcv = GCV, rss = rss, shat = shat, gcv.model = gcv.model, 
        gcv.one = gcv.one)
}
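# Summary of the formulas coded above: with trA = tr(A(lambda)), nt = 2, and
# np unique locations,
#   MSE  = rss/np + pure.ss/(N - np)               (second term only if N > np)
#   den  = 1 - (cost*(trA - nt - offset) + nt)/np
#   GCV  = MSE/den^2,   shat = sqrt((rss + pure.ss)/(N - trA))
# with the GCV criteria set to NA whenever the denominator is not positive.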
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg.fs2hat" <- function(lam, obj) {
    sreg.fit(lam, obj)$shat^2
}
# fields, Tools for spatial data
# Copyright 2004-2011, Institute for Mathematics Applied Geosciences
# University Corporation for Atmospheric Research
# Licensed under the GPL -- www.gpl.org/licenses/gpl.html
"sreg.trace" <- function(h, info) {
    N <- length(info$x)
    # h <- log(lam)
    temp <- (.Fortran("css", h = as.double(h), npoint = as.integer(N), 
        x = as.double(info$x), y = as.double(rep(0, N)), wt = as.double(1/sqrt(info$wt)), 
        sy = as.double(rep(0, N)), trace = as.double(0), diag = as.double(rep(0, 
            N)), cv = as.double(0), ngrid = as.integer(0), xg = as.double(0), 
        yg = as.double(0), job = as.integer(c(3, 0, 0)), ideriv = as.integer(0), 
        ierr = as.integer(0))$trace)
    return(temp)
}