fRegress <- function(y, ...){ UseMethod('fRegress') } fRegress.fd <- function(y, xfdlist, betalist, wt=NULL, y2cMap=NULL, SigmaE=NULL, returnMatrix=FALSE, ...){ yfdPar <- fdPar(y, ...) fRegress.fdPar(yfdPar, xfdlist, betalist, wt=wt, y2cMap=y2cMap, SigmaE=SigmaE, returnMatrix=FALSE, ...) } fRegress.fdPar <- function(y, xfdlist, betalist, wt=NULL, y2cMap=NULL, SigmaE=NULL, returnMatrix=FALSE, ...) { # FREGRESS Fits a functional linear model using multiple # functional independent variables with the dependency being # pointwise or concurrent. # The case of a scalar independent variable is included by treating # it as a functional independent variable with a constant basis # and a unit coefficient. # # Arguments: # YFDPAR ... an object for the dependent variable, # which may be: # a functional data object, # a functional parameter (fdPar) object, or # a vector # XFDLIST ... a list object of length p with each list # containing an object for an independent variable. # the object may be: # a functional data object or # a vector # if XFDLIST is a functional data object or a vector, # it is converted to a list of length 1. # BETALIST ... a list object of length p with each list # containing a functional parameter object for # the corresponding regression function. If any of # these objects is a functional data object, it is # converted to the default functional parameter object. # if BETALIST is a functional parameter object # it is converted to a list of length 1. # WT ... a vector of nonnegative weights for observations # Y2CMAP ... the matrix mapping from the vector of observed values # to the coefficients for the dependent variable. # This is output by function SMOOTH_BASIS. If this is # supplied, confidence limits are computed, otherwise not. # SIGMAE ... Estimate of the covariances among the residuals. This # can only be estimated after a preliminary analysis # with FREGRESS. # RETURNMATRIX ... If False, a matrix in sparse storage model can be returned # from a call to function BsplineS. See this function for # enabling this option. # # Returns FREGRESSLIST ... A list containing seven members with names: # yfdPar ... first argument of FREGRESS # xfdlist ... second argument of FREGRESS # betalist ... third argument of FREGRESS # betaestlist ... estimated regression functions # yhatfdobj ... functional data object containing fitted functions # Cmatinv ... inverse of the coefficient matrix, needed for # function FREGRESS.STDERR that computes standard errors # wt ... weights for observations # df ... degrees of freedom for fit # Last modified 8 May 2012 by Jim Ramsay ## ## 1. check YFDPAR and compute sample size N ## yfdPar <- y if (inherits(yfdPar, "fd")) yfdPar <- fdPar(yfdPar) if (!(inherits(yfdPar, "fdPar") || inherits(yfdPar, "numeric"))) stop("First argument is not of class 'fd', 'fdPar' or 'numeric'.") if (inherits(yfdPar, "fdPar")) { yfd <- yfdPar$fd ycoef <- yfd$coefs ydim <- dim(ycoef) if(length(ydim)>2) stop('fRegress.fdPar only supports a univariate yfdPar; ', 'dim(yfdPar$fd$coefs) = ', paste(ydim, collapse=', ')) N <- ydim[2] } ## ## 2. get number of independent variables p & check betalist ## p <- length(xfdlist) # check BETALIST if (inherits(betalist, "fd")) betalist <- list(betalist) if (!inherits(betalist, "list")) stop("Argument BETALIST is not a list object.") if (length(betalist) != p) stop("Number of regression coefficients does not match\n", " the number of independent variables.") berror <- FALSE for (j in 1:p) { betafdParj <- betalist[[j]] if (inherits(betafdParj, "fd") || inherits(betafdParj, "basisfd")){ betafdParj <- fdPar(betafdParj) betalist[[j]] <- betafdParj } if (!inherits(betafdParj, "fdPar")) { print(paste("BETALIST[[",j,"]] is not a FDPAR object.")) berror <- TRUE } } if (berror) stop("bad betalist.") betafd1 <- betalist[[1]]$fd betabasis1 <- betafd1$basis rangeval <- betabasis1$rangeval ## ## 3. check XFDLIST ## if (inherits(xfdlist, "fd") || inherits(xfdlist, "numeric")) xfdlist <- list(xfdlist) if (!inherits(xfdlist, "list")) stop("Argument XFDLIST is not a list object.") # check each xfdlist member. If the object is a vector of length N, # it is converted to a functional data object with a # constant basis onebasis <- create.constant.basis(rangeval) onesfd <- fd(1,onebasis) xerror <- FALSE for (j in 1:p) { xfdj <- xfdlist[[j]] if (inherits(xfdj, "fd")) { xcoef <- xfdj$coefs if (length(dim(xcoef)) > 2) stop(paste("Covariate",j,"is not univariate.")) # check size of coefficient array Nj <- dim(xcoef)[2] if (Nj != N) { print(paste("Incorrect number of replications in XFDLIST", "for covariate",j)) xerror = TRUE } } if (is.numeric(xfdj)) { if (!is.matrix(xfdj)) xfdj = as.matrix(xfdj) Zdimj <- dim(xfdj) if (Zdimj[1] != N) { print(paste("Vector in XFDLIST[[",j,"]] has wrong length.")) xerror = TRUE } if (Zdimj[2] != 1) { print(paste("Matrix in XFDLIST[[",j, "]] has more than one column.")) xerror = TRUE } xfdlist[[j]] <- fd(matrix(xfdj,1,N), onebasis) } if (!(inherits(xfdlist[[j]], "fd") || is.numeric(xfdlist[[j]]))) { print(paste("XFDLIST[[",j,"]] is neither an FD object nor numeric.")) xerror = TRUE } } if (xerror) stop("problem with xfdlist") # check weights { if(is.null(wt)){ wt <- rep(1, N) wtconstant <- TRUE } else { if (length(wt) != N) stop("Number of weights not equal to N.") if (any(wt < 0)) stop("Negative weights found.") wtconstant <- (var(wt) == 0) } } # ---------------------------------------------------------------- # branch depending on whether the dependent variable # is functional or scalar # ---------------------------------------------------------------- if (inherits(yfdPar, "fdPar")) { # ---------------------------------------------------------------- # YFDPAR is a functional parameter object # ---------------------------------------------------------------- # extract dependent variable information yfdobj <- yfdPar$fd ylambda <- yfdPar$lambda yLfdobj <- yfdPar$Lfd ycoef <- yfdobj$coefs ycoefdim <- dim(ycoef) N <- ycoefdim[2] ybasisobj <- yfdobj$basis rangeval <- ybasisobj$rangeval ynbasis <- ybasisobj$nbasis if (length(ycoefdim) > 2) stop("YFDOBJ from YFDPAR is not univariate.") # ----------------------------------------------------------- # set up the linear equations for the solution # ----------------------------------------------------------- # compute the total number of coefficients to be estimated ncoef <- 0 for (j in 1:p) { betafdParj <- betalist[[j]] if (betafdParj$estimate) { ncoefj <- betafdParj$fd$basis$nbasis ncoef <- ncoef + ncoefj } } Cmat <- matrix(0,ncoef,ncoef) Dmat <- rep(0,ncoef) # loop through rows of CMAT mj2 <- 0 for (j in 1:p) { betafdParj <- betalist[[j]] if (betafdParj$estimate) { betafdj <- betafdParj$fd betabasisj <- betafdj$basis ncoefj <- betabasisj$nbasis # row indices of CMAT and DMAT to fill mj1 <- mj2 + 1 mj2 <- mj2 + ncoefj indexj <- mj1:mj2 # compute right side of equation DMAT xfdj <- xfdlist[[j]] if (wtconstant) { xyfdj <- xfdj*yfdobj } else { xyfdj <- (xfdj*wt)*yfdobj } wtfdj <- sum(xyfdj) Dmatj <- inprod(betabasisj,onesfd,0,0,rangeval,wtfdj) Dmat[indexj] <- Dmatj # loop through columns of CMAT mk2 <- 0 for (k in 1:j) { betafdPark <- betalist[[k]] if (betafdPark$estimate) { betafdk <- betafdPark$fd betabasisk <- betafdk$basis ncoefk <- betabasisk$nbasis # column indices of CMAT to fill mk1 <- mk2 + 1 mk2 <- mk2 + ncoefk indexk <- mk1:mk2 # set up two weight functions xfdk <- xfdlist[[k]] if (wtconstant) { xxfdjk <- xfdj*xfdk } else { xxfdjk <- (xfdj*wt)*xfdk } wtfdjk <- sum(xxfdjk) Cmatjk <- inprod(betabasisj, betabasisk, 0, 0, rangeval, wtfdjk) Cmat[indexj,indexk] <- Cmatjk Cmat[indexk,indexj] <- t(Cmatjk) } } # attach penalty term to diagonal block lambda <- betafdParj$lambda if (lambda > 0) { Lfdj <- betafdParj$Lfd Rmatj <- eval.penalty(betafdParj$fd$basis, Lfdj) Cmat[indexj,indexj] <- Cmat[indexj,indexj] + lambda*Rmatj } } } Cmat <- (Cmat+t(Cmat))/2 # check Cmat for singularity eigchk(Cmat) # solve for coefficients defining BETA Lmat <- chol(Cmat) Lmatinv <- solve(Lmat) Cmatinv <- Lmatinv %*% t(Lmatinv) betacoef <- Cmatinv %*% Dmat # set up fdPar objects for reg. fns. in BETAESTLIST betaestlist <- betalist mj2 <- 0 for (j in 1:p) { betafdParj <- betalist[[j]] if (betafdParj$estimate) { betafdj <- betafdParj$fd ncoefj <- betafdj$basis$nbasis mj1 <- mj2 + 1 mj2 <- mj2 + ncoefj indexj <- mj1:mj2 coefj <- betacoef[indexj] betafdj$coefs <- as.matrix(coefj) betafdParj$fd <- betafdj } betaestlist[[j]] <- betafdParj } # set up fd objects for predicted values in YHATFDOBJ nfine <- max(501,10*ynbasis+1) tfine <- seq(rangeval[1], rangeval[2], len=nfine) yhatmat <- matrix(0,nfine,N) for (j in 1:p) { xfdj <- xfdlist[[j]] xmatj <- eval.fd(tfine, xfdj, 0, returnMatrix) betafdParj <- betaestlist[[j]] betafdj <- betafdParj$fd betavecj <- eval.fd(tfine, betafdj, 0, returnMatrix) yhatmat <- yhatmat + xmatj*as.vector(betavecj) } yhatfdobj <- smooth.basis(tfine, yhatmat, ybasisobj) # ------------------------------------------------------------------- # Compute pointwise standard errors of regression coefficients # if both y2cMap and SigmaE are supplied. # ------------------------------------------------------------------- if (!(is.null(y2cMap) || is.null(SigmaE))) { # check dimensions of y2cMap and SigmaE y2cdim <- dim(y2cMap) if(y2cdim[1] != ynbasis || y2cdim[2] != dim(SigmaE)[1]) stop( "Dimensions of Y2CMAP not correct.") ybasismat <- eval.basis(tfine, ybasisobj, 0, returnMatrix) deltat <- tfine[2] - tfine[1] # compute BASISPRODMAT basisprodmat <- matrix(0,ncoef,ynbasis*N) mj2 <- 0 for (j in 1:p) { betafdParj <- betalist[[j]] betabasisj <- betafdParj$fd$basis ncoefj <- betabasisj$nbasis bbasismatj <- eval.basis(tfine, betabasisj, 0, returnMatrix) xfdj <- xfdlist[[j]] tempj <- eval.fd(tfine, xfdj, 0, returnMatrix) # row indices of BASISPRODMAT to fill mj1 <- mj2 + 1 mj2 <- mj2 + ncoefj indexj <- mj1:mj2 # inner products of beta basis and response basis # weighted by covariate basis functions mk2 <- 0 for (k in 1:ynbasis) { # row indices of BASISPRODMAT to fill mk1 <- mk2 + 1 mk2 <- mk2 + N indexk <- mk1:mk2 tempk <- bbasismatj*ybasismat[,k] basisprodmat[indexj,indexk] <- deltat*crossprod(tempk,tempj) } } # compute variances of regression coefficient function values c2bMap <- Cmatinv %*% basisprodmat VarCoef <- y2cMap %*% SigmaE %*% t(y2cMap) CVariance <- kronecker(VarCoef,diag(rep(1,N))) bvar <- c2bMap %*% CVariance %*% t(c2bMap) betastderrlist <- vector("list",p) mj2 <- 0 for (j in 1:p) { betafdParj <- betalist[[j]] betabasisj <- betafdParj$fd$basis ncoefj <- betabasisj$nbasis mj1 <- mj2 + 1 mj2 <- mj2 + ncoefj indexj <- mj1:mj2 bbasismat <- eval.basis(tfine, betabasisj, 0, returnMatrix) bvarj <- bvar[indexj,indexj] bstderrj <- sqrt(diag(bbasismat %*% bvarj %*% t(bbasismat))) bstderrfdj <- smooth.basis(tfine, bstderrj, betabasisj)$fd betastderrlist[[j]] <- bstderrfdj } } else{ betastderrlist = NULL bvar = NULL c2bMap = NULL } # ------------------------------------------------------------------- # Set up output list object # ------------------------------------------------------------------- fRegressList <- list(yfdPar = yfdPar, xfdlist = xfdlist, betalist = betalist, betaestlist = betaestlist, yhatfdobj = yhatfdobj, Cmatinv = Cmatinv, wt = wt, y2cMap = y2cMap, SigmaE = SigmaE, betastderrlist = betastderrlist, bvar = bvar, c2bMap = c2bMap) } # ----------------------------------------------------------------- if(inherits(yfdPar,"numeric")) { # -------------------------------------------------------------- # YFDPAR is scalar or multivariate # -------------------------------------------------------------- ymat <- as.matrix(yfdPar) N <- dim(ymat)[1] Zmat <- NULL Rmat <- NULL pjvec <- rep(0,p) ncoef <- 0 for (j in 1:p) { xfdj <- xfdlist[[j]] xcoef <- xfdj$coefs xbasis <- xfdj$basis betafdParj <- betalist[[j]] bbasis <- betafdParj$fd$basis bnbasis <- bbasis$nbasis pjvec[j] <- bnbasis Jpsithetaj <- inprod(xbasis,bbasis) Zmat <- cbind(Zmat,crossprod(xcoef,Jpsithetaj)) if (betafdParj$estimate) { lambdaj <- betafdParj$lambda if (lambdaj > 0) { Lfdj <- betafdParj$Lfd Rmatj <- lambdaj*eval.penalty(bbasis,Lfdj, returnMatrix) } else { Rmatj <- matrix(0,bnbasis,bnbasis) } if (ncoef > 0) { zeromat <- matrix(0,ncoef,bnbasis) Rmat <- rbind(cbind(Rmat, zeromat), cbind(t(zeromat), Rmatj)) } else { Rmat <- Rmatj ncoef <- ncoef + bnbasis } } } # ----------------------------------------------------------- # set up the linear equations for the solution # ----------------------------------------------------------- # solve for coefficients defining BETA if (any(wt != 1)) { rtwt <- sqrt(wt) Zmatwt <- Zmat*rtwt ymatwt <- ymat*rtwt Cmat <- t(Zmatwt) %*% Zmatwt + Rmat Dmat <- t(Zmatwt) %*% ymatwt } else { Cmat <- t(Zmat) %*% Zmat + Rmat Dmat <- t(Zmat) %*% ymat } eigchk(Cmat) Cmatinv <- solve(Cmat) betacoef <- Cmatinv %*% Dmat # compute and print degrees of freedom measure df <- sum(diag(Zmat %*% Cmatinv %*% t(Zmat))) # set up fdPar object for BETAESTFDPAR betaestlist <- betalist onebasis <- create.constant.basis(c(0,1)) mj2 <- 0 for (j in 1:p) { betafdParj <- betalist[[j]] betafdj <- betafdParj$fd ncoefj <- betafdj$basis$nbasis mj1 <- mj2 + 1 mj2 <- mj2 + ncoefj indexj <- mj1:mj2 betacoefj <- betacoef[indexj] if (inherits(xfdj, "fd")) { betaestfdj <- betafdj betaestfdj$coefs <- as.matrix(betacoefj) betaestfdParj <- betafdParj betaestfdParj$fd <- betaestfdj betaestlist[[j]] <- betaestfdParj } else{ betaestfdj <- fd(as.matrix(t(betacoefj)),onebasis) betaestfdParj <- betafdParj betaestfdParj$fd <- betaestfdj betaestlist[[j]] <- betaestfdParj } } # set up fd object for predicted values yhatmat <- matrix(0,N,1) for (j in 1:p) { xfdj <- xfdlist[[j]] if (inherits(xfdj, "fd")) { xbasis <- xfdj$basis xnbasis <- xbasis$nbasis xrng <- xbasis$rangeval nfine <- max(501,10*xnbasis+1) tfine <- seq(xrng[1], xrng[2], len=nfine) deltat <- tfine[2]-tfine[1] xmat <- eval.fd(tfine, xfdj, 0, returnMatrix) betafdParj <- betaestlist[[j]] betafdj <- betafdParj$fd betamat <- eval.fd(tfine, betafdj, 0, returnMatrix) fitj <- deltat*(crossprod(xmat,betamat) - 0.5*(outer(xmat[1, ],betamat[1, ]) + outer(xmat[nfine,],betamat[nfine,]))) yhatmat <- yhatmat + fitj } else{ betaestfdParj <- betaestlist[[j]] betavecj <- betaestfdParj$fd$coefs yhatmat <- yhatmat + xfdj %*% t(betavecj) } } yhatfdobj <- yhatmat # ----------------------------------------------------------------------- # Compute pointwise standard errors of regression coefficients # if both y2cMap and SigmaE are supplied. # ----------------------------------------------------------------------- if (!(is.null(y2cMap) || is.null(SigmaE))) { # check dimensions of y2cMap and SigmaE y2cdim <- dim(y2cMap) if (y2cdim[1] != ynbasis || y2cdim[2] != dim(SigmaE)[1]) stop( "Dimensions of Y2CMAP not correct.") # compute linear mapping c2bMap takinging coefficients for # response into coefficients for regression functions c2bMap <- Cmatinv %*% t(Zmat) y2bmap <- c2bMap bvar <- y2bmap %*% as.matrix(SigmaE) %*% t(y2bmap) betastderrlist <- vector("list",p) mj2 <- 0 for (j in 1:p) { betafdParj <- betalist[[j]] betabasisj <- betafdParj$fd$basis ncoefj <- betabasisj$nbasis mj1 <- mj2 + 1 mj2 <- mj2 + ncoefj indexj <- mj1:mj2 bvarj <- bvar[indexj,indexj] xfdj <- xfdlist[[j]] if (inherits(xfdj,"fd")) { betarng <- betabasisj$rangeval nfine <- max(c(501,10*ncoefj+1)) tfine <- seq(betarng[1], betarng[2], len=nfine) bbasismat <- eval.basis(tfine, betabasisj, 0, returnMatrix) bstderrj <- sqrt(diag(bbasismat %*% bvarj %*% t(bbasismat))) bstderrfdj <- smooth.basis(tfine, bstderrj, betabasisj)$fd } else { bsterrj <- sqrt(diag(bvarj)) onebasis <- create.constant.basis(betabasisj$rangeval) bstderrfdj <- fd(t(bstderrj), onebasis) } betastderrlist[[j]] <- bstderrfdj } } # ------------------------------------------------------------------- # Set up output list object # ------------------------------------------------------------------- fRegressList <- list(yfdPar = yfdPar, xfdlist = xfdlist, betalist = betalist, betaestlist = betaestlist, yhatfdobj = yhatfdobj, Cmatinv = Cmatinv, wt = wt, df = df) } class(fRegressList) <- 'fRegress' return(fRegressList) } # ---------------------------------------------------------------- eigchk <- function(Cmat) { # check Cmat for singularity eigval <- eigen(Cmat)$values ncoef <- length(eigval) if (eigval[ncoef] < 0) { neig <- min(length(eigval),10) cat("\nSmallest eigenvalues:\n") print(eigval[(ncoef-neig+1):ncoef]) cat("\nLargest eigenvalues:\n") print(eigval[1:neig]) stop("Negative eigenvalue of coefficient matrix.") } if (eigval[ncoef] == 0) stop("Zero eigenvalue of coefficient matrix.") logcondition <- log10(eigval[1]) - log10(eigval[ncoef]) if (logcondition > 12) { warning("Near singularity in coefficient matrix.") cat(paste("\nLog10 Eigenvalues range from\n", log10(eigval[ncoef])," to ",log10(eigval[1]),"\n")) } }