Revision 6309a99fadf62a57b1eab63c8a76805a31f2989a authored by Achim Zeileis on 19 February 2003, 00:00:00 UTC, committed by Gabor Csardi on 19 February 2003, 00:00:00 UTC
1 parent e0bdc79
Raw File
lmtest.R
dwtest <- function(formula, alternative = c("greater", "two.sided", "less"),
  iterations = 15, exact = NULL, tol = 1e-10, data = list())
{
  dname <- paste(deparse(substitute(formula)))
  alternative <- match.arg(alternative)
  mf <- model.frame(formula, data = data)
  y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  n <- nrow(X)
  if(is.null(exact)) exact <- (n < 100)
  k <- ncol(X)
  res <- lm.fit(X,y)$residuals
  dw <- sum(diff(res)^2)/sum(res^2)
  A <- diag(c(1,rep(2, n-2), 1))
  A[abs(row(A)-col(A))==1] <- -1
  Q1 <- solve(crossprod(X), tol=tol)
  if(exact)
  {
    MA <- diag(rep(1,n)) - X %*% Q1 %*% t(X)
    MA <- MA %*% A
    ev <- eigen(MA)$values[1:(n-k)]
    if(any(Im(ev)>tol)) warning("imaginary parts of eigenvalues discarded")
    ev <- Re(ev)
    ev <- ev[ev>tol]
    pdw <- function(dw) .Fortran("pan", as.double(c(dw,ev)), as.integer(length(ev)),
             as.double(0), as.integer(iterations), x=double(1), PACKAGE = "lmtest")$x
    pval <- switch(alternative,
      "two.sided" = (2*min(pdw(dw), 1-pdw(dw))),
      "less" = (1 - pdw(dw)),
      "greater" = pdw(dw))

    if((pval > 1) | (pval < 0))
    {
      warning("exact p value cannot be computed (not in [0,1]), approximate p value will be used")
      exact <- FALSE
    }
  }
  if(!exact)
  {
    XAXQ <- t(X) %*% A %*% X %*% Q1
    P <- 2*(n-1) - sum(diag(XAXQ))
    Q <- 2*(3*n - 4) - 2* sum(diag(t(X) %*% A %*% A %*% X %*% Q1)) + sum(
         diag(XAXQ %*% XAXQ))
    dmean <- P/(n-k)
    dvar <- 2/((n-k)*(n-k+2)) * (Q - P*dmean)
    pval <- switch(alternative,
      "two.sided" = (2*pnorm(abs(dw-dmean), sd=sqrt(dvar), lower.tail = FALSE)),
      "less" = pnorm(dw, mean = dmean, sd = sqrt(dvar), lower.tail = FALSE),
      "greater" = pnorm(dw, mean = dmean, sd = sqrt(dvar)))
  }

  alternative <- switch(alternative,
    "two.sided" = "true autocorelation is not 0",
    "less" = "true autocorrelation is less than 0",
    "greater" = "true autocorrelation is greater than 0")

  names(dw) <- "DW"
  RVAL <- list(statistic = dw, method = "Durbin-Watson test",
    alternative = alternative, p.value= pval, data.name=dname)
  class(RVAL) <- "htest"
  return(RVAL)
}

bptest <- function(formula, varformula = NULL, studentize = TRUE,
 data = list())
{
  dname <- paste(deparse(substitute(formula)))
  mf <- model.frame(formula, data = data)
  y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  k <- ncol(X)
  n <- nrow(X)

  resi <- lm.fit(X,y)$residuals
  sigma2 <- sum(resi^2)/n
  if(is.null(varformula)) varformula <- formula
  Z <- model.matrix(varformula, data = data)

  if(studentize)
  {
    w <- resi^2 - sigma2
    fv <- lm.fit(Z,w)$fitted
    bp <- n * sum(fv^2)/sum(w^2)
    method <- "studentized Breusch-Pagan test"
  }
  else
  {
    f <- resi^2/sigma2 -1
    fv <- lm.fit(Z,f)$fitted
    bp <- 0.5 * sum(fv^2)
    method <- "Breusch-Pagan test"
  }

  names(bp) <- "BP"
  df <- ncol(Z)-1
  names(df) <- "df";
  RVAL <- list(statistic = bp,
      parameter = df,
      method = method,
      p.value= 1-pchisq(bp,df),
      data.name=dname)

  class(RVAL) <- "htest"
  return(RVAL)
}

gqtest <- function(formula, point=0.5, order.by=NULL, data=list())
{
  dname <- paste(deparse(substitute(formula)))
  mf <- model.frame(formula, data = data)
  y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  k <- ncol(X)
  n <- nrow(X)
  if(point < 1) point <- floor(point*n)
  if (point > n - k | point < k) stop("inadmissable breakpoint")

  if(!is.null(order.by))
  {
    x <- model.matrix(order.by, data = data)
    x <- as.vector(x[,ncol(x)])
    X <- as.matrix(X[order(x),])
    y <- y[order(x)]
  }

  rss1 <- sum(lm.fit(as.matrix(X[1:point,]),y[1:point])$residuals^2)
  rss2 <- sum(lm.fit(as.matrix(X[(point+1):n,]),y[(point+1):n])$residuals^2)

  gq <- (rss2/(n-point-k))/(rss1/(point-k))
  df <- c(n-point-k, point-k)
  names(df) <- c("df1", "df2")
  PVAL <- 1-pf(gq, df[1], df[2])
  method <- "Goldfeld-Quandt test"
  names(gq) <- "GQ"
  RVAL <- list(statistic = gq,
      parameter = df,
      method = method,
      p.value= PVAL,
      data.name=dname)

  class(RVAL) <- "htest"
  return(RVAL)
}

hmctest <- function(formula, point=0.5, order.by=NULL, simulate.p=TRUE,
 nsim=1000,
  plot = FALSE, data=list()) {
  dname <- paste(deparse(substitute(formula)))
  mf <- model.frame(formula, data = data)
  y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  k <- ncol(X)
  n <- nrow(X)
  if(point < 1) point <- floor(point*n)
  if (point > n - k | point < k) stop("inadmissable breakpoint")

  if(!is.null(order.by))
  {
    x <- model.matrix(order.by, data = data)
    x <- as.vector(x[,ncol(x)])
    X <- as.matrix(X[order(x),])
    y <- y[order(x)]
  }

  resi <- lm.fit(X,y)$residuals
  hmc <- sum(resi[1:point]^2)/sum(resi^2)

  if(plot)
  {
    stats <- c(0,cumsum(resi^2))/sum(resi^2)
    stats <- ts(stats, start=0, freq=n)
    plot(stats, xlab="fraction", ylab="Harrison-McCabe statistics", xaxs="i",
      yaxs="i")
    abline(0,1)
  }

  names(hmc) <- "HMC"
  if (simulate.p)
  {
    stat <- rep(0, nsim)
    for (i in 1:nsim) {
      x <- rnorm(n)
      x <- (x - mean(x))/sqrt(var(x))
      stat[i] <- sum(x[1:point]^2)/sum(x^2)
    }
    PVAL <- mean(stat <= hmc)
  }
  else
    PVAL <- NA

  RVAL <- list(statistic = hmc,
      method = "Harrison-McCabe test",
      p.value= PVAL,
      data.name=dname)

  class(RVAL) <- "htest"
  return(RVAL)
}

harvtest <- function(formula, order.by=NULL, tol=1e-7, data=list())
{
  dname <- paste(deparse(substitute(formula)))
  mf <- model.frame(formula, data = data)
  y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  k <- ncol(X)
  n <- nrow(X)

  rec.res <- function(X, y, tol = 1e-7)
  {
      n <- nrow(X)
      q <- ncol(X)
      w <- rep(0,(n-q))
      Xr1 <- X[1:q,,drop = FALSE]
      xr <- as.vector(X[q+1,])
      X1 <- solve(t(Xr1)%*%Xr1, tol=tol)
      fr <- as.vector((1 + (t(xr) %*% X1 %*% xr)))
      betar <- X1 %*%t(Xr1)%*% y[1:q]
      w[1] <- (y[q+1] - t(xr) %*% betar)/sqrt(fr)

      for(r in ((q+2):n))
      {
          X1 <- X1 - (X1 %*% outer(xr, xr) %*% X1)/fr
  	  betar <- betar + X1 %*% xr * w[r-q-1]*sqrt(fr)
  	  xr <- as.vector(X[r,])
          fr <- as.vector((1 + (t(xr) %*% X1 %*% xr)))
          w[r-q] <- (y[r] - t(xr) %*% betar)/sqrt(fr)
      }
      return(w)
  }

  if(!is.null(order.by))
  {
    x <- model.matrix(order.by, data = data)
    x <- as.vector(x[,ncol(x)])
    X <- as.matrix(X[order(x),])
    y <- y[order(x)]
  }

  resr <- rec.res(X,y, tol=tol)
  sigma <- sqrt(var(resr)*(length(resr)-1)/(n-k-1))
  resr <- resr / sigma
  harv <- abs(sum(resr)/sqrt(n-k))/sqrt(var(resr))
  names(harv) <- "HC"
  df <- n-k-1
  names(df) <- "df"
  RVAL <- list(statistic = harv,
      parameter = df,
      method = "Harvey-Collier test",
      p.value= 2 * (1-pt(harv, n-k-1)),
      data.name=dname)

  class(RVAL) <- "htest"
  return(RVAL)
}

raintest <- function(formula, fraction=0.5, order.by=NULL, center=NULL,
 data=list())
{
  dname <- paste(deparse(substitute(formula)))
  mf <- model.frame(formula, data = data)
  y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  k <- ncol(X)
  n <- nrow(X)

  if(is.null(order.by))
  {
    if(is.null(center)) center <- 0.5
    if(center > 1) center <- center/n
    from <- ceiling(quantile(1:n, probs=(center-fraction/2)))
    to <- from + floor(fraction*n) - 1
  }
  else
  if(!is.null(class(order.by)) && class(order.by)=="formula")
  {
    x <- model.matrix(order.by, data = data)
    x <- as.vector(x[,ncol(x)])
    X <- as.matrix(X[order(x),])
    y <- y[order(x)]
    if(is.null(center)) center <- 0.5
    if(center > 1) center <- center/n
    from <- ceiling(quantile(1:n, probs=(center-fraction/2)))
    to <- from + floor(fraction*n) - 1
  }
  else
  if(order.by == "mahalanobis")
  {
    if(is.null(center)) center <- apply(X,2,mean)
    o <- order(mahalanobis(X,center,crossprod(X)))
    X <- as.matrix(X[o,])
    y <- y[o]
    from <- 1
    to <- floor(fraction*n)
  }
  else
    stop("order.by must be a formula, \"mahalanobis\" or NULL")

  subX <- as.matrix(X[from:to,])
  suby <- y[from:to]
  n1 <- nrow(subX)
  if(n1 < k) stop("not enough observations in subset")
  resi <- lm.fit(X,y)$residuals
  subresi <- lm.fit(subX, suby)$residuals
  sresi <- sum(resi^2)
  sresi1 <- sum(subresi^2)
  rain <- ((sresi - sresi1)/(n-n1))/(sresi1/(n1-k))
  names(rain) <- "Rain"
  df <- c((n-n1),(n1-k))
  names(df) <- c("df1","df2")
  RVAL <- list(statistic = rain,
      parameter = df,
      method = "Rainbow test",
      p.value= as.vector(1-pf(rain, df[1], df[2])),
      data.name=dname)

  class(RVAL) <- "htest"
  return(RVAL)
}

reset <- function(formula, power=2:3, type=c("fitted", "regressor",
  "princomp"), data=list())
{
  dname <- paste(deparse(substitute(formula)))
  mf <- model.frame(formula, data = data)
  y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  k <- ncol(X)
  n <- nrow(X)
  type <- match.arg(type)
  switch(type,

  "fitted" = {
    y.hat <- lm.fit(X,y)$fitted
    Z <- matrix(t(sapply(y.hat, "^", power)), nrow=n)
  },

  "regressor" = {
    Z <- as.matrix(mf[,which(!sapply(mf,is.factor))[-1]])
    Z <- matrix(as.vector(t(sapply(as.vector(Z), "^", power))), nrow=n)
  },

  "princomp" = {
    Z <- as.matrix(mf[,which(!sapply(mf,is.factor))[-1]])
    pc1 <- as.matrix(eigen(cov(Z))$vectors)[,1]
    pc1 <- as.vector(Z %*% pc1)
    Z <- matrix(t(sapply(pc1, "^", power)), nrow=n)
  })

  XZ <- cbind(X, Z)
  q <- ncol(Z)
  res1 <- lm.fit(X,y)$residuals
  res2 <- lm.fit(XZ,y)$residuals
  res1 <- sum(res1^2)
  res2 <- sum(res2^2)
  df1 <- q
  df2 <- n-(k+q)
  reset <- (df2/df1) * ((res1 - res2) / res2)
  names(reset) <- "RESET"
  df <- c(df1, df2)
  names(df) <- c("df1","df2")
  RVAL <- list(statistic = reset,
	parameter = df,
	method = "RESET test",
	p.value= as.vector(1-pf(reset, df1, df2)),
	data.name=dname)
  class(RVAL) <- "htest"
  return(RVAL)
}

bgtest <- function(formula, order = 1, type = c("Chisq", "F"), data = list())
{
  dname <- paste(deparse(substitute(formula)))
  mf <- model.frame(formula, data = data)
  y <- model.response(mf)
  modelterms <- terms(formula, data = data)
  X <- model.matrix(modelterms, data = data)
  n <- nrow(X)
  k <- ncol(X)
  order <- 1:order
  m <- length(order)
  resi <- lm.fit(X,y)$residuals

  Z <- sapply(order, function(x) c(rep(0, x), resi[1:(n-x)]))
  umod <- lm(resi ~ X + Z)

  switch(match.arg(type),

  "Chisq" = {
    bg <- n * summary(umod)$r.squared
    p.val <- pchisq(bg, m, lower.tail = FALSE)
    df <- m
    names(df) <- "df"
  },

  "F" = {
    uresi <- residuals(umod)
    rresi <- lm.fit(X,resi)$residuals
    bg <- ((sum(rresi^2) - sum(uresi^2))/m) / (sum(uresi^2) / (n-k-m))
    p.val <- pf(bg, df1 = m, df2 = n-k, lower.tail = FALSE)
    df <- c(m, n-k)
    names(df) <- c("df1", "df2")
  })

  names(bg) <- "LM test"
  RVAL <- list(statistic = bg, parameter = df,
               method = paste("Breusch-Godfrey test for serial correlation of order", max(order)),
               p.value = p.val,
               data.name =   dname)
  class(RVAL) <- "htest"
  return(RVAL)
}


back to top