Content - 8351e004a75d75d0dd7b947f3582bb68812b7b7b - 0245189/ewcdf.R

ewcdf.R
#
#     ewcdf.R
#
#     $Revision: 1.8 $  $Date: 2015/05/17 10:30:37 $
#
#  With contributions from Kevin Ummel
#

ewcdf <- function(x, weights=rep(1/length(x), length(x)))
{
  stopifnot(length(x) == length(weights))
  # remove NA's together
  nbg <- is.na(x) 
  x <- x[!nbg]
  weights <- weights[!nbg]
  n <- length(x)
  if (n < 1)
    stop("'x' must have 1 or more non-missing values")
  stopifnot(all(weights >= 0))
  # sort in increasing order of x value
  ox <- fave.order(x)
  x <- x[ox]
  w <- weights[ox]
  # find jump locations and match
  vals <- sort(unique(x))
  xmatch <- factor(match(x, vals), levels=seq_along(vals))
  # sum weight in each interval
  wmatch <- tapply(w, xmatch, sum)
  wmatch[is.na(wmatch)] <- 0
  cumwt <- cumsum(wmatch)
  # make function
  rval <- approxfun(vals, cumwt,
                    method = "constant", yleft = 0, yright = sum(wmatch),
                    f = 0, ties = "ordered")
  class(rval) <- c("ewcdf", "ecdf", "stepfun", class(rval))
  assign("w", w, envir=environment(rval))
  attr(rval, "call") <- sys.call()
  return(rval)
}

  # Hacked from stats:::print.ewcdf
print.ewcdf <- function (x, digits = getOption("digits") - 2L, ...) {
  cat("Weighted empirical CDF \nCall: ")
  print(attr(x, "call"), ...)
  env <- environment(x)
  xx <- get("x", envir=env)
  ww <- get("w", envir=env)
  n <- length(xx)
  i1 <- 1L:min(3L, n)
  i2 <- if (n >= 4L) max(4L, n - 1L):n else integer()
  numform <- function(x) paste(formatC(x, digits = digits), collapse = ", ")
  cat(" x[1:", n, "] = ", numform(xx[i1]), if (n > 3L) 
      ", ", if (n > 5L) 
      " ..., ", numform(xx[i2]), "\n", sep = "")
  cat(" weights[1:", n, "] = ", numform(ww[i1]), if (n > 3L) 
      ", ", if (n > 5L) 
      " ..., ", numform(ww[i2]), "\n", sep = "")
  invisible(x)
}

quantile.ewcdf <- function(x, probs=seq(0,1,0.25), names=TRUE, ..., type=1) {
  trap.extra.arguments(..., .Context="quantile.ewcdf")
  if(!(type %in% c(1,2)))
    stop("Only quantiles of type 1 and 2 are implemented", call.=FALSE)
  env <- environment(x)
  xx <- get("x", envir=env)
  Fxx <- get("y", envir=env)
  n <- length(xx)
  eps <- 100 * .Machine$double.eps
  if(any((p.ok <- !is.na(probs)) & (probs < -eps | probs > 1 + eps))) 
    stop("'probs' outside [0,1]")
  if (na.p <- any(!p.ok)) {
    o.pr <- probs
    probs <- probs[p.ok]
    probs <- pmax(0, pmin(1, probs))
  }
  np <- length(probs)
  if (n > 0 && np > 0) {
    qs <- numeric(np)
    if(type == 1) {
      ## right-continuous inverse
      for(k in 1:np) qs[k] <- xx[min(which(Fxx >= probs[k]))]
    } else {
      ## average of left and right continuous
      for(k in 1:np) {
        pk <- probs[k]
        ik <- min(which(Fxx >= probs[k]))
        qs[k] <- if(Fxx[ik] > pk) (xx[ik] + xx[ik-1])/2 else xx[ik]
      }
    }
  } else {
    qs <- rep(NA_real_, np)
  }
  if (names && np > 0L) {
    dig <- max(2L, getOption("digits"))
    probnames <-
      if(np < 100) formatC(100 * probs, format="fg", width=1, digits=dig) else
      format(100 * probs, trim = TRUE, digits = dig)
    names(qs) <- paste0(probnames, "%")
  }
  if (na.p) {
    o.pr[p.ok] <- qs
    names(o.pr) <- rep("", length(o.pr))
    names(o.pr)[p.ok] <- names(qs)
    o.pr
  } else qs
}