Revision - d94d986 - version 3.4-3 – Software Heritage archive

Revision d94d986df2cb1bd6eb7f7bb1dae50853744f8d77 authored by Charles Dupont on 31 October 2007, 21:18:43 UTC, committed by cran-robot on 31 October 2007, 21:18:43 UTC

version 3.4-3

1 parent b69f0b3

Files
Changes

Permalinks

wtd.stats.s

wtd.mean <- function(x, weights=NULL, normwt='ignored', na.rm=TRUE)
{
  if(!length(weights)) return(mean(x, na.rm=na.rm))
  if(na.rm) {
    s <- !is.na(x + weights)
    x <- x[s]
    weights <- weights[s]
  }

  sum(weights*x)/sum(weights)
}


wtd.var <- function(x, weights=NULL, normwt=FALSE, na.rm=TRUE)
{
  if(!length(weights)) {
    if(na.rm) x <- x[!is.na(x)]
    return(var(x))
  }

  if(na.rm) {
    s <- !is.na(x + weights)
    x <- x[s]
    weights <- weights[s]
  }

  if(normwt)
    weights <- weights*length(x)/sum(weights)

  xbar <- sum(weights*x)/sum(weights)
  sum(weights*((x - xbar)^2)) / (sum(weights) - 1)
}


wtd.quantile <- function(x, weights=NULL, probs=c(0, .25, .5, .75, 1), 
                         type=c('quantile','(i-1)/(n-1)','i/(n+1)','i/n'), 
                         normwt=FALSE, na.rm=TRUE)
{
  if(!length(weights))
    return(quantile(x, probs=probs, na.rm=na.rm))

  type <- match.arg(type)
  if(any(probs < 0 | probs > 1))
    stop("Probabilities must be between 0 and 1 inclusive")

  nams <- paste(format(round(probs * 100, if(length(probs) > 1) 
                             2 - log10(diff(range(probs))) else 2)), 
                "%", sep = "")

  if(type=='quantile') {
    w <- wtd.table(x, weights, na.rm=na.rm, normwt=normwt, type='list')
    x     <- w$x
    wts   <- w$sum.of.weights
    n     <- sum(wts)
    order <- 1 + (n - 1) * probs
    low   <- pmax(floor(order), 1)
    high  <- pmin(low + 1, n)
    order <- order %% 1
    ## Find low and high order statistics
    ## These are minimum values of x such that the cum. freqs >= c(low,high)
    allq <- approx(cumsum(wts), x, xout=c(low,high), 
                   method='constant', f=1, rule=2)$y
    k <- length(probs)
    quantiles <- (1 - order)*allq[1:k] + order*allq[-(1:k)]
    names(quantiles) <- nams
    return(quantiles)
  } 
  w <- wtd.Ecdf(x, weights, na.rm=na.rm, type=type, normwt=normwt)
  structure(approx(w$ecdf, w$x, xout=probs, rule=2)$y, 
            names=nams)
}


wtd.Ecdf <- function(x, weights=NULL, 
                     type=c('i/n','(i-1)/(n-1)','i/(n+1)'), 
                     normwt=FALSE, na.rm=TRUE)
{
  type <- match.arg(type)
  switch(type,
         '(i-1)/(n-1)'={a <- b <- -1},
         'i/(n+1)'    ={a <- 0; b <- 1},
         'i/n'        ={a <- b <- 0})

  if(!length(weights)) {
    ##.Options$digits <- 7  ## to get good resolution for names(table(x))6Aug00
    oldopt <- options(digits=7)
    on.exit(options(oldopt))
    cumu <- table(x)    ## R does not give names for cumsum
    isdate <- testDateTime(x)  ## 31aug02
    ax <- attributes(x)
    ax$names <- NULL
    x <- as.numeric(names(cumu))
    if(isdate) attributes(x) <- c(attributes(x),ax)
    cumu <- cumsum(cumu)
    cdf <- (cumu + a)/(cumu[length(cumu)] + b)
    if(cdf[1]>0) {
      x <- c(x[1], x);
      cdf <- c(0,cdf)
    }

    return(list(x = x, ecdf=cdf))
  }

  w <- wtd.table(x, weights, normwt=normwt, na.rm=na.rm)
  cumu <- cumsum(w$sum.of.weights)
  cdf <- (cumu + a)/(cumu[length(cumu)] + b)
  list(x = c(if(cdf[1]>0) w$x[1], w$x), ecdf=c(if(cdf[1]>0)0, cdf))
}


wtd.table <- function(x, weights=NULL, type=c('list','table'), 
                      normwt=FALSE, na.rm=TRUE)
{
  type <- match.arg(type)
  if(!length(weights))
    weights <- rep(1, length(x))

  isdate <- testDateTime(x)  ## 31aug02 + next 2
  ax <- attributes(x)
  ax$names <- NULL
  x <- if(is.character(x)) as.category(x)
       else oldUnclass(x)

  lev <- levels(x)
  if(na.rm) {
    s <- !is.na(x + weights)
    x <- x[s,drop=FALSE]    ## drop is for factor class
    weights <- weights[s]
  }

  n <- length(x)
  if(normwt)
    weights <- weights*length(x)/sum(weights)

  i <- order(x)  # R does not preserve levels here
  x <- x[i]; weights <- weights[i]

  if(any(diff(x)==0)) {  ## slightly faster than any(duplicated(xo))
    weights <- tapply(weights, x, sum)
    if(length(lev)) {    ## 3apr03
      levused <- lev[sort(unique(x))]  ## 7sep02
      ## Next 3 lines 21apr03
      if((length(weights) > length(levused)) &&
         any(is.na(weights)))
        weights <- weights[!is.na(weights)]

      if(length(weights) != length(levused))
        stop('program logic error')

      names(weights) <- levused   # 10Apr01  length 16May01
    }

    if(!length(names(weights)))
      stop('program logic error')  # 16May01

    if(type=='table')
      return(weights)

    x <- all.is.numeric(names(weights),'vector')
    if(isdate)
      attributes(x) <- c(attributes(x),ax)   ## 31aug02

    names(weights) <- NULL
    return(list(x=x, sum.of.weights=weights))
  }

  xx <- x  ## 31aug02
  if(isdate)
    attributes(xx) <- c(attributes(xx),ax)

  if(type=='list')
    list(x=if(length(lev))lev[x]
           else xx, 
         sum.of.weights=weights)
  else {
    names(weights) <- if(length(lev)) lev[x]
                      else xx
    weights
  }
}


wtd.rank <- function(x, weights=NULL, normwt=FALSE, na.rm=TRUE)
{
  if(!length(weights))
    return(rank(x),na.last=if(na.rm)NA else TRUE)

  tab <- wtd.table(x, weights, normwt=normwt, na.rm=na.rm)
  
  freqs <- tab$sum.of.weights
  ## rank of x = # <= x - .5 (# = x, minus 1)
  r <- cumsum(freqs) - .5*(freqs-1)
  ## Now r gives ranks for all unique x values.  Do table look-up
  ## to spread these ranks around for all x values.  r is in order of x
  approx(tab$x, r, xout=x)$y
}


wtd.loess.noiter <- function(x, y, weights=rep(1,n), robust=rep(1,n), 
                             span=2/3, degree=1, cell=.13333, 
                             type=c('all','ordered all','evaluate'), 
                             evaluation=100, na.rm=TRUE)
{
  type <- match.arg(type)
  n <- length(y)
  if(na.rm) {
    s <- !is.na(x+y+weights)
    x <- x[s]; y <- y[s]; weights <- weights[s]; n <- length(y)
  }

  robust <- weights * robust
  max.kd <- max(200, n)
  y <-
    if(.R.) .C("loess_raw",
               as.double(y),
               as.double(x),
               as.double(weights),
               as.double(robust),
               as.integer(1),
               as.integer(n),
               as.double(span),
               as.integer(degree),
               as.integer(1),
               as.integer(2),
               as.integer(0),
               as.double(cell),
               as.character('interpolate/none'),
               fitted.values = double(n),
               parameter = integer(7),
               a = integer(max.kd),
               xi = double(max.kd),
               vert = double(2),
               vval = double(2 * max.kd),
               diagonal = double(n),
               trace.hat = double(1),
               one.delta = double(1),
               two.delta = double(1),
               as.integer(FALSE))$fitted.values
    else .C("loess_raw",
            specialsok = TRUE,
            as.double(y),
            as.double(x),
            as.double(weights),
            as.double(robust),
            as.integer(1),
            as.integer(n),
            as.double(span),
            as.integer(degree),
            as.integer(1),
            as.integer(2),
            as.integer(0),
            as.double(cell),
            as.character('interpolate/none'),
            fitted.values = double(n),
            parameter = integer(7),
            a = integer(max.kd),
            xi = double(max.kd),
            vert = double(2),
            vval = double(2 * max.kd),
            diagonal = double(n),
            trace.hat = double(1),
            one.delta = double(1),
            two.delta = double(1),
            as.integer(FALSE))$fitted.values

  switch(type,
         all=list(x=x, y=y),
         'ordered all'={
           i <- order(x);
           list(x=x[i],y=y[i])
         },
         evaluate={
           r <- range(x, na.rm=na.rm)
           approx(x, y, xout=seq(r[1], r[2], length=evaluation))
         })
}


num.denom.setup <- function(num, denom)
{
  n <- length(num)
  if(length(denom) != n)
    stop('lengths of num and denom must match')
  
  s <- (1:n)[!is.na(num + denom) & denom != 0]
  num <- num[s];
  denom <- denom[s]
  
  subs <- s[num > 0]
  y <- rep(1, length(subs))
  wt <- num[num > 0]
  other <- denom - num
  subs <- c(subs, s[other > 0])
  wt <- c(wt, other[other > 0])
  y <- c(y, rep(0, sum(other>0)))
  list(subs=subs, weights=wt, y=y)
}

Showing with 0 additions and 0 deletions (0 / 0 diffs computed)

Computing file changes ...