Content - 238386be57b837f0d450a0d97bc77c469e40dceb - 1766339/R/Confints.R

swh:1:snp:813359ba77493c9d5dd1abad9a1f53490a8abf57

Tip revision: 035df276b039349a91d92b6c5691fdeb05819f3a authored by Torsten Hothorn on 17 July 2017, 21:32:53 UTC
version 1.2-1
Tip revision: 035df27
Confints.R
confint_location <- function(object, nulldistr, level = 0.95, ...) {

    if (!extends(class(object), "ScalarIndependenceTestStatistic"))
        stop("Argument ", sQuote("object"), " is not of class ",
             sQuote("ScalarIndependenceTestStatistic"))

    ## <FIXME> drop unused levels!
    if (!is_2sample(object))
        warning(sQuote("object"), " does not represent a two sample problem")
    ## </FIXME>

    if (!extends(class(nulldistr), "NullDistribution"))
        stop("Argument ", sQuote("nulldistr"), " is not of class ",
             sQuote("NullDistribution"))
    approx <- inherits(nulldistr, "AsymptNullDistribution")

    if (nlevels(object@block) != 1L || !is_unity(object@weights))
        stop("cannot compute confidence intervals with blocks or weights")

    alternative <- object@alternative

    if(!((length(level) == 1L)
       && is.finite(level)
       && (level > 0)
       && (level < 1)))
       stop("level must be a single number between 0 and 1")

    scores <- object@y[[1L]]
    groups <- object@xtrans[, 1L]

    ## raw data
    x <- sort(scores[groups > 0])
    y <- sort(scores[groups < 1])
    alpha <- 1 - level

    foo <- function(x, d) x - d

    if (!approx) {

        steps <- c()
        ## explicitely compute all possible steps
        for (lev in levels(object@block)) {
            thisblock <- (object@block == lev)
            ytmp <- sort(split(scores[thisblock], groups[thisblock])[[1L]])
            xtmp <- sort(split(scores[thisblock], groups[thisblock])[[2L]])
            steps <- c(steps, as.vector(outer(xtmp, ytmp, foo)))
        }
        steps <- sort(unique(steps))

        ## computes the statistic under the alternative 'd'
        fse <- function(d)
            sum(object@ytrafo(data.frame(c(foo(x, d), y)))[seq_along(x)])

        ## we need to compute the statistics just to the right of
        ## each step
        ds <- diff(steps)
        justright <- min(abs(ds[abs(ds) > eps()])) / 2
        jumps <- vapply(steps + justright, fse, NA_real_)

        ## determine if the statistics are in- or decreasing
        ## jumpsdiffs <- diff(jumps)
        increasing <- all(diff(jumps[c(1L, length(jumps))]) > 0)
        decreasing <- all(diff(jumps[c(1L, length(jumps))]) < 0)

        ## this is safe
        if (!(increasing || decreasing))
            stop("cannot compute confidence intervals:",
                 "the step function is not monotone")

        cci <- function(alpha) {
            ## the quantiles:
            ## we reject iff
            ##
            ##   STATISTIC <  qlower OR
            ##   STATISTIC >= qupper
            ##
            qlower <- drop(qperm(nulldistr, alpha / 2) *
                             sqrt(variance(object)) + expectation(object))
            qupper <- drop(qperm(nulldistr, 1 - alpha / 2) *
                             sqrt(variance(object)) + expectation(object))

            ## Check if the statistic exceeds both quantiles first.
            if (qlower < min(jumps) || qupper > max(jumps)) {
                warning("Cannot compute confidence intervals")
                return(c(NA, NA))
            }

            if (increasing) {
                ##
                ##  We do NOT reject for all steps with
                ##
                ##     STATISTICS >= qlower AND
                ##     STATISTICS < qupper
                ##
                ##  but the open right interval ends with the
                ##  step with STATISTIC == qupper
                ##
                ci <- c(min(steps[qlower %LE% jumps]),
                        min(steps[jumps > qupper]))
            } else {
                ##
                ##  We do NOT reject for all steps with
                ##
                ##     STATISTICS >= qlower AND
                ##     STATISTICS < qupper
                ##
                ##  but the open left interval ends with the
                ##  step with STATISTIC == qupper
                ##
                ci <- c(min(steps[jumps %LE% qupper]),
                        min(steps[jumps < qlower]))
            }
            ci
        }

        cint <- switch(alternative,
                    "two.sided" = cci(alpha),
                    "greater"   = c(cci(alpha * 2)[1L], Inf),
                    "less"      = c(-Inf, cci(alpha * 2)[2L])
                )
        attr(cint, "conf.level") <- level

        ## was: median(steps) which will not work for blocks etc.
        u <- jumps - expectation(object)
        sgr <- ifelse(decreasing, min(steps[u %LE% 0]), max(steps[u %LE% 0]))
        sle <- ifelse(decreasing, min(steps[u < 0]), min(steps[u > 0]))

        ESTIMATE <- mean(c(sle, sgr), na.rm = TRUE)
        names(ESTIMATE) <- "difference in location"
    } else {
        ## approximate the steps
        ## Here we search the root of the function 'fsa' on the set
        ## c(mumin, mumax).
        ##
        ## This returns a value from c(mumin, mumax) for which
        ## the standardized statistic is equal to the
        ## quantile zq.  This means that the statistic is not
        ## within the critical region, and that implies that '
        ## is a confidence limit for the median.

        fsa <- function(d, zq) {
           STAT <- sum(object@ytrafo(data.frame(c(foo(x, d), y)))[seq_along(x)])
           (STAT - expectation(object)) / sqrt(variance(object)) - zq
        }

        mumin <- min(x) - max(y)
        mumax <- max(x) - min(y)

        ccia <- function(alpha) {
            ## Check if the statistic exceeds both quantiles
            ## first: otherwise 'uniroot' won't work anyway
            statu <- fsa(mumin, zq = qperm(nulldistr, alpha / 2))
            statl <- fsa(mumax, zq = qperm(nulldistr, 1 - alpha / 2))
            if (sign(statu) == sign(statl)) {
                warning(paste("Samples differ in location:",
                              "Cannot compute confidence set,",
                              "returning NA"))
                return(c(NA, NA))
            }
            u <- uniroot(fsa, c(mumin, mumax),
                         zq = qperm(nulldistr, alpha / 2), ...)$root
            l <- uniroot(fsa, c(mumin, mumax),
                         zq = qperm(nulldistr, 1 - alpha / 2), ...)$root
            ## The process of the statistics does not need to be
            ## increasing: sort is ok here.
            sort(c(u, l))
        }

        cint <- switch(alternative,
                    "two.sided" = ccia(alpha),
                    "greater"   = c(ccia(alpha * 2)[1L], Inf),
                    "less"      = c(-Inf, ccia(alpha * 2)[2L])
                )
        attr(cint, "conf.level") <- level

        ## Check if the statistic exceeds both quantiles first.
        statu <- fsa(mumin, zq = 0)
        statl <- fsa(mumax, zq = 0)
        if (sign(statu) == sign(statl)) {
            ESTIMATE <- NA
            warning("Cannot compute estimate, returning NA")
        } else
            ESTIMATE <- uniroot(fsa, c(mumin, mumax), zq = 0, ...)$root
        names(ESTIMATE) <- "difference in location"
    }

    list(conf.int = cint, estimate = ESTIMATE)
}


confint_scale <- function(object, nulldistr, level = 0.95,
    approx = FALSE, ...) {

    if (!extends(class(object), "ScalarIndependenceTestStatistic"))
        stop("Argument ", sQuote("object"), " is not of class ",
             sQuote("ScalarIndependenceTestStatistic"))

    if (!extends(class(nulldistr), "NullDistribution"))
        stop("Argument ", sQuote("nulldistr"), " is not of class ",
             sQuote("NullDistribution"))

    ## <FIXME> drop unused levels!
    if (!is_2sample(object))
        warning(sQuote("object"), " does not represent a two sample problem")
    ## </FIXME>

    if (nlevels(object@block) != 1L || !is_unity(object@weights))
        stop("cannot compute confidence intervals with blocks or weights")

    alternative <- object@alternative

    if(!((length(level) == 1L)
       && is.finite(level)
       && (level > 0)
       && (level < 1)))
       stop("level must be a single number between 0 and 1")

    scores <- object@y[[1L]]
    groups <- object@xtrans[, 1L]

    ## raw data
    x <- sort(scores[groups > 0])
    y <- sort(scores[groups < 1])
    alpha <- 1 - level

    foo <- function(x, d) x / d

    if (!approx) {

        ## explicitely compute all possible steps
        steps <- c()
        for (lev in levels(object@block)) {
            thisblock <- (object@block == lev)
            ytmp <- sort(split(scores[thisblock], groups[thisblock])[[1L]])
            xtmp <- sort(split(scores[thisblock], groups[thisblock])[[2L]])
            ratio <-  outer(xtmp, ytmp, "/")
            aratio <- ratio[ratio >= 0]
            steps <- c(steps, aratio)
        }
        steps <- sort(unique(steps))

        ## computes the statistic under the alternative 'd'
        fse <- function(d)
            sum(object@ytrafo(data.frame(c(foo(x, d), y)))[seq_along(x)])

        ## we need to compute the statistics just to the right of
        ## each step
        ds <- diff(steps)
        justright <- min(abs(ds[abs(ds) > eps()])) / 2
        jumps <- vapply(steps + justright, fse, NA_real_)

        ## determine if the statistics are in- or decreasing
        ## jumpsdiffs <- diff(jumps)
        increasing <- all(diff(jumps[c(1L, length(jumps))]) > 0)
        decreasing <- all(diff(jumps[c(1L, length(jumps))]) < 0)

        ## this is safe
        if (!(increasing || decreasing))
            stop("cannot compute confidence intervals:",
                 "the step function is not monotone")

        cci <- function(alpha) {
            ## the quantiles:
            ## we reject iff
            ##
            ##   STATISTIC <  qlower OR
            ##   STATISTIC >= qupper
            ##
            qlower <- drop(qperm(nulldistr, alpha / 2) *
                             sqrt(variance(object)) + expectation(object))
            qupper <- drop(qperm(nulldistr, 1 - alpha / 2) *
                             sqrt(variance(object)) + expectation(object))

            ## Check if the statistic exceeds both quantiles first.
            if (qlower < min(jumps) || qupper > max(jumps)) {
                warning("Cannot compute confidence intervals")
                return(c(NA, NA))
            }

            if (increasing) {
                ##
                ##  We do NOT reject for all steps with
                ##
                ##     STATISTICS >= qlower AND
                ##     STATISTICS < qupper
                ##
                ##  but the open right interval ends with the
                ##  step with STATISTIC == qupper
                ##
                ci <- c(min(steps[qlower %LE% jumps]),
                        min(steps[jumps > qupper]))
            } else {
                ##
                ##  We do NOT reject for all steps with
                ##
                ##     STATISTICS >= qlower AND
                ##     STATISTICS < qupper
                ##
                ##  but the open left interval ends with the
                ##  step with STATISTIC == qupper
                ##
                ci <- c(min(steps[jumps %LE% qupper]),
                        min(steps[jumps < qlower]))
            }
            ci
        }

        cint <- switch(alternative,
                    "two.sided" = cci(alpha),
                    "greater"   = c(cci(alpha * 2)[1L], Inf),
                    "less"      = c(0, cci(alpha * 2)[2L])
                )
        attr(cint, "conf.level") <- level

        u <- jumps - expectation(object)
        sgr <- ifelse(decreasing, min(steps[u %LE% 0]), max(steps[u %LE% 0]))
        sle <- ifelse(decreasing, min(steps[u < 0]), min(steps[u > 0]))

        ESTIMATE <- mean(c(sle, sgr), na.rm = TRUE)
        names(ESTIMATE) <- "ratio of scales"
    } else {
        ## approximate the steps
        ## Here we search the root of the function 'fsa' on the set
        ## c(mumin, mumax).
        ##
        ## This returns a value from c(mumin, mumax) for which
        ## the standardized statistic is equal to the
        ## quantile zq.  This means that the statistic is not
        ## within the critical region, and that implies that '
        ## is a confidence limit for the median.

        fsa <- function(d, zq) {
           STAT <- sum(object@ytrafo(data.frame(c(foo(x, d), y)))[seq_along(x)])
           (STAT - expectation(object)) / sqrt(variance(object)) - zq
        }

        srangepos <- NULL
        srangeneg <- NULL
        if (any(x > 0) && any(y > 0))
            srangepos <-
                c(min(x[x > 0], na.rm = TRUE) / max(y[y > 0], na.rm = TRUE),
                  max(x[x > 0], na.rm = TRUE) / min(y[y > 0], na.rm = TRUE))
        if (any(x %LE% 0) && any(y < 0))
            srangeneg <-
                c(min(x[x %LE% 0], na.rm = TRUE) / max(y[y < 0], na.rm = TRUE),
                  max(x[x %LE% 0], na.rm = TRUE) / min(y[y < 0], na.rm = TRUE))
        if (any(is.infinite(c(srangepos, srangeneg)))) {
            stop(paste("Cannot compute asymptotic confidence",
                       "set or estimator"))
        }

        mumin <- range(c(srangepos, srangeneg), na.rm = FALSE)[1L]
        mumax <- range(c(srangepos, srangeneg), na.rm = FALSE)[2L]

        ccia <- function(alpha) {
            ## Check if the statistic exceeds both quantiles
            ## first: otherwise 'uniroot' won't work anyway
            statu <- fsa(mumin, zq = qperm(nulldistr, alpha / 2))
            statl <- fsa(mumax, zq = qperm(nulldistr, 1 - alpha / 2))
            if (sign(statu) == sign(statl)) {
                warning(paste("Samples differ in location:",
                              "Cannot compute confidence set,",
                              "returning NA"))
                return(c(NA, NA))
            }
            u <- uniroot(fsa, c(mumin, mumax),
                         zq = qperm(nulldistr, alpha / 2), ...)$root
            l <- uniroot(fsa, c(mumin, mumax),
                         zq = qperm(nulldistr, 1 - alpha / 2), ...)$root
            ## The process of the statistics does not need to be
            ## increasing: sort is ok here.
            sort(c(u, l))
        }

        cint <- switch(alternative,
                    "two.sided" = ccia(alpha),
                    "greater"   = c(ccia(alpha*2)[1L], Inf),
                    "less"      = c(0, ccia(alpha*2)[2L])
                )
        attr(cint, "conf.level") <- level

        ## Check if the statistic exceeds both quantiles first.
        statu <- fsa(mumin, zq = 0)
        statl <- fsa(mumax, zq = 0)
        if (sign(statu) == sign(statl)) {
            ESTIMATE <- NA
            warning("Cannot compute estimate, returning NA")
        } else
            ESTIMATE <- uniroot(fsa, c(mumin, mumax), zq = 0, ...)$root
        names(ESTIMATE) <- "ratio of scales"
    }

    list(conf.int = cint, estimate = ESTIMATE)
}


simconfint_location <- function(object, level = 0.95,
    approx = FALSE, ...) {

    if (!(is_Ksample(object@statistic) &&
        extends(class(object), "MaxTypeIndependenceTest")))
        stop(sQuote("object"), " is not an object of class ",
             sQuote("MaxTypeIndependenceTest"),
             " representing a K sample problem")

    xtrans <- object@statistic@xtrans
    if (!all(apply(xtrans, 2L, function(x) all(x %in% c(-1, 0, 1)))))
        stop("Only differences are allowed as contrasts")

    estimate <- c()
    lower <- c()
    upper <- c()

    ## transform max(abs(x))-type distribution into a
    ## distribution symmetric around zero
    nnd <- object@distribution
    nnd@q <- function(p) {
        pp <- p
        if (p > 0.5)
            pp <- 1 - pp
        q <- qperm(object@distribution, 1 - pp)
        if (p < 0.5)
            -q
        else
            q
    }

    for (i in seq_len(ncol(xtrans))) {
        thisset <- abs(xtrans[, i]) > 0
        ip <- new("IndependenceProblem",
                  object@statistic@x[thisset, , drop = FALSE],
                  object@statistic@y[thisset, , drop = FALSE],
                  object@statistic@block[thisset])

        itp <- independence_test(ip, teststat = "scalar",
            distribution = "asymptotic", alternative = "two.sided",
            yfun = object@statistic@ytrafo, ...)

        ci <- confint_location(itp@statistic, nnd,
                               level = level, approx =approx, ...)
        estimate <- c(estimate, ci$estimate)
        lower <- c(lower, ci$conf.int[1L])
        upper <- c(upper, ci$conf.int[2L])
    }
    RET <- data.frame(Estimate = estimate, lower = lower, upper = upper)
    colnames(RET)[2L:3L] <-
        paste(c((1 - level) / 2, 1 - (1 - level) / 2) * 100, "%")
    rownames(RET) <- colnames(object@statistic@xtrans)
    attr(RET, "conf.level") <- level
    RET
}


### Exact Clopper-Pearson CI for a binomial parameter
confint_binom <- function(x, n, conf.level = 0.99) {
    alpha <- (1 - conf.level) / 2
    lower <- if (x == 0)
                 0
             else
                 qbeta(alpha, x, n - x + 1)
    upper <- if (x == n)
                 1
             else
                 qbeta(1 - alpha, x + 1, n - x)
    ci <- c(lower, upper)
    attr(ci, "conf.level") <- conf.level
    ci
}


### Mid-p CI for a binomial parameter (see Berry and Armitage, 1995)
confint_midp <- function(x, n, conf.level = 0.99) {
    alpha <- 1 - conf.level
    if (x > 0 & x < n) {
        f <- function(a, p)
            ## 0.5 * dbinom(...) + pbinom(..., lower.tail = FALSE)
            mean(pbinom(c(x, x - 1), n, a, lower.tail = TRUE)) - p
        UR <- function(p)
            uniroot(f, c(0, 1), p, tol = .Machine$double.eps)$root
        ci <- c(UR(1 - alpha / 2), UR(alpha / 2))
    } else if (x == 0) {
        ci <- c(0, 1 - alpha^(1 / n))
    } else if (x == n) {
        ci <- c(alpha^(1 / n), 1)
    }
    attr(ci, "conf.level") <- conf.level
    ci
}