https://github.com/cran/coin
Raw File
Tip revision: cd5ace5b6355edbcf1277dc2a8cb00999df6f943 authored by Torsten Hothorn on 29 September 2009, 00:00:00 UTC
version 1.0-7
Tip revision: cd5ace5
bugfixes.Rout.save

R version 2.9.0 (2009-04-17)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> 
> ### Regression tests for fixed bugs
> 
> set.seed(290875)
> library(coin)
Loading required package: survival
Loading required package: splines
Loading required package: mvtnorm
Loading required package: modeltools
Loading required package: stats4
> isequal <- coin:::isequal
> 
> ### check if doxygen documentation is there
> stopifnot(nchar(system.file("documentation", "html", "index.html", 
+                             package = "coin")) > 29) 
> 
> ### I() returns objects of class AsIs which caused an error in `trafo'
> df <- data.frame(x1 = rnorm(100), x2 = rnorm(100), x3 = gl(2, 50))
> independence_test(I(x1 / x2) ~ x3, data = df)

	Asymptotic General Independence Test

data:  I(x1/x2) by x3 (1, 2) 
Z = -0.4491, p-value = 0.6534
alternative hypothesis: two.sided 

> independence_test(I(x1 < 0) ~ x3, data = df)

	Asymptotic General Independence Test

data:  I(x1 < 0) by x3 (1, 2) 
Z = 0.7966, p-value = 0.4257
alternative hypothesis: two.sided 

> 
> ### expectation was wrong when varonly = TRUE in case both
> ### xtrafo and ytrafo were multivariate 
> if (require(multcomp)) {
+     df <- data.frame(x = runif(30), y = runif(30), z = gl(3, 10))
+     a <- independence_test(x + y ~ z, data = df,
+          distribution = approximate(B = 19999),
+          xtrafo = function(data) trafo(data, factor_trafo = function(x)
+              model.matrix(~x - 1) %*% t(contrMat(table(x), "Tukey"))))
+     b <- independence_test(x + y ~ z, data = df,
+          xtrafo = function(data) trafo(data, factor_trafo = function(x)
+              model.matrix(~x - 1) %*% t(contrMat(table(x), "Tukey"))))
+     isequal(expectation(a), expectation(b))
+ }
Loading required package: multcomp
[1] TRUE
> 
> 
> ### `statistic' for linear and standardized statistics was wrong in case of 
> ### scores
> data("jobsatisfaction")
> stopifnot(unique(dim(statistic(lbl_test(jobsatisfaction), "linear"))) == 1)
> stopifnot(unique(dim(statistic(lbl_test(jobsatisfaction), "standardized"))) == 1)
> 
> 
> ### support() failed in most cases
> df <- data.frame(x = runif(20), y = runif(20), z = gl(2, 10))
> support(independence_test(x ~ z, data = df))
[1] -4.264891  4.264891
> support(independence_test(x ~ z, data = df, teststat = "quad"))
[1]  0.00000 19.51142
> ite <- independence_test(I(round(x, 1)) ~ z, data = df, dist = exact())
> ae <- support(ite)
> de <- sapply(ae, function(x) dperm(ite, x))
> sum(de)
[1] 1
> ita <- independence_test(I(round(x, 1)) ~ z, data = df, 
+                          dist = approximate(B = 100000))
> aa <- support(ita)
> da <- sapply(aa, function(x) dperm(ita, x))
> sum(da)
[1] 1
> mean(round(ae, 10) %in% round(aa, 10))
[1] 1
> 
> plot(aa, da, type = "s", lty = 1)
> lines(ae, de, type = "s", lty = 2)
> itas <- independence_test(I(round(x, 1)) ~ z, data = df)
> lines(ae[-1], diff(sapply(ae, function(x) pperm(itas, x))), lty = 3)
> legend("topleft", lty = 1:3, legend = c("approx", "exact", "asympt"), bty = "n")
> 
> ### check correct handling of multiple censoring indicators (in modeltools)
> ### was never wrong, just in case...
> data("photocar", package = "coin")
> i1 <- independence_test(Surv(time, event) + Surv(dmin, tumor) + ntumor ~ group,
+                   data = photocar)
> i2 <- independence_test(Surv(time, event) ~ group, data = photocar)
> i3 <- independence_test(Surv(dmin, tumor) ~ group, data = photocar)
> 
> stopifnot(max(abs(statistic(i1, "standardized")[,1] - 
+                   statistic(i2, "stand"))) < sqrt(.Machine$double.eps))
> stopifnot(max(abs(statistic(i1, "standardized")[,2] - 
+                   statistic(i3, "stand"))) < sqrt(.Machine$double.eps))
> 
> ### check new var_trafo argument
> x <- rnorm(20)
> y <- gl(2, 10)
> a <- trafo(data.frame(x = x, y = y), numeric_trafo = normal_trafo)
> b <- trafo(data.frame(x = x, y = y), var_trafo = list(x = normal_trafo))
> stopifnot(isequal(a, b))
> 
> ### check for multiple ordered factors
> mydf <- data.frame(x = ordered(gl(4, 5)), y = ordered(gl(5, 4)), 
+                    z = rnorm(20))
> it1 <- independence_test(x + y ~ z , data = mydf)
> stopifnot(isequal(drop(statistic(it1, "linear")), 
+           c(statistic(independence_test(x ~ z , data = mydf), "linear"),
+             statistic(independence_test(y ~ z , data = mydf), "linear"))))
> it1 <- independence_test(x + z ~ y , data = mydf)
> stopifnot(isequal(drop(statistic(it1, "linear")),
+           c(statistic(independence_test(x ~ y , data = mydf), "linear"),
+             statistic(independence_test(z ~ y , data = mydf), "linear"))))
> it1 <- independence_test(z ~ x + y , data = mydf)
> stopifnot(isequal(drop(statistic(it1, "linear")),
+           c(statistic(independence_test(z ~ x , data = mydf), "linear"),
+             statistic(independence_test(z ~ y , data = mydf), "linear"))))
> 
> ### NA's and weights
> mydf <- data.frame(x = 1:10, y = gl(2, 5), w = rep(2, 10))
> s <- statistic(independence_test(x ~ y, data = mydf, weights = ~ w), "linear")
> stopifnot(s == 30)
> mydf$x[1] <- NA
> s <- statistic(independence_test(x ~ y, data = mydf, weights = ~ w), "linear")
> stopifnot(s == 28)
> 
> ### two observations only
> mydf <- data.frame(x = 1:10, y = factor(rep(c(1, 2), 5)))
> independence_test(y ~ x, data = mydf, subset = c(1, 6))

	Asymptotic General Independence Test

data:  y by x 
Z = -1, p-value = 0.3173
alternative hypothesis: two.sided 

> independence_test(y ~ x, data = mydf, subset = c(1, 2))

	Asymptotic General Independence Test

data:  y by x 
Z = -1, p-value = 0.3173
alternative hypothesis: two.sided 

> try(independence_test(y ~ x, data = mydf, subset = 1))
Error in validObject(.Object) : 
  invalid class "IndependenceProblem" object: FALSE
> 
> ### names of expectation and covariance
> YOY <- data.frame(length = c(46, 28, 46, 37, 32, 41, 42, 45, 38, 44,
+                              42, 60, 32, 42, 45, 58, 27, 51, 42, 52,
+                              38, 33, 26, 25, 28, 28, 26, 27, 27, 27,
+                              31, 30, 27, 29, 30, 25, 25, 24, 27, 30),
+                   site = factor(c(rep("I", 10), rep("II", 10),
+                                   rep("III", 10), rep("IV", 10))))
> 
> it <- independence_test(length ~ site, data = YOY,
+     ytrafo = function(data) trafo(data, numeric_trafo = rank),
+     teststat = "quad")
> expectation(it)
  I  II III  IV 
205 205 205 205 
> covariance(it)
            I        II       III        IV
I   1019.0385 -339.6795 -339.6795 -339.6795
II  -339.6795 1019.0385 -339.6795 -339.6795
III -339.6795 -339.6795 1019.0385 -339.6795
IV  -339.6795 -339.6795 -339.6795 1019.0385
> 
> mydf <- data.frame(x = rnorm(10), y = rnorm(10), z = gl(2, 5))
> it <- independence_test(x + y ~ z, data = mydf)
> statistic(it, "linear")
          x         y
1 -2.066068 0.3292961
> expectation(it)
       1:x        1:y 
-1.6340021 -0.9750075 
> covariance(it)
           1:x        1:y
1:x  0.4417981 -0.8139286
1:y -0.8139286  2.5231787
> 
> ### maxstat_trafo
> n <- seq(from = 5, to = 100, by = 1)
> for (i in n) {
+    x <- round(rnorm(i) * 10, 1)
+    xm <- maxstat_trafo(x)
+    stopifnot(min(c(mean(xm[,1]), 1 - mean(xm[,ncol(xm)])) - 0.1) >
+              -.Machine$double.eps)
+ }
> 
> ### formula evaluation in `parent.frame()', spotted by Z
> foo <- function(x, y) independence_test(y ~ x)
> a <- 1:10
> b <- 1:10
> foo(a, b)

	Asymptotic General Independence Test

data:  y by x 
Z = 3, p-value = 0.0027
alternative hypothesis: two.sided 

> x <- 1
> y <- 1
> foo(a, b)

	Asymptotic General Independence Test

data:  y by x 
Z = 3, p-value = 0.0027
alternative hypothesis: two.sided 

> 
> ### factors with only one level
> dat <- data.frame(y = rnorm(100), x1 = runif(100), x2 = factor(rep(0, 100)))
> try(independence_test(y ~ x1  + x2, data = dat))
Error in factor_trafo(x) : 
  Can't deal with factors containing only one level
> 
> ### user specified g: names, MC
> me <- as.table(matrix(c( 6,  8, 10,
+                32, 47, 20), byrow = TRUE, nrow = 2,
+     dimnames = list(group = c("In situ", "Control"),
+                     genotype = c("AA", "AG", "GG"))))
> medf <- as.data.frame(me)
> 
> add <- c(0, 1, 2)
> dom <- c(0, 1, 1)
> rez <- c(0, 0, 1)
> g <- function(x) {
+     x <- unlist(x)
+     cbind(add[x], dom[x], rez[x])
+ }
> it <- independence_test(group ~ genotype, 
+     data = medf, weights = ~ Freq, xtrafo = g)
> statistic(it, "linear")
   In situ
X1      28
X2      18
X3      10
> 
> it <- independence_test(group ~ genotype,
+     data = medf, weights = ~ Freq, xtrafo = g,
+     distribution = approximate(B = 49999))
> pvalue(it)
[1] 0.06848137
99 percent confidence interval:
 0.06560306 0.07144217 

> 
> stopifnot(all.equal(statistic(independence_test(t(me), xtrafo = g), "linear"),
+                     statistic(it, "linear")))
> 
> ### alternative trafo for ordered variables didn't work
> ### spotted by Ludwig Hothorn <hothorn@biostat.uni-hannover.de>
> tmp <- data.frame(x = ordered(sample(1:3, 20, replace = TRUE)), y = rnorm(20))
> it1 <- independence_test(y ~ x, data = tmp, scores = list(x = c(1, 1, 2)))
> g <- function(x) c(1, 1, 2)[unlist(x)]
> it2 <- independence_test(y ~ x, data = tmp, xtrafo = g)
> it3 <- independence_test(y ~ x, data = tmp, 
+     xtrafo = function(data) trafo(data, ordered_trafo = g))
> stopifnot(all.equal(statistic(it1), statistic(it2)))
> stopifnot(all.equal(statistic(it1), statistic(it3)))
> 
> ### precision problems in SR algorithm, >= <= issue
> ### spotted by "Fay, Michael (NIH/NIAID) [E]" <mfay@niaid.nih.gov>
> x <- c(1,2,3.1,4,5,6)
> g <- factor(c(0,0,0,1,1,1))
> it <- independence_test(x ~ g, distribution = exact())
> stopifnot(pvalue(it) == 0.1)
> itMC <- independence_test(x ~ g, distribution = approximate(99999))
> ci <- attr(pvalue(itMC), "conf.int")
> stopifnot(ci[1] < pvalue(it) && ci[2] > pvalue(it))
> 
> ### any() not applied to logicals
> ### spotted by R-2.7.0 and Kaspar Daniel Hansen
> x <- c(1.83,  0.50,  1.62,  2.48, 1.68, 1.88, 1.55, 3.06, 1.30)
> y <- c(0.878, 0.647, 0.598, 2.05, 1.06, 1.29, 1.06, 3.14, 1.29)
> ### must not give a warning
> wilcoxsign_test(x ~ y, alternative = "greater", 
+                 distribution = exact())

	Exact Wilcoxon-Signed-Rank Test

data:  y by x (neg, pos) 
	 stratified by block 
Z = 2.0732, p-value = 0.01953
alternative hypothesis: true mu is greater than 0 

> 
> ### inconsistencies with confidence intervals
> ### spotted by Fritz Scholz <fscholz@u.washington.edu>
> Route = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
+ 2L, 2L, 2L), .Label = c("A", "B"), class = "factor")
> Route.Time = c(5.8, 5.8, 5.9, 6, 6, 6, 6.3, 6.3, 6.4, 6.5, 6.5, 6.5,
+ 6.8, 7.1, 7.3, 10.2)
> Route2 <- factor(as.character(Route), levels = c("B", "A"))
> 
> wilcox_test(Route.Time~Route,conf.int=TRUE)

	Asymptotic Wilcoxon Mann-Whitney Rank Sum Test

data:  Route.Time by Route (A, B) 
Z = -3.0245, p-value = 0.002491
alternative hypothesis: true mu is not equal to 0 
95 percent confidence interval:
 -3.6999810 -0.4999788 
sample estimates:
difference in location 
            -0.9000475 

> wilcox_test(Route.Time~Route2,conf.int=TRUE)

	Asymptotic Wilcoxon Mann-Whitney Rank Sum Test

data:  Route.Time by Route2 (B, A) 
Z = 3.0245, p-value = 0.002491
alternative hypothesis: true mu is not equal to 0 
95 percent confidence interval:
 0.4999788 3.6999810 
sample estimates:
difference in location 
             0.9000352 

> wilcox_test(Route.Time~Route,conf.int=TRUE, distr = exact())

	Exact Wilcoxon Mann-Whitney Rank Sum Test

data:  Route.Time by Route (A, B) 
Z = -3.0245, p-value = 0.001374
alternative hypothesis: true mu is not equal to 0 
95 percent confidence interval:
 -3.7 -0.5 
sample estimates:
difference in location 
                  -0.9 

> wilcox_test(Route.Time~Route2,conf.int=TRUE, distr = exact())

	Exact Wilcoxon Mann-Whitney Rank Sum Test

data:  Route.Time by Route2 (B, A) 
Z = 3.0245, p-value = 0.001374
alternative hypothesis: true mu is not equal to 0 
95 percent confidence interval:
 0.5 3.7 
sample estimates:
difference in location 
                   0.9 

> 
> ### evaluate all formulae in xxx_tests parent.frame
> ### spotted by Matthieu Dubois <matthdub@gmail.com>
> y <- as.data.frame(matrix(rnorm(200), ncol=2))
> group <- gl(2, 50)
> lapply(y, function(var) wilcox_test(var ~ group))
$V1

	Asymptotic Wilcoxon Mann-Whitney Rank Sum Test

data:  var by group (1, 2) 
Z = -0.3929, p-value = 0.6944
alternative hypothesis: true mu is not equal to 0 


$V2

	Asymptotic Wilcoxon Mann-Whitney Rank Sum Test

data:  var by group (1, 2) 
Z = 0.7997, p-value = 0.4239
alternative hypothesis: true mu is not equal to 0 


> 
> ### make sure a parallel design with 
> ### n = 2 isn't confused with a block design
> ### spotted by Fritz Scholz <fscholz@u.washington.edu>
> dat <- data.frame(y = c(1, 2), g = gl(2, 1))
> wt <- wilcox_test(y ~ g, data = dat, distribution = exact())
> s <- support(wt)
> stopifnot(all(dperm(wt, s) == c(0.5, 0.5)))
> 
> ### dpqperm
> wte <- wilcox_test(Route.Time ~ Route, distribution = exact())
> wta <- wilcox_test(Route.Time ~ Route, distribution = approximate())
> de <- dperm(wte, support(wte))
> pe <- pperm(wte, support(wte))
> stopifnot(max(abs(cumsum(de) - pe)) < .Machine$double.eps)
> da <- dperm(wta, support(wta))
> pa <- pperm(wta, support(wta))
> stopifnot(max(abs(cumsum(da) - pa)) < .Machine$double.eps)
> qperm(wte, seq(from = 0.1, to = 0.9, by = 0.1))
[1] -1.3125012 -0.8559791 -0.5135874 -0.2853264  0.0000000  0.2853264  0.5706527
[8]  0.8559791  1.2554359
> qperm(wta, seq(from = 0.1, to = 0.9, by = 0.1))
[1] -1.3125012 -0.8559791 -0.5135874 -0.2282611  0.0000000  0.2282611  0.5135874
[8]  0.7989138  1.2554359
> 
back to top