### Regression tests for the 2 sample problem, i.e.,
### testing the independence of a numeric variable
### `y' and a binary factor `x' (possibly blocked)
set.seed(290875)
library(coin)
isequal <- coin:::isequal
### generate data
dat <- data.frame(x = gl(2, 50), y = rnorm(100), block = gl(5, 20))[sample(1:100,
75),]
### Wilcoxon Mann-Whitney Rank Sum Test
### asymptotic distribution
ptwo <- wilcox.test(y ~ x, data = dat, correct = FALSE, exact = FALSE)$p.value
pless <- wilcox.test(y ~ x, data = dat, alternative = "less",
correct = FALSE, exact = FALSE)$p.value
pgreater <- wilcox.test(y ~ x, data = dat, alternative = "greater",
correct = FALSE, exact = FALSE)$p.value
stopifnot(isequal(pvalue(wilcox_test(y ~ x, data = dat)), ptwo))
stopifnot(isequal(pvalue(wilcox_test(y ~ x, data = dat, alternative = "less")),
pless))
stopifnot(isequal(pvalue(wilcox_test(y ~ x, data = dat, alternative = "greater")),
pgreater))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "asympt",
ytrafo = function(data) trafo(data, numeric_trafo = rank))), ptwo))
### check direct supply of a function via ytrafo
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "asympt",
ytrafo = rank)), ptwo))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "asympt",
ytrafo = function(data) trafo(data, numeric_trafo = rank),
alternative = "less")), pless))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "asympt",
ytrafo = function(data) trafo(data, numeric_trafo = rank),
alternative = "greater")), pgreater))
### exact distribution
ptwo <- wilcox.test(y ~ x, data = dat, exact = TRUE)$p.value
pless <- wilcox.test(y ~ x, data = dat, alternative = "less", exact = TRUE)$p.value
pgreater <- wilcox.test(y ~ x, data = dat, alternative = "greater",
exact = TRUE)$p.value
stopifnot(isequal(pvalue(wilcox_test(y ~ x, data = dat, distribution = "exact")),
ptwo))
stopifnot(isequal(pvalue(wilcox_test(y ~ x, data = dat, alternative = "less",
distribution = "exact")), pless))
stopifnot(isequal(pvalue(wilcox_test(y ~ x, data = dat, alternative = "greater",
distribution = "exact")), pgreater))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "exact",
ytrafo = function(data) trafo(data, numeric_trafo = rank))), ptwo))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "exact",
ytrafo = function(data) trafo(data, numeric_trafo = rank),
alternative = "less")), pless))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "exact",
ytrafo = function(data) trafo(data, numeric_trafo = rank),
alternative = "greater")), pgreater))
### approximated distribution
rtwo <- pvalue(wilcox_test(y ~ x, data = dat, distribution = "approx")) / ptwo
rless <- pvalue(wilcox_test(y ~ x, data = dat, alternative = "less",
distribution = "approx")) / pless
rgreater <- pvalue(wilcox_test(y ~ x, data = dat, alternative = "greater",
distribution = "approx")) / pgreater
stopifnot(all(c(rtwo, rless, rgreater) > 0.9 &
c(rtwo, rless, rgreater) < 1.1))
### add block examples
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "asympt"))
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "approx"))
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "exact"))
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "asympt",
alternative = "less"))
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "approx",
alternative = "less"))
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "exact",
alternative = "less"))
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "asympt",
alternative = "greater"))
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "approx",
alternative = "greater"))
pvalue(wilcox_test(y ~ x | block, data = dat, distribution = "exact",
alternative = "greater"))
### sanity checks
try(wilcox_test(x ~ y, data = dat))
try(wilcox_test(x ~ y | y, data = dat))
### Ansari-Bradley Test
### asymptotic distribution
ptwo <- ansari.test(y ~ x, data = dat, correct = FALSE, exact = FALSE)$p.value
pless <- ansari.test(y ~ x, data = dat, alternative = "less",
correct = FALSE, exact = FALSE)$p.value
pgreater <- ansari.test(y ~ x, data = dat, alternative = "greater",
correct = FALSE, exact = FALSE)$p.value
stopifnot(isequal(pvalue(ansari_test(y ~ x, data = dat)), ptwo))
stopifnot(isequal(pvalue(ansari_test(y ~ x, data = dat, alternative = "less")),
pless))
stopifnot(isequal(pvalue(ansari_test(y ~ x, data = dat, alternative = "greater")),
pgreater))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "asympt",
ytrafo = function(data) trafo(data, numeric_trafo = ansari_trafo))), ptwo))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "asympt",
ytrafo = function(data) trafo(data, numeric_trafo = ansari_trafo),
alternative = "greater")), pless))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "asympt",
ytrafo = function(data) trafo(data, numeric_trafo = ansari_trafo),
alternative = "less")), pgreater))
### exact distribution
ptwo <- ansari.test(y ~ x, data = dat, exact = TRUE)$p.value
pless <- ansari.test(y ~ x, data = dat, alternative = "less", exact = TRUE)$p.value
pgreater <- ansari.test(y ~ x, data = dat, alternative = "greater",
exact = TRUE)$p.value
### : Definition of two-sided P-values!
(isequal(pvalue(ansari_test(y ~ x, data = dat, distribution = "exact")),
ptwo))
stopifnot(isequal(pvalue(ansari_test(y ~ x, data = dat, alternative = "less",
distribution = "exact")), pless))
stopifnot(isequal(pvalue(ansari_test(y ~ x, data = dat, alternative = "greater",
distribution = "exact")), pgreater))
### : Definition of two-sided P-values!
(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "exact",
ytrafo = function(data) trafo(data, numeric_trafo = ansari_trafo))), ptwo))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "exact",
ytrafo = function(data) trafo(data, numeric_trafo = ansari_trafo),
alternative = "greater")), pless))
stopifnot(isequal(pvalue(oneway_test(y ~ x, data = dat, distribution = "exact",
ytrafo = function(data) trafo(data, numeric_trafo = ansari_trafo),
alternative = "less")), pgreater))
### approximated distribution
rtwo <- pvalue(ansari_test(y ~ x, data = dat, distribution = "approx")) / ptwo
rless <- pvalue(ansari_test(y ~ x, data = dat, alternative = "less",
distribution = "approx")) / pless
rgreater <- pvalue(ansari_test(y ~ x, data = dat, alternative = "greater",
distribution = "approx")) / pgreater
### ???
(all(c(rtwo, rless, rgreater) > 0.9 &
c(rtwo, rless, rgreater) < 1.1))
### add block examples
### sanity checks
try(ansari_test(x ~ y, data = dat))
try(ansari_test(x ~ y | y, data = dat))
### the remaining three candidates
### asymptotic distribution
ptwo <- wilcox.test(y ~ x, data = dat, correct = FALSE)$p.value
pless <- wilcox.test(y ~ x, data = dat, alternative = "less",
correct = FALSE)$p.value
pgreater <- wilcox.test(y ~ x, data = dat, alternative = "greater",
correct = FALSE)$p.value
oneway_test(y ~ x, dat = dat)
oneway_test(y ~ x, dat = dat, alternative = "less")
oneway_test(y ~ x, dat = dat, alternative = "greater")
normal_test(y ~ x, dat = dat)
normal_test(y ~ x, dat = dat, alternative = "less")
normal_test(y ~ x, dat = dat, alternative = "greater")
median_test(y ~ x, dat = dat)
median_test(y ~ x, dat = dat, alternative = "less")
median_test(y ~ x, dat = dat, alternative = "greater")
### confidence intervals, cf Bauer 1972
location <- data.frame(y = c(6, 20, 27, 38, 46, 51, 54, 57,
10, 12, 15, 21, 32, 40, 41, 45),
x = gl(2, 8))
ci <- confint(normal_test(y ~ x, data = location,
conf.int = TRUE, di = "ex"))
stopifnot(isequal(ci$conf.int, c(-6, 30)))
stopifnot(isequal(ci$estimate, 11))
wt <- wilcox.test(y ~ x, data = location,
conf.int = TRUE)
ci <- confint(wilcox_test(y ~ x, data = location,
conf.int = TRUE, di = "ex"))
stopifnot(isequal(wt$confint, ci$confint))
stopifnot(isequal(wt$estimate, ci$estimate))
scale <- data.frame(y = c(-101, -35, -13, 10, 130, 236, 370, 556,
-145, -140, -40, -30, 2, 27, 68, 290),
x = gl(2, 8))
ci <- confint(ansari_test(y ~ x, data = scale, conf.int = TRUE,
di = "ex", conf.level = 0.988))
stopifnot(isequal(ci$conf.int, c(10, 556) / c(68, 27)))
stopifnot(isequal(ci$estimate, mean(c(35/30, 370 / 290))))
### ties handling
y1 <- c(14 , 18 , 2 , 4 , -5 , 14 , -3 , -1 , 1 , 6 , 3 , 3)
x1 <- c(8 , 26 , -7 , -1 , 2 , 9 , 0 , -4 , 13 , 3 , 3 , 4)
pvalue(wilcoxsign_test(y1~x1,alter="greater",dist=exact()))
pvalue(wilcoxsign_test(y1~x1,alter="greater",dist=exact(), ties = "Pratt"))