Revision 00e9ffa4b909c19e58bd23e35ffa4c07541e4b94 authored by Torsten Hothorn on 19 June 2007, 00:00:00 UTC, committed by Gabor Csardi on 19 June 2007, 00:00:00 UTC
1 parent 6fe1cef
StreitbergRoehmel.c
/**
Exact Distribution of Two-Sample Permutation Tests
Streitberg & Roehmel Algorithm
*\file StreitbergRoehmel.c
*\author $Author: hothorn $
*\date $Date: 2006-10-17 13:17:12 +0200 (Tue, 17 Oct 2006) $
*/
#include <R.h>
#include <Rmath.h>
#include <Rdefines.h>
/*
length(scores) <= 1.000.000 observations only.
*/
#define PERM_MAX_N 1000000
/**
The density of the permutation distribution for
the independent two sample problem.
REFERENCES
Bernd Streitberg & Joachim R\"ohmel (1986),
Exact distributions for permutations and rank tests:
An introduction to some recently published algorithms.
Statistical Software Newsletter 12(1), 10-17.
Bernd Streitberg & Joachim R\"ohmel (1987),
Exakte Verteilungen f\"ur Rang- und Randomisierungstests
im allgemeinen $c$-Stichprobenfall.
EDV in Medizin und Biologie 18(1), 12-19 (in german).
*\param score_a score vector (typically c(1,1,...,1))
*\param score_b score vector (typically ranks)
*\param m_a integer indicating the sum of m_a elements of score_a
*\param m_b integer indicating the sum of m_b elements of score_b
*\param retProb logical indicating whether the density (TRUE) or
the matrix of all permutations should be returned
*/
SEXP R_cpermdist2(SEXP score_a, SEXP score_b, SEXP m_a, SEXP m_b,
SEXP retProb) {
/*
compute the joint permutation distribution of the
sum of the first m_a elements of score_a and score_b
(usualy score_a = rep(1, length(score_a)) and
score_b = Data scores, Wilcoxon, Ansari ...).
In this case the exact conditional distribution
in the simple independent two-sample problem is computed.
*/
int n, im_a, im_b; /* number of observations */
SEXP H, x; /* matrix of permutations and vector
of probabilities */
int i, j, k, sum_a = 0, sum_b = 0, s_a = 0, s_b = 0, isb;
double msum = 0.0; /* little helpers */
int *iscore_a, *iscore_b; /* pointers to R structures */
double *dH, *dx;
/* some basic checks, should be improved */
if (!isVector(score_a))
error("score_a is not a vector");
n = LENGTH(score_a);
if (!isVector(score_b))
error("score_b is not a vector");
if (LENGTH(score_b) != n)
error("length of score_a and score_b differ");
iscore_a = INTEGER(score_a);
iscore_b = INTEGER(score_b);
if (TYPEOF(retProb) != LGLSXP)
error("retProb is not a logical");
im_a = INTEGER(m_a)[0]; /* cosmetics only */
im_b = INTEGER(m_b)[0];
if (n > PERM_MAX_N)
error("n > %d in R_cpermdistr2", PERM_MAX_N);
/* compute the total sum of the scores and check if they are >= 0 */
for (i = 0; i < n; i++) {
if (iscore_a[i] < 0)
error("score_a for observation number %d is negative", i);
if (iscore_b[i] < 0)
error("score_b for observation number %d is negative", i);
sum_a += iscore_a[i];
sum_b += iscore_b[i];
}
/*
optimization according to Streitberg & Roehmel
*/
sum_a = imin2(sum_a, im_a);
sum_b = imin2(sum_b, im_b);
/*
initialize H
*/
PROTECT(H = allocVector(REALSXP, (sum_a + 1) * (sum_b + 1)));
dH = REAL(H);
for (i = 0; i <= sum_a; i++) {
isb = i * (sum_b + 1);
for (j = 0; j <= sum_b; j++) dH[isb + j] = 0.0;
}
/*
start the Shift-Algorithm with H[0,0] = 1
*/
dH[0] = 1.0;
for (k = 0; k < n; k++) {
s_a += iscore_a[k];
s_b += iscore_b[k];
/*
compute H up to row im_aand column im_b
Note:
sum_a = min(sum_a, m)
sum_b = min(sum_b, c)
*/
for (i = imin2(im_a, s_a); i >= iscore_a[k]; i--) {
isb = i * (sum_b + 1);
for (j = imin2(im_b,s_b); j >= iscore_b[k]; j--)
dH[isb + j] +=
dH[(i - iscore_a[k]) * (sum_b + 1) + (j - iscore_b[k])];
}
}
/*
return the whole matrix H
Note: use matrix(H, nrow=m_a+1, byrow=TRUE) in R
*/
if (!LOGICAL(retProb)[0]) {
UNPROTECT(1);
return(H);
} else {
PROTECT(x = allocVector(REALSXP, sum_b));
dx = REAL(x);
/*
get the values for sample size im_a (in row m) and sum it up
*/
isb = im_a * (sum_b + 1);
for (j = 0; j < sum_b; j++) {
dx[j] = dH[isb + j + 1];
msum += dx[j];
}
/*
compute probabilities and return the density x to R
the support is min(score_b):sum(score_b)
[dpq] stuff is done in R
*/
for (j = 0; j < sum_b; j++)
dx[j] = dx[j]/msum;
UNPROTECT(2);
return(x);
}
}
/**
The density of the permutation distribution for
the one sample problem.
REFERENCES
Bernd Streitberg & Joachim R\"ohmel (1986),
Exact distributions for permutations and rank tests:
An introduction to some recently published algorithms.
Statistical Software Newsletter 12(1), 10-17.
Bernd Streitberg & Joachim R\"ohmel (1987),
Exakte Verteilungen f\"ur Rang- und Randomisierungstests
im allgemeinen $c$-Stichprobenfall.
EDV in Medizin und Biologie 18(1), 12-19 (in german).
*\param scores score vector (such as rank(abs(y)) for wilcoxsign_test)
*/
SEXP R_cpermdist1(SEXP scores) {
/*
compute the permutation distribution of the sum of the
absolute values of the positive elements of `scores'
*/
int n; /* number of observations */
SEXP H; /* vector giving the density of statistics 0:sum(scores) */
int i, k, sum_a = 0, s_a = 0; /* little helpers */
int *iscores;
double msum = 0.0;
double *dH;
n = LENGTH(scores);
iscores = INTEGER(scores);
if (n > PERM_MAX_N)
error("n > %d in R_cpermdist1", PERM_MAX_N);
for (i = 0; i < n; i++) sum_a += iscores[i];
/*
Initialize H
*/
PROTECT(H = allocVector(REALSXP, sum_a + 1));
dH = REAL(H);
for (i = 0; i <= sum_a; i++) dH[i] = 0.0;
/*
start the shift-algorithm with H[0] = 1.0
*/
dH[0] = 1.0;
for (k = 0; k < n; k++) {
s_a = s_a + iscores[k];
for (i = s_a; i >= iscores[k]; i--)
dH[i] = dH[i] + dH[i - iscores[k]];
}
/*
get the number of permutations
*/
for (i = 0; i <= sum_a; i++)
msum += dH[i];
/*
compute probabilities and return the density H to R
[dpq] stuff is done in R
*/
for (i = 0; i <= sum_a; i++)
dH[i] = dH[i]/msum; /* 0 is a possible realization */
UNPROTECT(1);
return(H);
}
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...