Content - 2824f61f5c8d49765a621ae1f761024f1726d6ac - 851415f/src/vandeWiel.c

visit type:
Tip revision: 9c42537ac8466d7f2dadcf8a3b64e8da6ae3198f authored by Torsten Hothorn on 28 February 2007, 00:00:00 UTC
version 0.6-0
Tip revision: 9c42537
vandeWiel.c

/**
    Exact Distribution of Two-Sample Permutation Tests
    van de Wiel split-up Algorithm

    Author: Mark van de Wiel (2001-2005) <m.a.v.d.wiel@TUE.nl>
            with modifications for R by 
            Torsten Hothorn <Torsten.Hothorn@R-project.org>
      
    *\file vandeWiel.c
    *\author $Author: hothorn $
    *\date $Date: 2005-07-28 17:04:29 +0200 (Thu, 28 Jul 2005) $
*/
                    
#include <R.h>
#include <Rmath.h>
#include <Rdefines.h>
                    

/**
    The probability distribution for the independent two sample problem.
    
    REFERENCE
    
    M.A. van de Wiel (2001). The split-up algorithm: a fast symbolic method 
    for computing p-values of rank statistics, Computational Statistics, 
    16, 519-538.
    
*/
                                        
typedef struct {
    long length;
    double *c;
    double *x;
} celW;

double binomi(int m, int n) { 

    double bin = 1;
    double bin1 = 1;
    double bin2 = 1;
    int i, j;
        
    for (i = 1; i <= n; i++) bin1 = bin1 * (m + 1 -i);
    for (j = 1; j <= n; j++) bin2 = bin2 * j;
    bin = bin1/bin2;
    
    return(bin);
}

celW** reserveW(int a, int b)
{
    long res = 0;
    int i, j;
    celW** W;
    
    /* <FIXME>: need to free memory in case Calloc barfs
       Writing R Extension advertises .on.exit but
       I still need a pointer to the memory.
       </FIXME>
    */

    W = Calloc(a + 1, celW*);

    for (i = 0; i <= a; i++)
        W[i] = Calloc(b + 1, celW);
        
    for (i = 0; i <= a; i++) {
        for (j = i; j <= b; j++) {
            res = (long) binomi(j,i);
            /* the majority of memory is freed on exit and error
               thanks to S_alloc */
            W[i][j].c = (double *) S_alloc(res, sizeof(double));
            W[i][j].x = (double *) S_alloc(res, sizeof(double));
        }
        R_CheckUserInterrupt();
    }
    return(W);
}

void FreeW(int a, celW **W)
{
     int i;
 
     for (i = a; i >= 0; i--)
         Free(W[i]);
         
     Free(W);
}

void initW(int a, int b, celW **W) {

    int i, j;

    for (i = 1; i <= a; i++)
    for (j = 0; j <= b; j++) {
        W[i][j].length = 0;
    }
    for (j = 0; j <= b; j++) {
        W[0][j].length = 1;
        W[0][j].c[0] = 1;  
        W[0][j].x[0] = 0;  
    }
}

void mult(celW *tem, int a, int b, int rank, double *rs) {

    /*

    mult multiplies the polynomial c_i*x^(l_i) by x^(rs[rank]), 
    which means adding the exponents

    */

    int j;
    for (j = 0; j < tem[0].length; j++)
        tem[0].x[j] += rs[rank];
}

void plus(celW **W, celW *tempie, int a, int b) {

    /*

    plus adds terms with the same exponents after multiplication 
    with 1 + x^(rs[rank]), so c1*x^j + c2*x^j becomes (c1+c2)*x^j

    */

    int elep = 0;
    int k = 0;
    int test = 1;
    int i, j;
    
    for (i = 0; i < W[a][b-1].length; i++) {

        test = 1;
        
        for (j = elep; j < tempie[0].length && test==1; j++) {
        
            if (tempie[0].x[j] - 0.000001 <= W[a][b-1].x[i]
                && W[a][b-1].x[i] <= tempie[0].x[j] + 0.000001) {

                tempie[0].c[j] += W[a][b-1].c[i];
                test = 0;
                elep = j;             
            }
        }
         
        if (test == 1) {
            tempie[0].c[tempie[0].length + k] = W[a][b-1].c[i];
            tempie[0].x[tempie[0].length + k] = W[a][b-1].x[i];
            k++;
        }
        R_CheckUserInterrupt();
    }
    tempie[0].length += k;
}

void mymergesort(celW temptw, long tijd)
{

    /*
    
    mymergesort composes one sorted list (increasing exponents of 
    the polynomial) from two separately sorted lists. c1*x^3 + c2*x^5 
    and  c3*x^4 + c4*x^7  becomes  c1*x^3 + c3*x^4 + c2*x^5 + c4*x^7.

    */

    celW copiep;
    int t1 = 0; 
    int t2 = 0;
    int i, j;

    copiep.c = Calloc(temptw.length, double);
    copiep.x = Calloc(temptw.length, double);
        
    for (i = 0; i < temptw.length; i++) {
        copiep.c[i] = temptw.c[i];
        copiep.x[i] = temptw.x[i];
    }
    
    for (j = 0; j < temptw.length; j++) {
        if (t1 <= tijd-1 && t2 <= temptw.length - tijd - 1) {
            if (copiep.x[t1] < copiep.x[tijd + t2]) {
                temptw.x[j] = copiep.x[t1];
                temptw.c[j] = copiep.c[t1];
                t1++;
            } else {
                temptw.x[j] = copiep.x[tijd + t2];
                temptw.c[j] = copiep.c[tijd + t2];
                t2++;
            }
        } else {
            if (t1 > tijd - 1) {
                temptw.x[j] = copiep.x[tijd + t2];
                temptw.c[j] = copiep.c[tijd + t2];
                t2++; 
            } else {   
                temptw.x[j] = copiep.x[t1];
                temptw.c[j] = copiep.c[t1];
                t1++;
            }
        }   
        R_CheckUserInterrupt();       
    } 
    Free(copiep.c);
    Free(copiep.x);
}

void fillcell(celW **W, int i1, int j1, int r, double *rs) {
    
    /*

    fillcell makes the new recursive polynomial W[i1][j1] from 
    W[i1 - 1][j1 - 1] and W[i1][j1 - 1]. j1 is the total number of 
    rank scores assigned so far to either of the two groups, 
    i1 is the number of rank scores assigned to the smallest sample.

    */

    long tijd;
    celW temp2;
    int j, j2;

    temp2.c = Calloc(W[i1 - 1][j1 - 1].length + 
                     W[i1][j1 - 1].length, double);
    temp2.x = Calloc(W[i1 - 1][j1 - 1].length + 
                     W[i1][j1 - 1].length, double);
    temp2.length = W[i1 - 1][j1 - 1].length;

    for (j = 0; j < temp2.length; j++) {
       temp2.c[j] = W[i1 - 1][j1 - 1].c[j];
       temp2.x[j] = W[i1 - 1][j1 - 1].x[j];
    }

    if (i1 == j1) {       
        mult(&temp2, i1 - 1, j1 - 1, r, rs); 
    } else {           
        mult(&temp2, i1 - 1, j1 - 1, r, rs);                        
        tijd = temp2.length;                                
        plus(W, &temp2, i1, j1);                            
        mymergesort(temp2, tijd);                              
    }

    W[i1][j1].length = temp2.length;

    for (j2 = 0; j2 < temp2.length; j2++) {
        W[i1][j1].c[j2] = temp2.c[j2];
        W[i1][j1].x[j2] = temp2.x[j2];
    }          

    Free(temp2.c);
    Free(temp2.x);
}

void mirrorW(celW **W,int ce, int bep, int start, double *rs) {   

    /* 

    mirrorW contains a trick to speed op computations considerably. 
    By symmetry arguments it is easy to find W[i][tot] from W[tot-i][tot]

    */

    double totsum = 0;
    long len;
    int r, h;
    
    for (r = 0; r < bep; r++) totsum += rs[start + r];
    
    len = W[bep-ce][bep].length;
        
    for (h = 0; h < len; h++) {
        W[ce][bep].length = W[bep-ce][bep].length;
        W[ce][bep].c[len-1-h] = W[bep-ce][bep].c[h];
        W[ce][bep].x[len-1-h] = totsum - W[bep-ce][bep].x[h];
    }
}

void makeW(celW **W, int a, int b, int start, double *rs) {


    /* 

    makeW simply determines whether a new polynomial W[i][j] 
    can be found from mirrorW (if W[j-i][j] is available) or needs 
    to be constructed via multiplication etc.

    */

    long i,j;
    int rank;
    int hulp;

    for (j = 1; j <= b; j++) {  /* verander naar 0!! */

        if (j < a) {
            hulp = j; 
        } else {
            hulp = a;
        }

        for (i=1; i <= hulp; i++) {   
            if (i <= j/2 || j == 1) {
                rank = start+j;
                fillcell(W, i, j, rank - 1, rs);
            } else {
                mirrorW(W, i, j, start, rs);
            }                               
            R_CheckUserInterrupt();
        }
    }
}

void cumulcoef(celW **W, int i1, int j1) {


    /*

    cumulcoef recursively adds the coefficients of the 
    sorted polynomial. So, 3*x^4 + 4*x^6 + 2*x^7  becomes  
    3*x^4 + 7*x^6 + 9*x^7.  

    */
    double coef = 0;
    int i;
     
    for(i = 0; i < W[i1][j1].length; i++) {
        W[i1][j1].c[i] += coef;
        coef = W[i1][j1].c[i];
    }
}

double numbersmall(int c, int b, double ob, celW **W1, celW **W2) {


    /*

    numbersmall is the core of the split-up algorithm. 
    
    It efficiently combines two polynomials which have used 
    complementary sets of rank scores and computes their contribution 
    to the tail-probability. 

    */

    double tot = 0;
    long le;
    int test = 1;
    int be = b/2;
    int bp = (b+1)/2;
    int tempel = 0;
    int i, j, h;
    
    for (h = 0; h <= c; h++) {

        tempel = 0;
        le = W2[c-h][bp].length;
        
        for (i = 0; i < W1[h][be].length; i++) {
            test = 1;
            for (j = tempel; j < le && test == 1; j++) {
                if (W1[h][be].x[i] + W2[c-h][bp].x[le-j-1] <= ob) {
                    tot += W1[h][be].c[i] * W2[c - h][bp].c[le - j -1];
                    tempel = j;
                    test = 0;
                }
            }
        }
    }
    return(tot);
}
       
SEXP R_split_up_2sample(SEXP scores, SEXP m, SEXP obs) {
	
    /*

    R interface to the split-up algorithm. 

    `scores' is a REAL vector giving the scores of the total sample 
    and `m' is a scalar integer with the sample size of one group.
    `obs' is the scalar observed test statistic, namely the
    sum of the `m' scores measured in one group.

    */

    int b, c, d, u;
    double tot, bino, prob;
    double ob;  
    SEXP ans;

    celW **W1;
    celW **W2;
    double *rs;

    b = LENGTH(scores);
    rs = REAL(scores);
    c = INTEGER(m)[0];
    d = b - INTEGER(m)[0];
    ob = REAL(obs)[0];

    /* total number of possible permutations */
    bino = binomi(b, c);

    /* allocate and initialise memory */
    W1 = reserveW(c, (b+1)/2);
    initW(c, (b+1)/2, W1);
    W2 = reserveW(c, (b+1)/2);
    initW(c, (b+1)/2, W2);
    
    makeW(W1, c, b/2, 0, rs);  
    makeW(W2, c, (b+1)/2, b/2, rs);

    for (u = 0; u <= c; u++) cumulcoef(W2, u, (b+1)/2);

    /* number of permutations <= ob */
    tot = numbersmall(c, b, ob, W1, W2);
    
    /* probability */
    prob = tot/bino; 
    
    /* free memory: this will _not_ take place 
       in case of an error */
    FreeW(c, W1);
    FreeW(c, W2);

    /* return to R */
    PROTECT(ans = allocVector(REALSXP, 1));
    REAL(ans)[0] = prob;
    UNPROTECT(1);
    return(ans);
}
Browse the archive

https://github.com/cran/coin