Content - 1ee335d7bab07ea98724a91ff6cb98ca54500e57 - 6bb4cc9/src/empvario.cc

empvario.cc
/*
 Authors  Martin Schlather, schlather@math.uni-mannheim.de 

 calculation of the empirical variogram

 Copyright (C) 2002 - 2014 Martin Schlather, 

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 3
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111 - 1307, USA.
*/


#include <math.h>  
#include <stdio.h>  
#include <stdlib.h>
#include "RF.h"
 

// z coordinate run the fastest in values, x the slowest   

#define TOOLS_DIM 1
#define TOOLS_MEMORYERROR 2
#define TOOLS_XERROR 3
#define TOOLS_BIN_ERROR 4
#define TOOLS_UNKNOWN_CHAR 5


double EV_TIMES_LESS = 2.0; // 0:always new method; inf:always usual method
int EV_SYSTEM_LARGER = 100; // 0:always new method; inf:always usual method

double variogram(double a, double b) {
  double dummy = a - b; 
  return dummy * dummy; 
}

double Efunction(double a, double b) {return a + b; } 
/*   see Schlather (2001), Bernoulli, 7(1), 99 - 117
    Schlather (????), Math. Nachr, conditionally accepted
    Schlather, Ribeiro, Diggle (in prep.) 
 */

double kmm(double a, double b) {return a * b; }  /*  
					     Stoyan's k_mm function
					     see Stoyan, Kendall, & Mecke, 
					     or Stoyan & Stoyan (both Wiley) 
					   */

int LOW = -1; 

void empiricalvariogram(double  * x, int  * dim, int  * lx, 
			double  * values, int  * repet, int  * grid, 
			double  * bin, int  * nbin, int  * charact, 
			double  * vario, 
			double  * sd, // 
			// note that within the subsequent algorithm
			// the sd of the Efct ist correctly calculated:
			//   instead for summing up Efct^2 in sd one should
			//   sum up a^2 + b^2 !
			//   so finally only NAs are returned in this case
			//   use emp
			int  * n)
/*     x      : matrix of coordinates, ### rows define points
 *    dim    : dimension, 
 *    lx     : length of x, y, and z
 *    values : (univariate) data for the points
 *    repet  : number of repetitions (the calculated emprical variogram will 
 *             be an overall average)
 *    grid   : (boolean) if true lx must be 3 [ x==c(start, end, step) ]
 *    bin    : specification of the bins 
 *             sequence is checked whether it is strictly isotone
 *    nbin   : number of bins. i.e. length(bin) - 1
 *    character: 1 : function E
 *               2 : Stoyan's kmm function
 *               default : semivariogram
 *    vario    : empirical variogram
 *    n      : number of pairs for each bin
  */
{ 
  int i, ii, j, d, halfnbin, gridpoints[MAXVARIODIM], dimM1, EV_METHOD, err, 
    low, cur, up;    
  long totalpoints, totalpointsrepet, segment, 
    factor =  -1; 
  double ( * characteristic)(double, double) = NULL; 
  double  * xx[MAXVARIODIM], maxdist[MAXVARIODIM], dd, 
     * BinSq = NULL; 

  if ( * dim > MAXVARIODIM) {err = TOOLS_DIM; goto ErrorHandling; }
    
  for (segment = i = 0; i< * dim; i++, segment += * lx) xx[i] = &(x[segment]); 
  if (xx[0]==NULL) {err=TOOLS_XERROR; goto ErrorHandling; }
  for (i=0; i< * nbin; i++) {
    if (bin[i] >= bin[i + 1])  {err = TOOLS_BIN_ERROR; goto ErrorHandling; }
  }


  dimM1 =  *dim - 1; 
  halfnbin =  * nbin / 2; 

  switch ( *charact) {
  case 0 :
    characteristic = variogram; 
    factor = 2; 
    break; 
  case 1 : 
    characteristic = Efunction; 
    factor = 2; 
    break; 
  case 2  : 
    characteristic = kmm; 
    factor = 1; 
    break; 
  default : 
    err = TOOLS_UNKNOWN_CHAR; goto ErrorHandling; 
   }
   
  if ((BinSq = (double  * ) MALLOC(sizeof(double) *  ( * nbin + 1)))==NULL) {
    err = TOOLS_MEMORYERROR; goto ErrorHandling; 
  }
  for (i = 0; i< * nbin; i++){
    sd[i] = vario[i] = 0.0; 
    n[i] = 0; 
  }
  for (i = 0; i <=  * nbin; i++){ 
    BinSq[i] = bin[i] > 0 ?  bin[i] * bin[i] : bin[i]; 
  }


  //////////////////////////////////// GRID ////////////////////////////////////
  if ( * grid) {
    
    error("use option 'fft' for data on a grid."); 

    int d1, d2, head, tail, swapstart; 
    double p1[MAXVARIODIM], p2[MAXVARIODIM], distSq, dx[MAXVARIODIM]; 
    int delta[MAXVARIODIM], deltaTwo[MAXVARIODIM], maxtail;    
    long indextail[MAXVARIODIM], indexhead[MAXVARIODIM],
      valuePtail, valuePointhead, 
      segmentbase[MAXVARIODIM], DeltaTwoSegmentbase[MAXVARIODIM], 
      DeltaFourSegmentbase[MAXVARIODIM], 
      SegmentbaseTwo[MAXVARIODIM], SegmentbaseFour[MAXVARIODIM]; 
    double psq[MAXVARIODIM], p[MAXVARIODIM], maxbinsquare; 
    bool forbid;    

    // does not work!! :
    //GetGridSize(x, y, z, dim, &(gridpoints[0]), &(gridpoints[1]), &(gridpoints[2])); 
    // totalpoints = gridpoints[0] * gridpoints[1] * gridpoints[2]; 
    //
    // instead :
    for (dd = 0.0, totalpoints = 1, i = 0; i <= dimM1; i++) {
      gridpoints[i] = (int) (xx[i][XLENGTH]); 
      totalpoints  *= gridpoints[i]; 
      maxdist[i] = (gridpoints[i] - 1) * xx[i][XSTEP]; 
      dd += maxdist[i] * maxdist[i]; 
    }
    EV_METHOD = (int) ( ((EV_TIMES_LESS * BinSq[ * nbin]) < dd) ||
			  (EV_SYSTEM_LARGER<totalpoints)); // ? large system ?
    for (i = 0; i <= dimM1; i++) { dx[i] = xx[i][XSTEP] * 0.99999999; }
    totalpointsrepet = totalpoints *  *repet; 
    segmentbase[0] = 1; // here x runs the fastest;  
    SegmentbaseTwo[0] = segmentbase[0] << 1; 
    SegmentbaseFour[0] = segmentbase[0] << 2; 
    for (i = 1; i <= dimM1; i++) { 
      segmentbase[i] = segmentbase[i - 1] * gridpoints[i - 1]; 
      SegmentbaseTwo[i] = segmentbase[i] << 1; 
      SegmentbaseFour[i] = segmentbase[i] << 2; 
    } 




    EV_METHOD = 0; /// !!! since EV_METHOD = 1 has invalid read !!!!!!
    
    switch (EV_METHOD) {
    case 0 :
      for (i = 0; i <= dimM1; i++) {indexhead[i] = 0; p1[i] = 0.0; }
       
      // loop through all pair of points, except (head, tail) for head=tail, 
      // which is treated separately at the end, if necessary (not relevant for 
      // variogram itself, but for function E and V)
      for (head = 0; head<totalpoints; ) {
	for (i = 0; i <= dimM1; i++) {
	  indextail[i] = 0; 
	  p2[i] = 0.0; 
	}
	for (tail = 0; tail<head; ) {
	  distSq = 0.0; 
	  for (i = 0; i <= dimM1; i++) {
	    double dx2; 
	    dx2 = p1[i] - p2[i]; 
	    distSq  += dx2 * dx2; 
	  }
	  
	  if ((distSq>BinSq[0]) && (distSq <= BinSq[ * nbin])) { 
	    /*  search which bin distSq in  */
	    low = 0; up =   * nbin; /*  21.2.01,  * nbin - 1  */  
	    cur =  halfnbin; 
	    while (low!=up) { 
	      if (distSq> BinSq[cur]) {low = cur;} else {up = cur-1;} // (* ; *]
	      cur = (up + low + 1) / 2; 
	    }
	    for (segment = 0; segment<totalpointsrepet; segment += totalpoints){
	      double x2; 
	      x2 = characteristic(values[head + segment],
				  values[tail + segment]); 
	      vario[low] += x2; 
	      sd[low] += x2 * x2; 
	    }
	    assert(low < *nbin); 
	    n[low]++; 
	  }	
	  tail++; 
	  d2 = dimM1; 
	  indextail[d2]++;
	  p2[d2] +=dx[d2]; 
	  while (indextail[d2] >= gridpoints[d2]) { 
	    indextail[d2] = 0; p2[d2] = 0; 
	    d2--; 
	    assert(d2 >= 0); 	
	    indextail[d2]++; 
	    p2[d2] +=dx[d2]; 
	  }
	}   
	head++; 
	if (head<totalpoints) {
	  d1 = dimM1;  
	  indexhead[d1]++; p1[d1] +=dx[d1];      
	  while (indexhead[d1] >= gridpoints[d1]) { 
	    indexhead[d1] = 0; 
	    p1[d1] = 0; 
	    d1--;  
	    assert(d1 >= 0); 
	    indexhead[d1]++; 
	    p1[d1] +=dx[d1]; 
	  }
	}
      }
      if(PL >= PL_SUBDETAILS) 
	{
	for(i = 0; i< * nbin; i++){
	  PRINTF(" %d:%f(%f, %d)",
		 i, vario[i]/(double)(factor * n[i]), vario[i], n[i]); 
	}
	PRINTF("  XXXY\n"); 
	}      
	break; 

	/* ******************************************************** */

    case 1 :
      /*  idea: 
	 1) fix a vector `delta' of positive components
	 2) search for any pair of points whose distSqance vector dv equals
	    |dv[i]|=delta
	    and some the f(value1, value2) up.
	 
	realisation :
	1) one preferred direction, namely dimM1 - direction (of 0..dimM1)
	2) for the minor directions, swap the values of delta from positive 
	   to negative (all 2^{dimM1} posibilities)   - - > indexhead
	   EXCEPTION :  delta[dimM1]==0. Then only the minor directions up to 
                        dimM1 - 2 are swapped!
                        (otherwise values will be counted twice as indextail is 
                         running through all the positions with
                         coordinate[dimM1]==0)
           if delta[dimM1 - 1] is also 0 then only minor direction up to dimM1 - 3 
           are swapped, etc.
        3) indextail running through all positions with coordinate[dimM1]==0. 
	   This is not very effective if the grid is small in comparison to the
	   right boundary of the bins!    
       */
          
      maxbinsquare = BinSq[ * nbin]; 
      for (i = 0; i <= dimM1; i++) {
	delta[i] = 0; 
	psq[i] = p[i] = 0.0; 
      }
      for (; ; ) { // delta
	do {
	  d = 0;  
	  delta[d]++;  // this excludes distSq==0 !	
	  p[d] +=dx[d]; psq[d] = p[d] * p[d]; 
	  for (distSq = 0.0, i = 0; i<=dimM1; i++) {distSq += psq[i]; }
	  while ((distSq>maxbinsquare)  || (p[d]>maxdist[d])) {/*  ( * )  */ 
	    delta[d] = 0; psq[d] = p[d] = 0.0; 
	    d++;   
	    if (d<=dimM1) { 
	      delta[d]++; 
	      p[d] +=dx[d];
	      psq[d] = p[d] * p[d]; 
	      for (distSq = 0.0, i = 0; i<=dimM1; i++) distSq += psq[i];
	    } else break; 	
	  }
	} while ((distSq<=BinSq[0]) && (d<=dimM1)); 
	if (d>dimM1) break; 
	if (PL >= PL_SUBDETAILS) 
	    PRINTF("\n {%d %d %d}", delta[0], delta[1], delta[2]); 
	assert((distSq<=BinSq[ * nbin]) && (distSq>BinSq[0])); 
	{
	  low = 0; up = *nbin; /*   */ cur =  halfnbin; 
	  while(low!=up){
	    if (distSq> BinSq[cur]) low = cur; else up = cur-1;// ( * ; * ] 
	    cur = (up + low + 1) / 2; 
	  }
	}	

	
	i = 0; 
	while ((i<dimM1) && (delta[i]==0)) i++; 
	swapstart = i + 1; 
	
	for(i = 0; i<=dimM1; i++) {
	  deltaTwo[i]=delta[i] << 1; 
	  DeltaTwoSegmentbase[i]  =  delta[i] * SegmentbaseTwo[i]; 
	  DeltaFourSegmentbase[i] =  delta[i] * SegmentbaseFour[i]; 
	}

	for (i = 0; i<=dimM1; i++) indextail[i] = 0;
	valuePtail = valuePointhead = 0; 
	
	maxtail = gridpoints[0] - delta[0];  
	for(; ; ) { // indextail
	  for (i = 0, valuePointhead = 0; i<=dimM1; i++) {
	    indexhead[i] = indextail[i] + delta[i]; /*  Do not use  - delta[i], as last 
						   dimension must always be 
						    + delta[]  */
	    valuePointhead  += indexhead[i] * segmentbase[i]; 
	  }  // one point is given by indextail, the other by indexhead	
	  for (; ; ) {// indexhead
	    forbid = false; // in case dim=1 ! (???, 21.2.01)
	    for (i = 0; i<=dimM1; i++) { // for (i = 1; ... ) should be enough...
	      if ((forbid = indexhead[i] < 0) || indexhead[i] >= gridpoints[i]) 
		break; 
	    }
	    if (!forbid) { 	  
	      long segtail, seghead; 
	      for (tail = 0; tail<maxtail; tail++) {	
		segtail = valuePtail + tail; 
		seghead = valuePointhead + tail; 
		for (segment=0; segment<totalpointsrepet; segment+=totalpoints){
		  double x2; 
		  x2 = characteristic(values[segtail + segment], 
				      values[seghead + segment]); 
		  vario[low] += x2; 
		  sd[low] += x2 * x2; 
		} 	
		assert(low< *nbin); 
		n[low]++; 
	      }	
	    }
	    
	    d = swapstart;  
	    if (d<=dimM1) {
	      indexhead[d] -=deltaTwo[d];  
	      valuePointhead -= DeltaTwoSegmentbase[d]; 
	      while (indexhead[d] < indextail[d] - delta[d] || delta[d]==0) { 
		if (delta[d] != 0) {
		  // so, indexhead[d]<indextail[d] - delta[d]; 
		  // i.e.indexhead[d] = indextail[d] - 3 * delta[d]
		  valuePointhead += DeltaFourSegmentbase[d]; 
		  indexhead[d] = indextail[d] + delta[d]; 
		}
		d++; 
		if (d<=dimM1) {
		  indexhead[d] -= deltaTwo[d]; 
		  valuePointhead -= DeltaTwoSegmentbase[d]; 
		} else break; 
	      }
	    } else break; 
	    if (d>dimM1) break; 
	  } //for(; ; ), indexhead
	  
	  d = 1;  
	  if (d<=dimM1) {    // not satisfied for one dimensional data! ?
	    indextail[d]++; 
	    valuePtail += segmentbase[d]; 
	    while (indextail[d] >= gridpoints[d]) { 
	      valuePtail -= indextail[d] * segmentbase[d]; 
	      indextail[d] = 0; 
	      d++; 
	      if (d<=dimM1) {
		indextail[d]++; 
		valuePtail += segmentbase[d]; 
	      } else break; 
	    }
	  } else break; 
	  if (d>dimM1) break; 
	} // for(; ; ), indextail
      }  //for(; ; ), delta
      if(PL >= PL_SUBDETAILS) {
	PRINTF("\n"); 
	for(i = 0; i< * nbin; i++){
	  PRINTF(" %d:%f(%f, %d)  ", 
		 i, vario[i]/(double)(factor * n[i]), vario[i], n[i]); 
	}
	PRINTF(" YYY\n"); 
      }
      break; 
    default : BUG;
    }
  } else {
    ////////////////////////////////////  ARBITRARY /////////////////////////////
    totalpoints =  * lx; 
    totalpointsrepet = totalpoints *  * repet; 
    for (i = 0; i<totalpoints; i++){ // to have a better performance for large 
      //                            data sets, group the data first into blocks
      for (j = 0; j<i; j++){
        double distSq; 
      	for (distSq = 0.0, d = 0; d<=dimM1; d++) {
	  double dx; 
	  dx = xx[d][i] - xx[d][j]; 
	  distSq  += dx * dx; 
	}		
	// see also above
	//distSq = sqrt(distSq); 26.2.
	if (distSq>BinSq[0] && distSq<=BinSq[*nbin]) { 
	  low = 0; 
	  up = *nbin; 
	  cur =  halfnbin; 
	  while (low!=up) {  
	    if (distSq> BinSq[cur]) low = cur; else up = cur - 1; // ( * ; * ]  
	    cur = (up + low + 1) / 2; 
	  } 	
	  for (segment = 0; segment<totalpointsrepet; segment  += totalpoints) { 
	    double x2; 
	    x2 = characteristic(values[i + segment], values[j + segment]); 
	    if (R_FINITE(x2)) {
	      vario[low] += x2; 
	      sd[low] += x2 * x2; 
	      n[low]++; 
	    }
	  }	
	  assert(low< * nbin); 
	}
	/*   */
      }
    }
  }
  
  // ii is used in switch by this value: !!!!
  ii = 0; while(ii <= *nbin && bin[ii] < 0) ii++;
  ii--; 

  // 11.10.03: factor ohne  * repet to be clearer
  // case ii : separat behandelt, neu geschrieben
  //  if ( * repet>1) {
  //   for (j=0; j< * nbin; j++) n[j]  *=  * repet; 
  //}

  for (i = 0; i<ii; i++) sd[i] = vario[i] =  RF_NA;
  for (i++; i< * nbin; i++){
    if (n[i]>0) { 
      vario[i] /= (double) (factor * n[i]); 
      if (n[i]>1) 
	sd[i] = sqrt(sd[i] / (factor * factor * (double) (n[i] - 1))
		     - (double) (n[i]) / (double) (n[i] - 1) * 
		     vario[i] * vario[i]); 
      else sd[i] = RF_INF; 
    } else { 
      vario[i] =  RF_NA; 
      sd[i] = RF_INF; 
    }
  }

  i = ii; // sicherheitshalber - -  eigentlich sollte unten kein i mehr auftauchen
  long jj; 
  switch ( * charact) {
  case 0 :
    if ((ii< * nbin)&&(ii >= 0)){
      n[ii]  += totalpointsrepet; 
      vario[ii] /=  (double) (factor * n[ii]); 
      sd[ii] = sqrt(sd[ii] / (factor * factor * (double) (n[ii] - 1))
		    - (double) (n[ii]) / (double) (n[ii] - 1) * vario[ii] * vario[ii]); 
    }    
    break; 
  case 1 : // E, calculating E(0) correctly, taking into account that the
    //        bin including 0 may include further points
    if ((ii< * nbin)&&(ii >= 0)){
      n[ii]  *= factor; // * 2
      for (jj = 0; jj<totalpointsrepet; jj++) {
	vario[ii]  += values[jj]; 
      }
      n[ii]  += totalpointsrepet; 
      vario[ii] = vario[ii] / (double) n[ii]; 
      for (i = 0; i< * nbin; i++) sd[i] = RF_NA; // beste Loesung, da sonst
      // nur Chaos gibt - -  da sd falsch berechnet ist.
      // siehe MPP package fuer die richtige Berechnung.
    }
    break; 
  case 2 : // kmm, calculating kmm(0} and dividing all the values by mean^2
    double mean, square, qu, x2; 
    for (jj = 0, mean = square = qu = 0.0; jj<totalpointsrepet; jj++) { 
      mean  += values[jj]; 
      square  += (x2 = values[jj] * values[jj]); 
      qu  += x2 * x2; 
    }
    mean /= (double) totalpointsrepet; 
    square /= (double) totalpointsrepet; 
    if ((ii< * nbin)&&(ii >= 0)){
      n[ii]  += totalpointsrepet;    
      vario[ii] =  (vario[ii] + square) / (double) n[ii]; // factor is 1
      sd[ii] = sqrt(((sd[ii] + qu) - (double) (n[ii]) * vario[ii] * vario[ii])
		    / ((double) (n[ii] - 1))); 
    }
    mean *= mean; 
    for (jj = 0; jj< * nbin; jj++) { vario[jj] /= mean; }
    break; 
  default : BUG;    
  }

  if (BinSq!=NULL) free(BinSq); 
  return; 
  
 ErrorHandling:
  if (BinSq!=NULL) free(BinSq); 
  for (i = 0; i< * nbin; i++){sd[i] = vario[i] = RF_NA; } 
  switch (err) {
  case TOOLS_DIM :
    error("dimension exceed max dimension of empirical variogram estimation"); 
  case TOOLS_MEMORYERROR :  
    error("Memory alloc failed in empiricalvariogram.\n"); 
  case TOOLS_XERROR :  
    error("The x coordinate may not be NULL.\n"); 
  case TOOLS_BIN_ERROR :
    error("Bin components not an increasing sequence.\n"); 
  case TOOLS_UNKNOWN_CHAR:
    error("unknown type of second order characteristic"); 
  default : BUG;
  }
}