Raw File
xvalid.c
/*
    Gstat, a program for geostatistical modelling, prediction and simulation
    Copyright 1992, 2003 (C) Edzer J. Pebesma

    Edzer J. Pebesma, e.pebesma@geog.uu.nl
    Department of physical geography, Utrecht University
    P.O. Box 80.115, 3508 TC Utrecht, The Netherlands

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version. As a special exception, linking 
    this program with the Qt library is permitted.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    (read also the files COPYING and Copyright)
*/

/*
 * xvalid.c: cross validation
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "defs.h"
#include "userio.h"
#include "data.h"
#include "utils.h"
#include "debug.h"
#include "glvars.h"
#include "getest.h"
#include "select.h"
#include "stat.h"
#include "report.h"
#include "xvalid.h"
 
#define IS_VAR(m) (m == SKR || m == OKR || m == UKR || m == LSLM)

static int cross = 0;

static void remove_where_from_selection(DATA *data, DPOINT *pt, int at0);

void cross_valid(DATA **data) {
/*
 * DATE: 		10 sept 1992
 * BY: 			Edzer J. Pebesma.
 * RETURNS:		none
 * PURPOSE:		perform cross validation on data[0]->list[i], 
 * given data[0]->list[j] (j = 0..i-1,i+1..data[0]->n) and data[j] (j > 0) 
 * PARAMETERS: data is a data array, data[0]->n_list > 0; 
 * SIDE EFFECT:	some memory allocation and free_ing
 */
	int i, j, k, var, sel_max_ori, n_Xtot;
	double *xdata, *xpred, *xdiff, *xstd, *xzscore;
	static double *est;
	double *X = NULL;

	DPOINT where, *block;

	cross = 1;
/* get global variables: */
	if (o_filename != NULL)
		write_points(o_filename, data[0], NULL, NULL, get_n_outfile());
	for (i = n_Xtot = 0; i < get_n_vars(); i++)
		n_Xtot += data[i]->n_X;
	X = (double *) emalloc(n_Xtot * sizeof(double));
	for (i = 0; i < n_Xtot; i++)
		X[i] = 1.0; /* will lead to correct results on ordinary cokriging */
	est = (double *) emalloc (get_n_outfile() * sizeof(double));
	sel_max_ori = data[0]->sel_max;
	data[0]->sel_max = MIN(data[0]->n_list, sel_max_ori) + 1; /* prevent "global" */
					 
	block = get_block_p();
	if (block->x > 0.0 || block->y > 0.0 || block->z > 0.0 || 
			get_data_area() != NULL)
		pr_warning("xvalid.c: cross validating with non-zero block/area size");

/* initialise: */
	var = IS_VAR(get_method());
/* var = TRUE if variance is returned, otherwise FALSE */
	xdata = (double *) emalloc(data[0]->n_list * sizeof(double));
	xpred = (double *) emalloc(data[0]->n_list * sizeof(double));
	xdiff = (double *) emalloc(data[0]->n_list * sizeof(double));
	xstd = (double *) emalloc(data[0]->n_list * sizeof(double));
	xzscore = (double *) emalloc(data[0]->n_list * sizeof(double));
	for (i = 0; i < data[0]->n_list; i++) { /* initialise all arrays: */
		set_mv_double(&(xdata[i]));
		set_mv_double(&(xpred[i]));
		set_mv_double(&(xdiff[i]));
		set_mv_double(&(xstd[i]));
		set_mv_double(&(xzscore[i]));
	}
	for (i = 0; i < data[0]->n_list; i++) { 
		xdata[i] = data[0]->list[i]->attr;
		where = *(data[0]->list[i]);
		for (j = 0; j < data[0]->n_X; j++)
			X[j] = where.X[j];
		where.X = X;
		/* qtree_rebuild(data); */
		for (j = 0; j < get_n_outfile(); j++)
			set_mv_double(&est[j]);
		for (j = 0; j < get_n_vars(); j++) {
			select_at(data[j], &where);
			remove_where_from_selection(data[j], &where, j == 0);
		}
		get_est(data, get_method(), &where, est);
		if (o_filename != NULL) {
			for (j = 1; j < get_n_vars(); j++) {
				if (!(data[j]->n_X == 1 && data[j]->colX[0] == 0)) {
					set_mv_double(&(est[2 * j]));
					set_mv_double(&(est[2 * j + 1]));
					for (k = 0; k < get_n_vars(); k++)
						if (k != j)
							set_mv_double(&(est[2 * get_n_vars() + LTI2(j,k)]));
				}
			}
			write_points(o_filename, data[0], &where, est, get_n_outfile());
		}
		if (! is_mv_double(&est[0])) {
			xpred[i] = est[0]; 
			if (! is_mv_double(&xdata[i]))
				xdiff[i] = xpred[i] - xdata[i];
			if (var) {
				if (! is_mv_double(&est[1])) 
					if (est[1] >= 0.0) {
						xstd[i] = sqrt(est[1]); 
						if (xstd[i] != 0.0 && !is_mv_double(&xdata[i]))
							xzscore[i] = xdiff[i]/xstd[i];
					}
			} 
		} else {
			set_mv_double(&xpred[i]); 
			set_mv_double(&xdiff[i]); 
			set_mv_double(&xstd[i]);
			set_mv_double(&xzscore[i]);
		} /* if .... */
		print_progress(i, data[0]->n_list);
	} /* for i, all records */
	/* 
	 * once and for all: copy original data pointers back: 
	 */
	data[0]->sel_max = sel_max_ori;
	print_progress(data[0]->n_list, data[0]->n_list);
	report_xvalid(xdata, xpred, xdiff, xstd, xzscore, data[0]->n_list, var);
	if (o_filename != NULL)
		write_points(NULL, NULL, NULL, NULL, 0); /* closes file */
	efree(xdata); 
	efree(xpred); 
	efree(xdiff); 
	efree(xstd); 
	efree(xzscore);
	efree(X);
	cross = 0;
	return;
}

static void remove_where_from_selection(DATA *data, DPOINT *pt, int at0) {
	/* remove point pt from data->sel */
	int n_sel, i;

	n_sel = data->n_sel;
	if (n_sel == 0)
		return;

	for (i = 0; i < data->n_sel; i++) {
		if (data->pp_norm2(data->sel[i], pt) == 0) {
			data->sel[i] = data->sel[data->n_sel-1]; /* copy last to i */
			data->n_sel--; /* cut 1 from list */
		}
	}
	if (at0 && n_sel == data->n_sel)
		ErrMsg(ER_NULL, "point(s) not found in remove_where_from_selection");
}
back to top