https://github.com/cran/gstat
Tip revision: 2dc01647537ce302e4e2b10aa8bea9e40d7e3134 authored by Edzer J. Pebesma on 21 May 2007, 00:00:00 UTC
version 0.9-39
version 0.9-39
Tip revision: 2dc0164
fit.c
/*
Gstat, a program for geostatistical modelling, prediction and
simulation Copyright 1992, 2003 (C) Edzer J. Pebesma
Edzer J. Pebesma, e.pebesma@geo.uu.nl
Department of physical geography, Utrecht University
P.O. Box 80.115, 3508 TC Utrecht, The Netherlands
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. As a special exception, linking
this program with the Qt library is permitted.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
(read also the files COPYING and Copyright)
*/
/*
* fit.c: fit variogram model to experimental variograms; gnuplot fit driver
*/
#include <stdio.h>
#include <stdlib.h> /* getenv() */
#include <string.h> /* strstr() */
#include <math.h> /* fabs(), sqrt() */
#include "matrix2.h"
#include "defs.h"
#include "defaults.h"
#include "userio.h"
#include "data.h"
#include "utils.h"
#include "read.h"
#include "debug.h"
#include "vario.h"
#include "sem.h"
#include "plot.h"
#include "glvars.h"
#include "reml.h"
#include "lm.h"
#include "fit.h"
#define FIT_LOG "fit.log"
#define NEARLY_ZERO 1e-30
static void wls_fit(VARIOGRAM *vp);
static double getSSErr(const VARIOGRAM *vp, PERM *p, LM *lm);
static void write_fx(FILE *, VARIOGRAM *v);
static void get_values(const char *fname, VARIOGRAM *v);
static void gnu_fit(VARIOGRAM *v);
static void correct_for_anisotropy(VARIOGRAM *v);
static int fill_weights(const VARIOGRAM *vp, PERM *p, LM *lm);
static int fit_GaussNewton(VARIOGRAM *vp, PERM *p, LM *lm,
int iter, int *bounded);
int fit_variogram(VARIOGRAM *v) {
DATA **d = NULL;
int i = 0;
long n = 0;
if (v->ev->refit == 0)
return 0;
if (v->ev->fit != NO_FIT && v->ev->fit != MIVQUE_FIT) {
if (! v->ev->cloud) {
while (n == 0 && i < v->ev->n_est)
n += v->ev->nh[i++]; /* check if estimates exist */
if (n == 0) /* bad luck */
return 1;
} else if (v->ev->n_est == 0)
return 1;
}
if (v->ev->fit != NO_FIT) {
if (v->ev->map)
ErrMsg(ER_IMPOSVAL, "cannot fit model to variogram map");
for (i = 0; i < v->n_models; i++)
if (v->part[i].sill == 0.0 && v->part[i].fit_sill != 0)
v->part[i].sill = 1.0; /* avoid lot'o trouble */
}
if ((v->ev->fit == WLS_FIT_MOD || v->ev->fit == WLS_GNUFIT_MOD) &&
!is_variogram(v))
pr_warning("this fit method is not recommended for covariograms");
v->ev->direction.x = sin(gl_alpha * PI / 180.0) * cos(gl_beta * PI / 180.0);
v->ev->direction.y = cos(gl_alpha * PI / 180.0) * cos(gl_beta * PI / 180.0);
v->ev->direction.z = sin(gl_beta * PI / 180.0);
switch (v->ev->fit) {
case NO_FIT:
break;
case OLS_FIT: /* BREAKTHROUGH: */
case WLS_FIT: /* BREAKTHROUGH: */
case WLS_FIT_MOD:
case WLS_NHH:
wls_fit(v);
break;
case WLS_GNUFIT: /* BREAKTHROUGH */
case WLS_GNUFIT_MOD:
gnu_fit(v);
break;
case MIVQUE_FIT:
if (v->id1 != v->id2)
return 1;
d = get_gstat_data();
reml_sills(d[v->id1], v);
break;
/*
case LMC:
d = get_gstat_data();
fit_lmc(d, v, ...
default:
ErrMsg(ER_IMPOSVAL, "fit_vgm(): unknown v->ev->fit");
*/
/* no default: force compile warning on missing option! */
}
return 0;
}
static void wls_fit(VARIOGRAM *vp) {
/*
* non-linear iterative reweighted least squares fitting of variogram model to
* sample variogram (..covariogram model to sample covariogram, cross, etc.)
* all information necessary is contained in *vp.
*
* uses Marquardt-Levenberg algorithm;
* the implementation follows gnuplot's fit.c
*/
static PERM *p = PNULL;
int i, j, n_iter = 0, bounded = 0, timetostop;
double SSErr, oldSSErr = DBL_MAX, step;
LM *lm;
p = px_resize(p, vp->ev->n_est);
if (! vp->ev->cloud) {
for (i = j = 0; i < (vp->ev->zero == ZERO_AVOID ?
vp->ev->n_est-1 : vp->ev->n_est); i++) {
if (vp->ev->nh[i] > 0)
p->pe[j++] = i;
}
p->size = j;
}
lm = init_lm(NULL);
if (gl_cn_max < 0.0)
lm->cn_max = 1.0/sqrt(MACHEPS);
else
lm->cn_max = gl_cn_max;
/* oldSSErr = getSSErr(vp, p, lm); */
do {
print_progress(n_iter, gl_iter);
if (DEBUG_VGMFIT)
printlog("%s: ", vp->descr);
if ((vp->fit_is_singular = fit_GaussNewton(vp, p, lm, n_iter, &bounded))) {
pr_warning("singular model in variogram fit");
print_progress(gl_iter, gl_iter);
vp->SSErr = getSSErr(vp, p, lm);
return;
}
update_variogram(vp);
SSErr = getSSErr(vp, p, lm);
/* we can't use lm->SSErr here since that's only in the
X-filled-with-derivatives, not the true residuals */
step = oldSSErr - SSErr;
if (SSErr > gl_zero)
step /= SSErr;
n_iter++;
if (DEBUG_VGMFIT)
printlog("after it. %d: SSErr %g->%g, step=%g (fit_limit %g%s)\n",
n_iter, oldSSErr, SSErr, step, gl_fit_limit,
bounded ? "; bounded" : "");
oldSSErr = SSErr;
if (step < gl_fit_limit && step >= 0.0 && bounded == 0)
timetostop = 1;
else if (n_iter > gl_iter)
timetostop = 1;
else
timetostop = 0;
} while (! timetostop);
print_progress(gl_iter, gl_iter);
if (n_iter == gl_iter)
pr_warning("No convergence after %d iterations", n_iter);
if (DEBUG_VGMFIT) {
printlog("# iterations: %d, SSErr %g, last step %g", n_iter, SSErr, step);
if (step < 0.0)
printlog(", last step was in the wrong direction.\n");
else
printlog("\n");
}
free_lm(lm);
vp->SSErr = SSErr;
return;
} /* wls_fit */
static double getSSErr(const VARIOGRAM *vp, PERM *p, LM *lm) {
int i;
double x, y, z, dz, SSErr;
/*
if (fill_weights(vp, p, lm))
return 0.0;
*/
for (i = 0, SSErr = 0.0; i < p->size; i++) {
x = vp->ev->direction.x * vp->ev->dist[p->pe[i]];
y = vp->ev->direction.y * vp->ev->dist[p->pe[i]];
z = vp->ev->direction.z * vp->ev->dist[p->pe[i]];
/* fill y with current residuals: */
dz = vp->ev->gamma[p->pe[i]] - (is_variogram(vp) ?
get_semivariance(vp, x, y, z) :
get_covariance(vp, x, y, z));
if (lm->weights != NULL)
SSErr += dz * dz * lm->weights->ve[i];
else
SSErr += dz * dz;
}
return SSErr;
}
static int fit_GaussNewton(VARIOGRAM *vp, PERM *p, LM *lm, int iter,
int *bounded) {
double s = 0.0, x, y, z;
int i, j, n_fit, model, fit_ranges = 0;
IVEC *fit = NULL;
VEC *start = NULL;
if (p->size == 0)
return 1;
fit = iv_resize(fit, 2 * vp->n_models);
/* index fit parameters: parameter fit->ive[j] corresponds to model i */
for (i = n_fit = 0; i < vp->n_models; i++) {
if (vp->part[i].fit_sill)
fit->ive[n_fit++] = i;
if (vp->part[i].fit_range) {
fit->ive[n_fit++] = i + vp->n_models; /* large -->> ranges */
fit_ranges = 1;
}
}
if (n_fit == 0) {
iv_free(fit);
return 0;
}
fit = iv_resize(fit, n_fit); /* shrink to fit */
lm->X = m_resize(lm->X, p->size, n_fit);
lm->y = v_resize(lm->y, p->size);
start = v_resize(start, n_fit);
for (i = 0; i < n_fit; i++) {
if (fit->ive[i] < vp->n_models) {
model = fit->ive[i];
start->ve[i] = vp->part[model].sill;
} else {
model = fit->ive[i] - vp->n_models;
start->ve[i] = vp->part[model].range[0];
}
}
for (i = 0; i < p->size; i++) {
x = vp->ev->direction.x * vp->ev->dist[p->pe[i]];
y = vp->ev->direction.y * vp->ev->dist[p->pe[i]];
z = vp->ev->direction.z * vp->ev->dist[p->pe[i]];
/* fill y with current residuals: */
if (is_variogram(vp))
s = get_semivariance(vp, x, y, z);
else
s = get_covariance(vp, x, y, z);
lm->y->ve[i] = vp->ev->gamma[p->pe[i]] - s;
/* fill X: */
for (j = 0; j < n_fit; j++) { /* cols */
if (fit->ive[j] < vp->n_models) {
model = fit->ive[j];
lm->X->me[i][j] = (is_variogram(vp) ?
UnitSemivariance(vp->part[model],x,y,z) :
UnitCovariance(vp->part[model],x,y,z));
} else {
model = fit->ive[j] - vp->n_models;
lm->X->me[i][j] = (is_variogram(vp) ?
da_Semivariance(vp->part[model],x,y,z) :
-da_Semivariance(vp->part[model],x,y,z));
}
}
}
if (iter == 0 && fill_weights(vp, p, lm)) {
iv_free(fit);
v_free(start);
return 1;
}
if (DEBUG_FIT) {
printf("data: ");
v_foutput(stdout, lm->y);
printf("weights: ");
v_foutput(stdout, lm->weights);
printf("X: ");
m_foutput(stdout, lm->X);
}
lm->has_intercept = 1; /* does not affect the fit */
lm = calc_lm(lm); /* solve WLS eqs. for beta */
if (DEBUG_FIT) {
printf("beta: ");
v_foutput(stdout, lm->beta);
}
if (lm->is_singular) {
iv_free(fit);
v_free(start);
return 1;
}
if (fit_ranges) {
s = v_norm2(lm->beta) / v_norm2(start);
if (s > 0.2) {
/* don't allow steps > 20% ---- */
sv_mlt(0.2 / s, lm->beta, lm->beta);
*bounded = 1;
} else
*bounded = 0; /* a `free', voluntary step */
} else /* we're basically doing linear regression here: */
*bounded = 0;
for (i = 0; i < n_fit; i++) {
if (fit->ive[i] < vp->n_models) {
model = fit->ive[i];
vp->part[model].sill = start->ve[i] + lm->beta->ve[i];
} else {
model = fit->ive[i] - vp->n_models;;
vp->part[model].range[0] = start->ve[i] + lm->beta->ve[i];
}
}
iv_free(fit);
v_free(start);
return 0;
}
static int fill_weights(const VARIOGRAM *vp, PERM *p, LM *lm) {
double x, y, z, s;
int i, retval = 0;
if (vp->ev->fit == OLS_FIT || (vp->ev->cloud && vp->ev->fit == WLS_FIT)) {
if (lm->weights != NULL)
v_free(lm->weights);
lm->weights = NULL;
return 0;
}
lm->weights = v_resize(lm->weights, p->size);
for (i = 0; i < p->size; i++) {
if (vp->ev->fit == WLS_NHH)
s = vp->ev->dist[p->pe[i]];
else {
x = vp->ev->direction.x * vp->ev->dist[p->pe[i]];
y = vp->ev->direction.y * vp->ev->dist[p->pe[i]];
z = vp->ev->direction.z * vp->ev->dist[p->pe[i]];
s = get_semivariance(vp, x, y, z);
}
if (vp->ev->cloud) {
lm->weights->ve[i] = 1.0;
if (fabs(s) > NEARLY_ZERO)
lm->weights->ve[i] /= s * s;
else {
pr_warning("infinite weight during fit");
retval = 1;
break;
}
} else { /* no cloud: */
lm->weights->ve[i] = vp->ev->nh[p->pe[i]];
if (vp->ev->fit != WLS_FIT) {
if (fabs(s) > NEARLY_ZERO)
lm->weights->ve[i] /= (s * s);
else {
pr_warning("infinite weight during fit");
retval = 1;
break;
}
}
} /* else cloud */
} /* for i */
return retval;
}
/*
* fit a variogram model to a sample variogram,
* using the gnuplot fit command of gnuplot versions 3.6 and above
*/
static void gnu_fit(VARIOGRAM *v) {
int i;
FILE *f = NULL;
char *fit_log = NULL, *cmd;
cmd = NULL; /* to avoid compiler warning */
if ((fit_log = getenv("FIT_LOG")) == NULL)
fit_log = FIT_LOG;
if (file_exists(fit_log))
eremove(fit_log);
#ifdef HAVE_POPEN
f = epopen(gl_gnuplot, "w");
#else
f = efopen(GNUFIT_NAME, "w");
#endif
if (is_variogram(v)) {
fprintf(f, "Nug(a,x) = (x == 0.0 ? 0.0 : 1.0) # irresp. a\n");
for (i = 2; v_models[i].name != NULL; i++)
fprintf(f, "%s\n", v_models[i].v_gnuplot);
} else {
fprintf(f, "Nug(a,x) = (x == 0.0 ? 1.0 : 0.0) # irresp. a\n");
for (i = 2; v_models[i].name != NULL; i++)
fprintf(f, "%s\n", v_models[i].c_gnuplot);
}
write_fx(f, v);
fprint_sample_vgm(f, v->ev);
fprintf(f, "e\n");
#ifdef HAVE_POPEN
epclose(f);
#else
fclose(f);
cmd = (char *) emalloc(strlen(gl_gnuplot) + strlen(GNUFIT_NAME) + 2);
sprintf(cmd, "%s %s", gl_gnuplot, GNUFIT_NAME);
esystem(cmd); /* let gnuplot do the fit */
efree(cmd);
#endif
get_values(fit_log, v);
update_variogram(v);
return;
}
static void write_fx(FILE *f, VARIOGRAM *v) {
/*
* write:
* c1 = ..; a1 = ..; ...
* f(x) = c1 * Nug(0) + c2 * Sph(a2) + ...
* fit f(x) '-' via c1, c2, ...
*/
int i, first = 1;
/*
* set parateters, c0=..; a0=..;
*/
for (i = 0; i < v->n_models; i++) {
if (v->part[i].fit_sill)
fprintf(f, "c%d = %g; ", i, v->part[i].sill);
if (v->part[i].fit_range)
fprintf(f, "a%d = %g; ", i, v->part[i].range[0]);
}
/*
* f(x) = ...
*/
fprintf(f, "\nf(x) = ");
fprint_gnuplot_model(f, v, 1);
/*
* fit f(x) 'file' using a:b:c # c are `uncertainties'
* after a bit trial and error, they seem to be error standard deviations.
*/
if (v->ev->fit == WLS_GNUFIT_MOD)
fprintf(f, "\nfit f(x) '-' using %s via",
v->ev->cloud ? "3:4:(sqrt(f($3)**2))" : "4:5:(sqrt((f($4)**2)/$3))");
else
fprintf(f, "\nfit f(x) '-' using %s via",
v->ev->cloud ? "3:4" : "4:5:(sqrt(1.0/$3))" );
/*
* via
*/
for (i = 0; i < v->n_models; i++) {
if (v->part[i].fit_sill) {
fprintf(f, "%sc%d", first ? " " : ", ", i);
first = 0;
}
if (v->part[i].fit_range) {
fprintf(f, "%sa%d", first ? " " : ", ", i);
first = 0;
}
}
fprintf(f, "\n");
return;
}
static void get_values(const char *fname, VARIOGRAM *v) {
FILE *f = NULL;
char *s = NULL, *cp;
int size = 0, nr = 0;
if (! file_exists(fname)) {
pr_warning("can't find %s -- get gnuplot 3.7 from www.gnuplot.info",
fname);
return;
}
f = efopen(fname, "r");
do {
if (get_line(&s, &size, f) == NULL) {
pr_warning("error while reading %s", fname);
efclose(f);
return;
}
if (strstr(s, "BREAK: Singular matrix")) {
pr_warning("error during variogram fit (singular model)");
efree(s);
efclose(f);
return;
}
} while (strstr(s, "Final set of parameters") == NULL);
get_line(&s, &size, f); /* line with the many ==== */
get_line(&s, &size, f); /* empty line */
while (get_line(&s, &size, f) != NULL) { /* values */
if (s[0] == 'c' || s[0] == 'a') {
cp = s;
cp++;
cp = strtok(cp, " =");
if (read_int(cp, &nr) || nr < 0 || nr >= v->n_models)
ErrMsg(ER_IMPOSVAL, fname);
cp = strtok(NULL, " =");
if (cp == NULL)
ErrMsg(ER_IMPOSVAL, fname);
if (s[0] == 'c')
read_double(cp, &(v->part[nr].sill));
else
read_double(cp, &(v->part[nr].range[0]));
} else
break; /* while-loop */
}
efclose(f);
efree(s);
correct_for_anisotropy(v);
return;
}
static void correct_for_anisotropy(VARIOGRAM *v) {
/*
* Ok, so now let's get to the hard part. What happened?
* in case of anisotropy, non-fitted values are correctly adjusted to
* their directional value in fprint_gnuplot_model().
* If however a value is fitted, the real fitted sill or range should
* be the corresponding range/sill in the main direction. So, we have
* to transfer those.
*/
int i;
for (i = 0; i < v->n_models; i++) {
if (v->part[i].fit_range && v->part[i].tm_range)
v->part[i].range[0] /= relative_norm(v->part[i].tm_range,
v->ev->direction.x, v->ev->direction.y, v->ev->direction.z);
}
return;
}