/******************************************************************************** * * Bayesian Regression and Adaptive Sampling with Gaussian Process Trees * Copyright (C) 2005, University of California * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * Questions? Contact Robert B. Gramacy (rbgramacy@ams.ucsc.edu) * ********************************************************************************/ #include #include #include #include #include "rand_draws.h" #include "rand_pdf.h" #include "matrix.h" #include "predict.h" #include "linalg.h" #include "rhelp.h" #include "lh.h" #include /* #define DEBUG */ /* * predictive_mean: * * compute the predictive mean of a single observation * used by predict_data and predict * * FFrow[col], KKrow[n1], KiZmFb[n1], b[col] */ double predictive_mean(n1, col, FFrow, KKrow, b, KiZmFb) unsigned int n1, col; double *FFrow, *KKrow, *KiZmFb, *b; { double zzm; /* Note that KKrow has been passed without any jitter. */ /* f(x)' * beta */ zzm = linalg_ddot(col, FFrow, 1, b, 1); /* E[Z(x)] = f(x)' * beta + k'*Ki*(Zdat - F*beta) */ zzm += linalg_ddot(n1, KKrow, 1, KiZmFb, 1); #ifdef DEBUG /* check to make sure the prediction is not too big; an old error */ if(abs(zzm) > 10e10) warning("(predict) abs(zz)=%g > 10e10", zzm); #endif return zzm; } /* * predict_data: * * used by the predict_full funtion below to fill * zmean and zs [n1] with predicted mean and var values * at the data locations, X * * b[col], KiZmFb[n1], z[n1], FFrow[n1][col], K[n1][n1]; */ void predict_data(zpm,zps2,n1,col,FFrow,K,b,ss2,zpjitter,KiZmFb) unsigned int n1, col; double *b, *KiZmFb, *zpm, *zps2, *zpjitter; double **FFrow, **K; double ss2; { int i; /* Note that now K is passed with jitter included. This was previously removed in the predict_full fn. */ /* printf("zp: "); printVector(zpjitter,5,stdout, HUMAN); */ /* for each point at which we want a prediction */ for(i=0; i 0);*/ Qy = new_vector(n1); for(i=0; i= 0); } /* clean up */ free(Qy); } /* * predictive_var: * * computes the predictive variance for a single location * used by predict. Also returns Q, rhs, Wf, and s2corr * which are useful for computing Delta-sigma * * Q[n1], rhs[n1], Wf[col], KKrow[n1], FFrow[n1], FW[col][n1], * KpFWFi[n1][n1], W[col][col]; */ double predictive_var(n1, col, Q, rhs, Wf, s2cor, ss2, k, f, FW, W, tau2, KpFWFi, corr_diag) unsigned int n1, col; double *Q, *rhs, *Wf, *k, *f, *s2cor; double **FW, **KpFWFi, **W; double ss2, corr_diag, tau2; { double s2, kappa, fWf, last; /* Var[Z(x)] = s2*[KKii + jitter + fWf - Q (K + FWF)^{-1} Q] */ /* where Q = k + FWf */ /* Q = k + tau2*FW*f(x); */ dupv(Q, k, n1); linalg_dgemv(CblasNoTrans,n1,col,tau2,FW,n1,f,1,1.0,Q,1); /* rhs = KpFWFi * Q */ linalg_dgemv(CblasNoTrans,n1,n1,1.0,KpFWFi,n1,Q,1,0.0,rhs,1); /* Q (K + tau2*FWF)^{-1} Q */ /* last = Q*rhs = Q*KpFWFi*Q */ last = linalg_ddot(n1, Q, 1, rhs, 1); /* W*f(x) */ linalg_dsymv(col,1.0,W,col,f,1,0.0,Wf,1); /* f(x)*Wf */ fWf = linalg_ddot(col, f, 1, Wf, 1); /* finish off the variance */ /* Var[Z(x)] = s2*[KKii + jitter + fWf - Q (K + FWF)^{-1} Q] */ /* Var[Z(x)] = s2*[kappa - Q C^{-1} Q] */ /* of course corr_diag = 1.0 + nug, for non-mr_tgp & non calibration */ kappa = corr_diag + tau2*fWf; *s2cor = kappa - last; s2 = ss2*(*s2cor); /* this is to catch bad s2 calculations; note that jitter = nug for non-mr_tgp */ if(s2 <= 0) { s2 = 0; *s2cor = corr_diag-1.0; } return s2; } /* * predict_delta: * * used by the predict_full funtion below to fill * zmean and zs [n2] with predicted mean and var * values based on the input coded in terms of * FF,FW,W,xxKx,KpFWF,KpFWFi,b,ss2,nug,KiZmFb * * Also calls delta_sigma2 at each predictive location, * becuase it uses many of the same computed quantaties * as needed to compute the predictive variance. * * b[col], KiZmFb[n1], z[n2] FFrow[n2][col], KKrow[n2][n1], * xxKxx[n2][n2], KpFWFi[n1][n1], FW[col][n1], W[col][col], * Ds2xy[n2][n2]; */ void predict_delta(zzm,zzs2,Ds2xy,n1,n2,col,FFrow,FW,W,tau2,KKrow,xxKxx,KpFWFi,b, ss2, zzjitter,KiZmFb) unsigned int n1, n2, col; double *b, *KiZmFb, *zzm, *zzs2, *zzjitter; double **FFrow, **KKrow, **xxKxx, **KpFWFi, **FW, **W, **Ds2xy; double ss2, tau2; { int i; double s2cor; /*double Q[n1], rhs[n1], Wf[col];*/ double *Q, *rhs, *Wf; /* zero stuff out before starting the for-loop */ rhs = new_zero_vector(n1); Wf = new_zero_vector(col); Q = new_vector(n1); /* for each point at which we want a prediction */ for(i=0; i 0) improv[i] = diff; */ if(improv[i] < 0) improv[i] = 0.0; } } /* * predicted_improv: * * compute the improvement statistic for * posterior predictive data z * * This more raw statistic allows * a full summary of the Improvement I(X) distribution, * rather than the expected improvement provided by * expected_improv. * * Samples z(X) are (strongly) preferred over the data * Z(X), and likewise for zz(XX) rather than zz-hat(XX) * * Note that there is no predictive-variance argument. */ void predicted_improv(n, nn, improv, Zmin, zp, zz) unsigned int n, nn; double *improv, *zp, *zz; double Zmin; { unsigned int which, i; double fmin, diff; /* shouldn't be called if improv is NULL */ assert(improv); /* calculate best minimum so far */ fmin = min(zp, n, &which); if(Zmin < fmin) fmin = Zmin; for(i=0; i 0) improv[i] = diff; else improv[i] = 0.0; } } /* * GetImprovRank: * * implements Matt Taddy's algorithm for determining the order * in which the nn points -- whose improv samples are recorded * in the cols of Imat_in over R rounds -- should be added into * the design in order to get the largest expected improvement. * w are R importance tempering (IT) weights */ unsigned int* GetImprovRank(int R, int nn, double **Imat_in, int g, int numirank, double *w) { /* duplicate Imat, since it will be modified by this method */ unsigned int j, i, k, /* m,*/ maxj; double *colmean, *maxcol; double **Imat; double maxmean; unsigned int *pntind; /* allocate the ranking vector */ pntind = new_zero_uivector(nn); assert(numirank >= 0 && numirank <= nn); if(numirank == 0) return pntind; /* duplicate the Improv matrix so we can modify it */ Imat = new_dup_matrix(Imat_in, R, nn); /* first, raise improv to the appropriate power */ for (j=0; j 0.0) Imat[i][j] = 1.0; else for(k=1; k myfmax(fabs(XX[i]-Xo[l]), fabs(XX[i]-Xo[u]))) search = 0; else{ l++; u++; } } /*printf("l=%d, u=%d, Xo[l]=%g, Xo[u]=%g, XX[i]=%g \n", l, u, Xo[l],Xo[u],XX[i]);*/ /* width of the window in X-space */ range = myfmax(fabs(XX[i]-Xo[l]), fabs(XX[i]-Xo[u])); /* calculate the weights in the window; * every weight outside the window will be zero */ zerov(w,n); for(j=l; j<=u; j++){ dist = fabs(XX[i]-Xo[j])/range; w[j] = (1.0-dist)*(1.0-dist); } /* record the (normalized) weighted average in the window */ sumW = sumv(&(w[l]), q); YY[i] = vmult(&(w[l]), &(Yo[l]), q)/sumW; /*printf("YY = "); printVector(YY, nn, stdout, HUMAN);*/ } /* clean up */ free(w); free(o); free(Xo); free(Yo); } /* * sobol_indices: * * calculate the Sobol S and T indices using samples of the * posterior predictive distribution (ZZm and ZZvar) at * nn*(d+2) locations */ void sobol_indices(double *ZZ, unsigned int nn, unsigned int m, double *S, double *T) { /* pointers to responses for the original two LHSs */ unsigned int j, k; double dnn, sqEZ, lVZ, ponent, U, Uminus; double *fN; double *fM1 = ZZ; double *fM2 = ZZ + nn; /* accumilate means and variances */ double EZ, EZ2, Evar; Evar = EZ = EZ2 = 0.0; for(j=0; j