/************************************************************/ /* Markov Chain Monte Carlo algorithms for the Bayesian */ /* Compound Poisson Linear Model via density evaluation */ /* Author: Wayne Zhang */ /* actuary_zhang@hotmail.com */ /************************************************************/ /** * @file bcplm.c * @brief Functions for implementing the MCMC algorithm for * Compound Poisson Linear Models using direct tweedie * density evaluation * @author Wayne Zhang */ /** common R headers and macros that extract slots */ #include "common.h" /** tweedie density evaluation functions */ #include "tweedie.h" /** univariate/multivariate random walk Metropolis-Hastings algorithms */ #include "mh.h" /** utility and inline functions */ #include "utilities.h" /** declarations for R callable functions */ #include "bcplm.h" /** cholmod_common struct initialized */ extern cholmod_common c; /** target acceptance rate for tuning the univariate scale parameter */ #define MH_ACC_TAR 0.5 /** threshold in specifying the interval of allowed acceptance rates */ #define MH_ACC_TOL 0.1 /** the number of mark times required for a parameter to be deemed as fully tuned */ #define MH_ACC_MARK 3 /** the shape parameter of the inverse-Gamma prior */ #define IG_SHAPE 0.001 /** the scale parameter of the inverse-Gamma prior */ #define IG_SCALE 0.001 /** positions in the dims vector in Bayesian models */ enum dimB { nO_POS = 0, /**= A){ /* not accepted */ *l = l1; /* revert the likelihood (this is updated in post_betak) */ } else { beta[j] = xn; acc[j]++; } } /* update the mean using the new beta */ if (dm[nU_POS]) cpglmm_fitted(beta, 1, da); else cpglm_fitted(beta, da); } /** * Simulate phi and p using the univariate RW M-H update. * * @param da a bcplm_input object * */ static void sim_phi_p(SEXP da){ int *dm = DIMS_SLOT(da); int nB = dm[nB_POS]; double *p = P_SLOT(da), *phi = PHI_SLOT(da), *mh_sd = MHSD_SLOT(da), *acc = ACC_SLOT(da), xn = 0.0; /* update phi */ acc[nB] += metrop_tnorm_rw(*phi, mh_sd[nB], 0, BDPHI_SLOT(da)[0], &xn, post_phi, (void *) da); *phi = xn ; /* update p */ acc[nB + 1] += metrop_tnorm_rw(*p, mh_sd[nB + 1], BDP_SLOT(da)[0], BDP_SLOT(da)[1], &xn, post_p, (void *) da); *p = xn ; } /** * Simulate u using the naive Gibbs method. Block M-H update * generally does not work well and hence is not implemented. * * @param da a bcplm_input object * */ static void sim_u(SEXP da){ int *dm = DIMS_SLOT(da), *k = K_SLOT(da); int nB = dm[nB_POS], nU = dm[nU_POS]; double *u = U_SLOT(da), *l = CLLIK_SLOT(da), *mh_sd = MHSD_SLOT(da) + nB + 2, /* shift the proposal variance pointer */ *acc = ACC_SLOT(da) + nB + 2; /* shift the acc pointer */ double xo, xn, l1, l2, A; /* initialize llik_mu*/ *l = llik_mu(da); for (int j = 0; j < nU; j++){ *k = j ; xo = u[j]; xn = rnorm(xo, mh_sd[j]); l1 = *l; l2 = post_uk(xn, da); A = exp(l2 - (l1 + prior_uk(xo, da))); /* determine whether to accept the sample */ if (A < 1 && runif(0, 1) >= A){ *l = l1; /* revert llik_mu (this is updated in post_uk) */ } else{ u[j] = xn; acc[j]++; } } cpglmm_fitted(u, 0, da) ; /* update the mean using the new u */ } /** * Simulate the variance components. If there is only one random effect * per level, the posterior is the inverse Gamma. Otherwise, the posterior * is the inverse Wishart. * * @param da a bcplm_input object * */ static void sim_Sigma(SEXP da){ SEXP V = R_do_slot(da, install("Sigma")) ; int *dm = DIMS_SLOT(da), *Gp = Gp_SLOT(da), *nc = NCOL_SLOT(da), *nlev = NLEV_SLOT(da); int nT = dm[nT_POS], mc = imax(nc, nT); double *v, su, *u = U_SLOT(da), *scl = Alloca(mc * mc, double); R_CheckStack(); for (int i = 0; i < nT; i++){ v = REAL(VECTOR_ELT(V, i)); if (nc[i] == 1){ /* simulate from the inverse-Gamma */ su = sqr_length(u + Gp[i], nlev[i]); v[0] = 1/rgamma(0.5 * nlev[i] + IG_SHAPE, 1.0/(su * 0.5 + IG_SCALE)); } else { /* simulate from the inverse-Wishart */ mult_xtx(nlev[i], nc[i], u + Gp[i], scl); /* t(x) * (x) */ for (int j = 0; j < nc[i]; j++) scl[j * j] += 1.0; /* add prior (identity) scale matrix */ solve_po(nc[i], scl, v); rwishart(nc[i], (double) (nlev[i] + nc[i]), v, scl); solve_po(nc[i], scl, v); } } } /************************************************/ /* Additional utility functions for implementing*/ /* the MCMC sampler in bcplm */ /************************************************/ /** * Set parameters to the k_th initial values provided in the inits slot * * @param da a bcplm_input object * @param k indicates the k_th set of initial values * * \note the order of the parameters in inits is: beta, phi, p, u and Sigma */ static void get_init(SEXP da, int k){ SEXP inits = R_do_slot(da, install("inits")); /* inits is a list */ int *dm = DIMS_SLOT(da); int nB = dm[nB_POS], nU = dm[nU_POS], nT = dm[nT_POS]; double *init = REAL(VECTOR_ELT(inits, k)); /* set beta, phi, p*/ Memcpy(FIXEF_SLOT(da), init, nB) ; *PHI_SLOT(da) = init[nB] ; *P_SLOT(da) = init[nB + 1] ; /* set U and Sigma */ if (nU) { SEXP V = R_do_slot(da, install("Sigma")); int pos = 0, st = nB + 2, *nc = NCOL_SLOT(da) ; double *v ; Memcpy(U_SLOT(da), init + st, nU); /* set Sigma */ for (int i = 0; i < nT; i++){ v = REAL(VECTOR_ELT(V, i)) ; Memcpy(v, init + st + nU + pos, nc[i] * nc[i]) ; pos += nc[i] * nc[i] ; } } } /** * Save parameters to the ns_th row of the simulation results * * @param da a bcplm_input object * @param ns indicates the ns_th row * @param nS number of rows in sims * @param sims a long vector to store simulations results * */ static void set_sims(SEXP da, int ns, int nS, double *sims){ int *dm = DIMS_SLOT(da) ; int pos = 0, nB = dm[nB_POS], nU = dm[nU_POS], nT = dm[nT_POS]; double *beta = FIXEF_SLOT(da), *u = U_SLOT(da); for (int j = 0; j < nB; j++) sims[j * nS + ns] = beta[j] ; sims[nB * nS + ns] = *PHI_SLOT(da) ; sims[(nB + 1) * nS + ns] = *P_SLOT(da) ; /* set U and Sigma */ if (nU) { SEXP V = R_do_slot(da, install("Sigma")); int *nc = NCOL_SLOT(da), st = nB + 2; double *v; for (int j = 0; j < nU; j++) sims[(j + st) * nS + ns] = u[j] ; for (int i = 0; i < nT; i++){ v = REAL(VECTOR_ELT(V, i)); for (int j = 0; j < nc[i] * nc[i]; j++) sims[(st + nU + pos + j) * nS + ns] = v[j] ; pos += nc[i] * nc[i] ; } } } /** * update the proposal standard deviations * * @param p the number of parameters to be tuned * @param acc pointer to the acceptance rate in the M-H update * @param mh_sd pointer to the vector of proposal standard deviations * @param mark pointer to the vector of marks * */ static R_INLINE void tune_var(int p, double *acc, double *mh_sd, int *mark) { double acc_bd; for (int j = 0; j < p; j++){ acc_bd = fmin2(fmax2(acc[j], 0.01), 0.99); /* bound the empirical acceptance */ if (acc[j] < (MH_ACC_TAR - MH_ACC_TOL)) mh_sd[j] /= 2 - acc_bd/MH_ACC_TAR; else if (acc[j] > (MH_ACC_TAR - MH_ACC_TOL)) mh_sd[j] *= 2 - (1 - acc_bd)/(1 - MH_ACC_TAR); else mark[j]++; } } /************************************************/ /* MCMC simulations for cplm */ /************************************************/ /** * main workhorse for the MCMC simulation * * @param da a bcplm_input object * @param nit number of iterations * @param nbn number of burn-in * @param nth thinning rate * @param nS the number of rows of sims * @param nR report interval * @param sims a 2d array to store simulation results * * \note It's assumed that eta and mu are already updated. Also, * nS is passed as an argument rather than determined from da. * This is because the tuning process uses a different number of * simulations than the n.keep element in the dims slot. * */ static void do_mcmc(SEXP da, int nit, int nbn, int nth, int nS, int nR, double *sims){ int *dm = DIMS_SLOT(da); int nU = dm[nU_POS], nmh = dm[nmh_POS], ns = 0, do_print = 0; /* initialize acc */ double *acc = ACC_SLOT(da); AZERO(acc, nmh); /* run MCMC simulatons */ GetRNGstate(); for (int iter = 0; iter < nit; iter++){ do_print = (nR > 0 && (iter + 1) % nR == 0); if (do_print) Rprintf(_("Iteration: %d \n "), iter + 1); /* update parameters */ sim_beta(da); sim_phi_p(da); if (nU){ sim_u(da); sim_Sigma(da); } /* store results */ if (iter >= nbn && (iter + 1 - nbn) % nth == 0 ){ ns = (iter + 1 - nbn) / nth - 1; set_sims(da, ns, nS, sims); } /* print out acceptance rate if necessary */ if (do_print) print_acc(iter + 1, nmh, acc, 0); R_CheckUserInterrupt(); } PutRNGstate(); /* compute acceptance percentage */ for (int i = 0; i < nmh; i++) acc[i] /= nit ; } /** * tune the proposal variances * * @param da an input list object * */ static void tune_mcmc(SEXP da){ int *dm = DIMS_SLOT(da); int nmh = dm[nmh_POS], etn = ceil(dm[tnit_POS] * 1.0 / dm[ntn_POS]) ; // # iters per tuning loop; double *mh_sd = MHSD_SLOT(da), *acc = ACC_SLOT(da), *sims = Calloc(etn * dm[nA_POS], double); int nmark = 0, *mark = Calloc(nmh, int); AZERO(mark, nmh); /* run MCMC and tune parameters */ if (dm[rpt_POS]) Rprintf(_("Tuning phase...\n")); for (int i = 0; i < dm[ntn_POS]; i++) { do_mcmc(da, etn, 0, 1, etn, 0, sims); /* run mcmc */ tune_var(nmh, acc, mh_sd, mark); /* adjust proposal sd's */ /* determine whether the parameters are fully tuned */ nmark = 0; for (int j = 0; j < nmh; j++) if (mark[j] >= 3) nmark++; if (nmark == nmh) break; } if (dm[rpt_POS]){ print_acc(1, nmh, acc, 1); Rprintf("-----------------------------------------\n"); } Free(sims); Free(mark); } /** * implement MCMC for compound Poisson linear models * * @param da an bcplm_input object * * @return the simulated values * */ SEXP bcplm_mcmc (SEXP da){ /* get dimensions */ int *dm = DIMS_SLOT(da) ; int nR = dm[rpt_POS]; SEXP ans, ans_tmp; /* tune the scale parameter for M-H update */ if (dm[tnit_POS]) { update_mu(da); /* update eta mu*/ tune_mcmc(da); } /* run Markov chains */ PROTECT(ans = allocVector(VECSXP, dm[chn_POS])) ; for (int k = 0; k < dm[chn_POS]; k++){ if (nR) Rprintf(_("Start Markov chain %d\n"), k + 1); get_init(da, k) ; /* initialize the chain */ update_mu(da) ; /* update eta and mu */ /* run MCMC and store result */ PROTECT(ans_tmp = allocMatrix(REALSXP, dm[kp_POS], dm[nA_POS])); do_mcmc(da, dm[itr_POS], dm[bun_POS], dm[thn_POS], dm[kp_POS], nR, REAL(ans_tmp)); SET_VECTOR_ELT(ans, k, ans_tmp); UNPROTECT(1); if (nR) Rprintf("-----------------------------------------\n"); } if (nR) Rprintf(_("Markov Chain Monte Carlo ends!\n")); UNPROTECT(1); return ans; } /************************************************/ /* Export R Callable functions */ /************************************************/ /** * R callable function to update the mean in a bcplm object * * @param da an bcplm_input object * */ SEXP bcplm_update_mu(SEXP da){ update_mu(da); return R_NilValue; } /** * R callable function to compute the conditional posterior of p * * @param x value at which to evaluate the density * @param da an bcplm_input object * * @return the log of the conditional posterior of p */ SEXP bcplm_post_p(SEXP x, SEXP da){ return ScalarReal(post_p(REAL(x)[0], (void *) da)); } /** * R callable function to compute the conditional posterior of phi * * @param x value at which to evaluate the density * @param da an bcplm_input object * * @return the log of the conditional posterior of phi */ SEXP bcplm_post_phi(SEXP x, SEXP da){ return ScalarReal(post_phi(REAL(x)[0], (void *) da)); } /** * R callable function to compute the conditional posterior of betak * * @param x value at which to evaluate the density * @param da an bcplm_input object * * @return the log of the conditional posterior of betak */ SEXP bcplm_post_betak(SEXP x, SEXP da){ return ScalarReal(post_betak(REAL(x)[0], (void *) da)); } /** * R callable function to compute the conditional posterior of uk * * @param x value at which to evaluate the density * @param da an bcplm_input object * * @return the log of the conditional posterior of uk */ SEXP bcplm_post_uk(SEXP x, SEXP da){ return ScalarReal(post_uk(REAL(x)[0], (void *) da)); }