Revision a7a30ac2a6f34e1d7710938b4547e6b5b702d881 authored by Wayne Zhang on 16 August 2012, 00:00:00 UTC, committed by Gabor Csardi on 16 August 2012, 00:00:00 UTC
1 parent 0aa535d
tweedie.c
``````/************************************************************/
/*               Tweedie density evaluation                 */
/*              Author:  Wayne Zhang                        */
/*            actuary_zhang@hotmail.com                     */
/************************************************************/

/**
* @file tweedie.c
* @brief Approximating the density of the Tweedie compound Poisson
* distribution using the series evaluation method.
* @author Wayne Zhang
*/

#include <R.h>
#include <Rinternals.h>          /* for R structures */
#include <Rmath.h>               /* for R functions */
#include "utilities.h"           /* for dcumsum, dmax and dmin */
#include "tweedie.h"             /* prototypes */

/** the threshold used in finding the bounds of the series */
#define TWEEDIE_DROP 37.0
/** the loop increment used in finding the bounds of the series */
#define TWEEDIE_INCRE 5
/** the max number of terms allowed in the finite sum approximation*/
#define TWEEDIE_NTERM 20000

/**
* Compute the log density for the tweedie compound Poisson distribution.
* This is based on the dtweedie.series function in R, but bounds
* are not determined using all observations because this could result in
* dramatically slower computation in certain circumstances.
*
* @param n the number of observations
* @param y  the vector of observations
* @param mu the vector of means
* @param phi scalar: the dispersion parameter
* @param p scalar: the index parameter
* @param wts the optional vector of weights (set to NULL if no weight is supplied)
* @param ans the vector that stores the computed log density
*
*/

void dtweedie(int n, double *y, double *mu, double phi, double p,
double *wts, double *ans){

int np = 0, pos = 0 ;
double p1 = p - 1.0, p2 = 2.0 - p;
double a = - p2 / p1, a1 = 1.0 / p1;
double cc, j, w, phiw, sum_ww = 0.0, ww_max ;

/* # positive values */
for (int i = 0; i < n; i++)
if (y[i]) np++ ;
/* all zeros in the data (probably nonsense) */
if (np == 0) {
for (int i = 0; i < n; i++){
phiw = wts ? (phi / wts[i]) : phi ;
ans[i] = -pow(mu[i], p2) / (phiw * p2) ;
}
return ;
}
/* only need the lower bound and the # terms to be stored */
int jh, *jl = Calloc(np, int), *jd = Calloc(np, int);
double *jmax = Calloc(np, double), *logz = Calloc(np, double);

/* compute jmax for each y > 0*/
cc = a * log(p1) - log(p2);
for (int i = 0; i < n; i++){
if (y[i]) {
phiw = wts ? (phi / wts[i]) : phi ;
jmax[pos] = fmax2(1.0, pow(y[i], p2) / (phiw * p2));
logz[pos] = - a * log(y[i]) - a1 * log(phiw) + cc;
pos++ ;
}
}

/* find bounds in the summation */
for (int i = 0; i < np; i++){
/* locate upper bound */
cc = logz[i] + a1 + a * log(-a);
j = jmax[i] ;
w = a1 * j ;
while (1) {
j += TWEEDIE_INCRE ;
if (j * (cc - a1 * log(j)) < (w - TWEEDIE_DROP))
break ;
}
jh = ceil(j);
/* locate lower bound */
j = jmax[i];
while (1) {
j -= TWEEDIE_INCRE ;
if (j < 1 || j * (cc - a1 * log(j)) < w - TWEEDIE_DROP)
break ;
}
jl[i] = imax2(1, floor(j)) ;
jd[i] = jh - jl[i] + 1;
}

/* set limit for # terms in the sum */
int nterms = imin2(imax(jd, np), TWEEDIE_NTERM), iterm ;
double *ww = Calloc(nterms, double) ;
/* evaluate series using the finite sum*/
pos = 0;
for (int i = 0; i < n; i++){
phiw = wts ? (phi / wts[i]) : phi ;
/* y == 0 */
ans[i] = -pow(mu[i], p2) / (phiw * p2) ;
/* y > 0 */
if (y[i]) {
sum_ww = 0.0 ;
iterm = imin2(jd[pos], nterms) ;    /* avoid stepping out of bounds */
for (int k = 0; k < iterm; k++){
j = k + jl[pos] ;
ww[k] = j * logz[pos] - lgamma(1 + j) - lgamma(-a * j);
}
ww_max = dmax(ww, iterm) ;
for (int k = 0; k < iterm; k++)
sum_ww += exp(ww[k] - ww_max);
ans[i] += -y[i] / (phiw * p1 * pow(mu[i], p1)) - log(y[i]) +
log(sum_ww) + ww_max  ;
pos++;
}
}
Free(jmax);
Free(logz);
Free(jl);
Free(jd);
Free(ww);
}

/**
* compute -2 logliklihood of a vector of observations assuming the
* tweedie compound Poisson density
*
* @param n the number of observations
* @param y  the vector of observations
* @param mu the vector of means
* @param phi scalar: the dispersion parameter
* @param p scalar: the index parameter
* @param wts the optional vector of weights (set to NULL if no weight is supplied)
*
* @return -2 logliklihood
*/

double dl2tweedie(int n, double *y, double *mu, double phi, double p,
double *wts){
double *ansv = Calloc(n, double);
dtweedie(n, y, mu, phi, p, wts, ansv);
double ans = -2 * dcumsum(ansv, n);
Free(ansv);
return ans ;
}

/**
* compute vector of logliklihood of tweedie
*
* @param y vector of response
* @param mu vector of means
* @param phi scale parameter
* @param p index parameter
* @param wts prior weights
*
* @return vector of loglikelihood
*/
SEXP cplm_dltweedie(SEXP y, SEXP mu, SEXP phi, SEXP p, SEXP wts){
int n = LENGTH(y);
SEXP ans ;
PROTECT(ans = allocVector(REALSXP, n));
dtweedie(n, REAL(y), REAL(mu), REAL(phi), REAL(p),
REAL(wts), REAL(ans));
UNPROTECT(1) ;
return ans ;
}
`````` Computing file changes ...