Content - 701616a96c8a670f69ba9fe932492b93265343e5 - 9b1cfe2/em.c

visit type:
Tip revision: a990a88ef14b38f257f1a0d368ba8be1bd3d7e4b authored by clwgg on 05 April 2018, 11:52:37 UTC
Updated Manuscript information
Tip revision: a990a88
em.c
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <float.h>
#include <string.h>
#include <math.h>

#include "d_model.h"

static double comp_ll (em_data *data)
{
  double logsum = 0.0;
  int n,i,j;

  all_f *arr = data->arr;

  for (n = 0; n < arr->poscount; ++n) {
    pos_f p = arr->a[n];

    for (i = 0; i < p.num; ++i) {

      double freq = p.pos[i];

      double possum = 0.0;
      for( j = 0; j < 3; ++j ) {
        possum = possum + ( data->a[j] * prob_norm( data->m[j], data->w[j], freq ));
      }
      possum = possum + data->a[3] * prob_unif( arr->minfrac, freq );
      if (possum == 0.0) possum = DBL_MIN;
      logsum = logsum + log( possum );
    }
  }

  return logsum;
}

static void iter_EM (em_data *data, double *x)
{

  double tmp;

  all_f *arr = data->arr;
  int i, j;
    /* i: number of positions, j: number of normals in mixture */

  double g[3];
    /* stores each row g[j] in M[i][j] matrix while iterating i */
  memset(g, 0, sizeof(double) * 3 );

  double cs[3];
    /* calculates column sums cs[j] of M[i][j] */
  memset(cs, 0, sizeof(double) * 3 );
  double csx[3];
    /* calculates column sum of g[j] * x[i] */
  memset(csx, 0, sizeof(double) * 3 );
  double csd[3];
    /* calculates column sum of g[j] * (x[i] - m[j])^2 */
  memset(csd, 0, sizeof(double) * 3 );

  double sum = 0;
    /* calculates sum of all values in M[i][j] */

  for (i = 0; i < arr->basecount; ++i) {
    /*
     This loop is the E-step of EM and computes the sums of the
     posterior gamma(j) that will be neccessary for the
     calculations done in M.
     */
    double rs = 0;
      /* row sums of g[j] for normalization */

    for( j = 0; j < 3; ++j ) {
      g[j] = data->a[j] * ((tmp = prob_norm( data->m[j], data->w[j], x[i] )) == 0 ? DBL_MIN : tmp);
      rs = rs + g[j];
    }

    double ug;
      /* ug stores the positionwise posterior of the uniform mixture component*/
    ug = data->a[3] * prob_unif( arr->minfrac, x[i] );
      /* ug needs to be included in the row sum, it can be thought of as a fourth column */
    rs = rs + ug;

    for( j = 0; j < 3; ++j ) {
      g[j] = g[j] / rs;
      cs[j] = cs[j] + g[j];
      csx[j] = csx[j] + (g[j] * x[i]);
      csd[j] = csd[j] + (g[j] * pow(x[i] - data->m[j],2));
      sum = sum + g[j];
    }

    /* ug of course also is included in the total sum */
    sum = sum + (ug / rs);
  }

  for( j = 0; j < 3; ++j ) {
    /*
     This loop is the M-step of EM which gets new estimates 
     for a, m and w based on the different sums calculated in E.
     */

    double a = cs[j] / sum;
    double m = (1.0 / cs[j]) * csx[j];
    double w = sqrt( (1.0 / cs[j]) * csd[j] );

    if ((data->f >> 0) & 1 && !isnan(a))
      data->a[j] = a;

    if ((data->f >> 1) & 1 && !isnan(m))
      data->m[j] = m;

    if ((data->f >> 2) & 1 && !isnan(w))
      data->w[j] = w;
  }

  /* ua is the proportion of the uniform mixture that is included in the
   * em_data struct at the fourth position of the "a" array a[3]. */
  double ua = 1 - (data->a[0] + data->a[1] + data->a[2]);
  if ((data->f >> 3) & 1 && !isnan(ua))
    data->a[3] = ua;

}

void run_EM (em_data *data)
{

  double ll = comp_ll(data);
  double delta_ll = 1;
  double last = ll;

  all_f *arr = data->arr;
  double *x = malloc( sizeof(double) * arr->basecount );
  if (flatten_arr( x, arr ) == 0) {
    fprintf(stderr, "problem allocating Data in EM\n");
    return;
  }

  int c = 0;
  while ( delta_ll > 0.01 ) {

    iter_EM(data, x);

    if (c % 10 == 0) {
      last = ll;
      ll = comp_ll(data);
      delta_ll = ll - last;
    }

//    if (c % 100 == 0) fprintf(stderr, "EM iteration %d, delta loglik: %f\n", c, delta_ll);
    ++c;
  }

//  fprintf(stderr, "EM iteration %d, delta loglik: %f\n", c, delta_ll);

  free(x);
  data->ll = ll;
}

void setup_EMstr (em_data *data, 
                  const double a[4], 
                  const double m[3], 
                  const double w[3], 
                  const int f)
{
  memcpy(data->a, a, sizeof a[0] * 4);
  memcpy(data->m, m, sizeof m[0] * 3);
  memcpy(data->w, w, sizeof w[0] * 3);
  data->f = f;
}
Browse the archive

https://github.com/clwgg/nQuire