#include "Marginals.h" /* // calculate marginals from the combos, store in results void marginalGenotypeLikelihoods(list& genotypeCombos, Results& results) { map > > rawMarginals; // push the marginal likelihoods into the rawMarginals vectors in the results for (list::iterator gc = genotypeCombos.begin(); gc != genotypeCombos.end(); ++gc) { for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) { const SampleDataLikelihood& sdl = **i; rawMarginals[sdl.name][sdl.genotype].push_back(gc->posteriorProb); } } // safely add the raw marginal vectors using logsumexp for (Results::iterator r = results.begin(); r != results.end(); ++r) { ResultData& sample = r->second; map >& rawmgs = rawMarginals[r->first]; vector probs; for (map >::iterator m = rawmgs.begin(); m != rawmgs.end(); ++m) { probs.push_back(logsumexp_probs(m->second)); } long double normalizer = logsumexp_probs(probs); vector::iterator p = probs.begin(); for (map >::iterator m = rawmgs.begin(); m != rawmgs.end(); ++m, ++p) { sample.marginals[m->first] = *p - normalizer; } } } */ // recompute data likelihoods using marginals from the combos // assumes that the genotype combos are in the same order as the likelihoods // assumes that the genotype combos are the same size as the number of samples in the likelihoods // returns the delta from the previous marginals, informative in the case of EM long double marginalGenotypeLikelihoods(list& genotypeCombos, SampleDataLikelihoods& likelihoods) { long double delta = 0; vector< map > rawMarginals; rawMarginals.resize(likelihoods.size()); vector< map >::iterator rawMarginalsItr; // push the marginal likelihoods into the rawMarginals maps for (list::iterator gc = genotypeCombos.begin(); gc != genotypeCombos.end(); ++gc) { rawMarginalsItr = rawMarginals.begin(); for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) { const SampleDataLikelihood& sdl = **i; map& rmgs = *rawMarginalsItr++; map::iterator rmgsItr = rmgs.find(sdl.genotype); if (rmgsItr == rmgs.end()) { rmgs[sdl.genotype] = gc->posteriorProb; } else { //vector x; //x.push_back(rmgsItr->second); x.push_back(gc->posteriorProb); //rmgs[sdl.genotype] = logsumexp_probs(x); rmgs[sdl.genotype] = log(safe_exp(rmgsItr->second) + safe_exp(gc->posteriorProb)); } } } // safely add the raw marginal vectors using logsumexp // and use to update the sample data likelihoods rawMarginalsItr = rawMarginals.begin(); long double minAllowedMarginal = -1e-16; for (SampleDataLikelihoods::iterator s = likelihoods.begin(); s != likelihoods.end(); ++s) { vector& sdls = *s; const map& rawmgs = *rawMarginalsItr++; map marginals; vector rawprobs; for (map::const_iterator m = rawmgs.begin(); m != rawmgs.end(); ++m) { long double p = m->second; marginals[m->first] = p; rawprobs.push_back(p); } long double normalizer = logsumexp_probs(rawprobs); for (vector::iterator sdl = sdls.begin(); sdl != sdls.end(); ++sdl) { long double newmarginal = marginals[sdl->genotype] - normalizer; delta += newmarginal - sdl->marginal; // ensure the marginal is non-0 to guard against underflow sdl->marginal = min(minAllowedMarginal, newmarginal); } } return delta; } /* void bestMarginalGenotypeCombo(GenotypeCombo& combo, Results& results, SampleDataLikelihoods& samples, long double theta, bool pooled, bool permute, bool hwePriors, bool binomialObsPriors, bool alleleBalancePriors, long double diffusionPriorScalar) { for (SampleDataLikelihoods::iterator s = samples.begin(); s != samples.end(); ++s) { vector& sdls = *s; const string& name = sdls.front().name; const map& marginals = results[name].marginals;; map::const_iterator m = marginals.begin(); long double bestMarginalProb = m->second; Genotype* bestMarginalGenotype = m->first; ++m; for (; m != marginals.end(); ++m) { if (abs(m->second) < abs(bestMarginalProb)) { bestMarginalProb = m->second; bestMarginalGenotype = m->first; } } for (vector::iterator d = sdls.begin(); d != sdls.end(); ++d) { if (bestMarginalGenotype == d->genotype) { combo.push_back(&*d); break; } } } combo.init(binomialObsPriors); combo.calculatePosteriorProbability(theta, pooled, permute, hwePriors, binomialObsPriors, alleleBalancePriors, diffusionPriorScalar); } */ long double balancedMarginalGenotypeLikelihoods(list& genotypeCombos, SampleDataLikelihoods& likelihoods) { long double delta = 0; //map > > rawMarginals; vector< map > > rawMarginals; rawMarginals.resize(likelihoods.size()); vector< map > >::iterator rawMarginalsItr; // push the marginal likelihoods into the rawMarginals maps for (list::iterator gc = genotypeCombos.begin(); gc != genotypeCombos.end(); ++gc) { if (gc->isHomozygous()) { rawMarginalsItr = rawMarginals.begin(); for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) { const SampleDataLikelihood& sdl = **i; map >& rmgs = *rawMarginalsItr++; rmgs[sdl.genotype].push_back(gc->posteriorProb); } } else { bool isComboKing = true; rawMarginalsItr = rawMarginals.begin(); for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) { const SampleDataLikelihood& sdl = **i; if (sdl.rank != 0) { isComboKing = false; map >& rmgs = *rawMarginalsItr; rmgs[sdl.genotype].push_back(gc->posteriorProb); } ++rawMarginalsItr; } if (isComboKing) { rawMarginalsItr = rawMarginals.begin(); for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) { const SampleDataLikelihood& sdl = **i; map >& rmgs = *rawMarginalsItr++; rmgs[sdl.genotype].push_back(gc->posteriorProb); } } } } // safely add the raw marginal vectors using logsumexp rawMarginalsItr = rawMarginals.begin(); for (SampleDataLikelihoods::iterator s = likelihoods.begin(); s != likelihoods.end(); ++s) { vector& sdls = *s; const map >& rawmgs = *rawMarginalsItr++; map marginals; vector rawprobs; for (map >::const_iterator m = rawmgs.begin(); m != rawmgs.end(); ++m) { long double p = logsumexp_probs(m->second); marginals[m->first] = p; rawprobs.push_back(p); } long double normalizer = logsumexp_probs(rawprobs); for (vector::iterator sdl = sdls.begin(); sdl != sdls.end(); ++sdl) { long double newmarginal = marginals[sdl->genotype] - normalizer; delta += newmarginal - sdl->marginal; sdl->marginal = newmarginal; } } return delta; }