import numpy as np
from scipy.stats import norm as norm
from copy import deepcopy
class GridDistribution:
def __init__(self, x, y):
self.x = x
self.y = y
def pdf(self, data):
# Find the closest bins
rhs = np.searchsorted(self.x, data)
lhs = (rhs - 1).clip(0)
rhs = rhs.clip(0, len(self.x) - 1)
# Linear approximation (trapezoid rule)
rhs_dist = np.abs(self.x[rhs] - data)
lhs_dist = np.abs(self.x[lhs] - data)
denom = rhs_dist + lhs_dist
denom[denom == 0] = 1. # handle the zero-distance edge-case
rhs_weight = 1.0 - rhs_dist / denom
lhs_weight = 1.0 - rhs_weight
return lhs_weight * self.y[lhs] + rhs_weight * self.y[rhs]
def trapezoid(x, y):
return np.sum((x[1:] - x[0:-1]) * (y[1:] + y[0:-1]) / 2.)
def generate_sweeps(num_sweeps, num_samples):
results = []
for sweep in xrange(num_sweeps):
a = np.arange(num_samples)
np.random.shuffle(a)
results.extend(a)
return np.array(results)
def predictive_recursion(z, num_sweeps, grid_x, mu0=0., sig0=1.,
nullprob=1.0, decay=-0.67):
sweeporder = generate_sweeps(num_sweeps, len(z))
theta_guess = np.ones(len(grid_x)) / float(len(grid_x))
return predictive_recursion_fdr(z, sweeporder, grid_x, theta_guess,
mu0, sig0, nullprob, decay)
def predictive_recursion_fdr(z, sweeporder, grid_x, theta_guess, mu0 = 0.,
sig0 = 1.0, nullprob = 1.0, decay = -0.67):
gridsize = grid_x.shape[0]
theta_subdens = deepcopy(theta_guess)
pi0 = nullprob
joint1 = np.zeros(gridsize)
ftheta1 = np.zeros(gridsize)
# Begin sweep through the data
for i, k in enumerate(sweeporder):
cc = (3. + i)**decay
joint1 = norm.pdf(grid_x, loc=z[k] - mu0, scale=sig0) * theta_subdens
m0 = pi0 * norm.pdf(z[k] - mu0, 0., sig0)
m1 = trapezoid(grid_x, joint1)
mmix = m0 + m1
pi0 = (1. - cc) * pi0 + cc * (m0 / mmix)
ftheta1 = joint1 / mmix
theta_subdens = (1. - cc) * theta_subdens + cc * ftheta1
# Now calculate marginal distribution along the grid points
y_mix = np.zeros(gridsize)
y_signal = np.zeros(gridsize)
for i, x in enumerate(grid_x):
joint1 = norm.pdf(grid_x, x - mu0, sig0) * theta_subdens
m0 = pi0 * norm.pdf(x, mu0, sig0)
m1 = trapezoid(grid_x, joint1)
y_mix[i] = m0 + m1;
y_signal[i] = m1 / (1. - pi0)
return {'grid_x': grid_x,
'sweeporder': sweeporder,
'theta_subdens': theta_subdens,
'pi0': pi0,
'y_mix': y_mix,
'y_signal': y_signal}