Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision 907bf5c37e33318d1b4a089e4385e32e2bc58100 authored by Wesley Tansey on 23 June 2016, 01:57:13 UTC, committed by Wesley Tansey on 23 June 2016, 01:57:13 UTC
Added a simplified interface for fdr smoothing that only requires the data and the edges
1 parent 97c546a
  • Files
  • Changes
  • bd3d3c3
  • /
  • smoothfdr
  • /
  • normix.py
Raw File Download
Permalinks

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:907bf5c37e33318d1b4a089e4385e32e2bc58100
directory badge Iframe embedding
swh:1:dir:a6c0fd806e00b75984345b63925b36633898d701
content badge Iframe embedding
swh:1:cnt:e37a6bc0c16cc7181cf6a372801717af3cf4386f
Citations

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
normix.py
import numpy as np
from scipy.stats import norm as norm
from scipy.optimize import fmin_bfgs
from copy import deepcopy

class GridDistribution:
	def __init__(self, x, y):
		self.x = x
		self.y = y

	def pdf(self, data):
		# Find the closest bins
		rhs = np.searchsorted(self.x, data)
		lhs = (rhs - 1).clip(0)
		rhs = rhs.clip(0, len(self.x) - 1)

		# Linear approximation (trapezoid rule)
		rhs_dist = np.abs(self.x[rhs] - data)
		lhs_dist = np.abs(self.x[lhs] - data)
		denom = rhs_dist + lhs_dist
		denom[denom == 0] = 1. # handle the zero-distance edge-case
		rhs_weight = 1.0 - rhs_dist / denom
		lhs_weight = 1.0 - rhs_weight

		return lhs_weight * self.y[lhs] + rhs_weight * self.y[rhs]

def trapezoid(x, y):
	return np.sum((x[1:] - x[0:-1]) * (y[1:] + y[0:-1]) / 2.)

def generate_sweeps(num_sweeps, num_samples):
	results = []
	for sweep in xrange(num_sweeps):
		a = np.arange(num_samples)
		np.random.shuffle(a)
		results.extend(a)
	return np.array(results)

def predictive_recursion(z, num_sweeps, grid_x, mu0=0., sig0=1.,
							nullprob=1.0, decay=-0.67):
	sweeporder = generate_sweeps(num_sweeps, len(z))
	theta_guess = np.ones(len(grid_x)) / float(len(grid_x))
	return predictive_recursion_fdr(z, sweeporder, grid_x, theta_guess,
									mu0, sig0, nullprob, decay)

def predictive_recursion_fdr(z, sweeporder, grid_x, theta_guess, mu0 = 0.,
							sig0 = 1.0, nullprob = 1.0, decay = -0.67):
	gridsize = grid_x.shape[0]
	theta_subdens = deepcopy(theta_guess)
	pi0 = nullprob
	joint1 = np.zeros(gridsize)
	ftheta1 = np.zeros(gridsize)

	# Begin sweep through the data
	for i, k in enumerate(sweeporder):
		cc = (3. + i)**decay
		joint1 = norm.pdf(grid_x, loc=z[k] - mu0, scale=sig0) * theta_subdens
		m0 = pi0 * norm.pdf(z[k] - mu0, 0., sig0)
		m1 = trapezoid(grid_x, joint1)
		mmix = m0 + m1
		pi0 = (1. - cc) * pi0 + cc * (m0 / mmix)
		ftheta1 = joint1 / mmix
		theta_subdens = (1. - cc) * theta_subdens + cc * ftheta1

	# Now calculate marginal distribution along the grid points
	y_mix = np.zeros(gridsize)
	y_signal = np.zeros(gridsize)
	for i, x in enumerate(grid_x):
		joint1 = norm.pdf(grid_x, x - mu0, sig0) * theta_subdens
		m0 = pi0 * norm.pdf(x, mu0, sig0)
		m1 = trapezoid(grid_x, joint1)
		y_mix[i] = m0 + m1;
		y_signal[i] = m1 / (1. - pi0)

	return {'grid_x': grid_x,
            'sweeporder': sweeporder,
			'theta_subdens': theta_subdens,
			'pi0': pi0,
			'y_mix': y_mix,
			'y_signal': y_signal}

def empirical_null(z, nmids=150, pct=-0.01, pct0=0.25, df=4):
    '''Estimate f(z) and f_0(z) using a polynomial approximation to Efron (2004)'s method.'''
    N = len(z)
    med = np.median(z)
    lb = med + (1 - pct) * (z.min() - med)
    ub = med + (1 - pct) * (z.max() - med)

    breaks = np.linspace(lb, ub, nmids+1)
    zcounts = np.histogram(z, bins=breaks)[0]
    mids = (breaks[:-1] + breaks[1:])/2

    ### Truncated Polynomial

    # Truncate to [-3, 3]
    selected = np.logical_and(mids >= -3, mids <= 3)
    zcounts = zcounts[selected]
    mids = mids[selected]

    # Form a polynomial basis and multiply by z-counts
    X = np.array([mids ** i for i in xrange(df+1)]).T
    beta0 = np.zeros(df+1)
    loglambda_loss = lambda beta, X, y: -((X * y[:,np.newaxis]).dot(beta) - np.exp(X.dot(beta))).sum()
    results = fmin_bfgs(loglambda_loss, beta0, args=(X, zcounts), disp=True)
    a = np.linspace(-3,3,1000)
    B = np.array([a ** i for i in xrange(df+1)]).T
    beta_hat = results

    # Back out the mean and variance from the Taylor terms
    x_max = mids[np.argmax(X.dot(beta_hat))]
    loglambda_deriv1_atmode = np.array([i * beta_hat[i] * x_max**(i-1) for i in xrange(1,df+1)]).sum()
    loglambda_deriv2_atmode = np.array([i * (i-1) * beta_hat[i] * x_max**(i-2) for i in xrange(2,df+1)]).sum()
    
    # Handle the edge cases that arise with numerical precision issues
    sigma_enull = np.sqrt(-1.0/loglambda_deriv2_atmode) if loglambda_deriv2_atmode < 0 else 1
    mu_enull = (x_max - loglambda_deriv1_atmode/loglambda_deriv2_atmode) if loglambda_deriv2_atmode != 0 else 0

    return (mu_enull, sigma_enull)
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Contact— JavaScript license information— Web API