Revision 49cb69c5a18fdb262964fbfeb47ab2099eb32c5c authored by Wesley Tansey on 03 May 2018, 19:46:59 UTC, committed by Wesley Tansey on 03 May 2018, 19:46:59 UTC
sfdr.py
import matplotlib as mpl
from matplotlib import cm, colors
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import argparse
import csv
import sys
from scipy.sparse import csc_matrix, dia_matrix, linalg as sla
from scipy.stats import norm
from smoothed_fdr import SmoothedFdr, GaussianKnown, calc_plateaus
from normix import GridDistribution, predictive_recursion, empirical_null
import signal_distributions
from utils import *
from plotutils import *
def main():
parser = argparse.ArgumentParser(description='Runs the smoothed FDR algorithm.')
parser.add_argument('data_file', help='The file containing the raw z-score data.')
parser.add_argument('signal_distribution_file', help='The file location where the estimated signal distribution will be saved.')
parser.add_argument('--verbose', type=int, default=0, help='Print detailed progress information to the console. 0=none, 1=outer-loop only, 2=all details.')
parser.add_argument('--data_header', action='store_true', help='Specifies that there is a header line in the data file.')
# Predictive recursion settings
parser.add_argument('--pr_grid_x', nargs=3, type=int, default=[-7,7,57], help='The grid parameters (min, max, points) for the predictive recursion approximate distribution.')
parser.add_argument('--pr_sweeps', type=int, default=50, help='The number of randomized sweeps to make over the data.')
parser.add_argument('--pr_nullprob', type=float, default=1.0, help='The initial guess for the marginal probability of coming from the null distribution.')
parser.add_argument('--pr_decay', type=float, default=-0.67, help='The exponential decay rate for the recursive update weights.')
parser.set_defaults(data_header=False)
# Get the arguments from the command line
args = parser.parse_args()
# Load the dataset from file
data = np.loadtxt(args.data_file, delimiter=',', skiprows=1 if args.data_header else 0)
if args.verbose:
print('Estimating null distribution empirically via Efron\'s method.')
null_mean, null_stdev = empirical_null(data.flatten())
null_dist = GaussianKnown(null_mean, null_stdev)
if args.verbose:
print('Null: N({0}, {1}^2)'.format(null_mean, null_stdev))
if args.verbose:
print('Performing predictive recursion to estimate the signal distribution [{0}, {1}] ({2} bins)'.format(args.pr_grid_x[0], args.pr_grid_x[1], args.pr_grid_x[2]))
grid_x = np.linspace(args.pr_grid_x[0], args.pr_grid_x[1], args.pr_grid_x[2])
signal_data = data.flatten()
pr_results = predictive_recursion(signal_data,
args.pr_sweeps, grid_x,
mu0=args.null_mean, sig0=args.null_stdev,
nullprob=args.pr_nullprob, decay=args.pr_decay)
# Get the estimated distribution
estimated_dist = GridDistribution(pr_results['grid_x'], pr_results['y_signal'])
penalties = load_trails(args.trails)
#TODO: Fill in the rest so there's a better lightweight script
Computing file changes ...