Content - 600970fa1369f5523520c522b0e2f973fc225940 - d7c631f/SNFUtils.py

visit type:
https://github.com/stephenfloor/tripseq-analysis

05 April 2024, 18:34:26 UTC
Tip revision: 3e823abcca5b8c1e5e89dd9bd4c49e8673b3e957 authored by Stephen Floor on 24 June 2017, 00:52:49 UTC
email update
Tip revision: 3e823ab
SNFUtils.py
#!/usr/bin/env python 

import os,subprocess,sys
from itertools import izip
import numpy as np 
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.stats.stats import pearsonr, spearmanr 

def plot_txs (tx_i, tx_j, clust_i, clust_j,
              x_i, y_i, error_i, xfit_i, yfit_i,
              x_j, y_j, error_j, xfit_j, yfit_j):
    """Plot two transcripts' data and fitted curves together and save the figure.

    Draws, on one set of axes: the measured points of each transcript with
    error bars, each transcript's fitted curve, and the point-wise difference
    between the two fitted curves (yfit_i - yfit_j, plotted against xfit_i).
    The Pearson and Spearman correlations between the two fitted curves are
    written onto the plot.

    Parameters:
        tx_i, tx_j       -- transcript names (strings); used in the legend and
                            in the output file name.
        clust_i, clust_j -- cluster identifiers shown in the legend.
        x_*, y_*         -- measured x positions and values per transcript.
        error_*          -- y error bars for the measured values.
        xfit_*, yfit_*   -- x positions and values of the fitted curves; the
                            two yfit sequences are compared element by element.

    The figure is written with plt-style savefig to "<tx_i> vs <tx_j>" and
    then closed.  Nothing is returned.
    """
    fig, ax = plt.subplots()

    # Keep every artist handle so the legend can be built explicitly.  A
    # labels-only legend() call depends on the implicit order in which
    # matplotlib collects artists (plain lines are gathered before errorbar
    # containers), which attaches labels to the wrong curves.
    data_i = ax.errorbar(x_i, y_i, marker='x', yerr=error_i, ls="None")
    fit_i, = ax.plot(xfit_i, yfit_i)
    data_j = ax.errorbar(x_j, y_j, marker='s', yerr=error_j, ls="None")
    fit_j, = ax.plot(xfit_j, yfit_j)

    diffs = [a - b for a, b in zip(yfit_i, yfit_j)]
    diff_line, = ax.plot(xfit_i, diffs)

    highest = max(max(y_i), max(yfit_i), max(y_j), max(yfit_j), max(diffs))
    lowest = min(min(y_i), min(yfit_i), min(y_j), min(yfit_j), min(diffs))
    # Pad 20% (+1) *away* from the data.  Scaling the raw value by 1.2 moves a
    # positive minimum upwards (and a negative maximum downwards), which
    # would clip the data instead of padding it.
    ymax = highest + 0.2 * abs(highest) + 1
    ymin = lowest - 0.2 * abs(lowest) - 1

    ax.axis([.5, 9.5, ymin, ymax])
    ax.set_xticks(range(1,10))
    ax.set_xticklabels(['80S', 'poly2', 'poly3', 'poly4', 'poly5', 'poly6', 'poly7', 'poly8', 'cyto'])
    ax.legend([data_i, fit_i, data_j, fit_j, diff_line],
              [tx_i + "_clust %s" % clust_i, tx_i + "_fit",
               tx_j + "_clust %s" % clust_j, tx_j + "_fit",
               "difference"],
              fontsize=8)

    prsn = pearsonr(yfit_i, yfit_j)[0]
    sprmn = spearmanr(yfit_i, yfit_j)[0]

    ax.text(.75, 1, "Parms pearson: %3.2f spearman: %3.2f" % (prsn, sprmn), fontsize=8)
    fig.savefig("%s vs %s" % (tx_i, tx_j))
    plt.close(fig)
              
def third_order_poly_fit_plot (x, y, outname, yerror):
    """Fit a cubic polynomial to (x, y), plot data and fit, and save the figure.

    The model p1 + p2*x + p3*x**2 + p4*x**3 is fitted with scipy's curve_fit
    starting from (1, 1, 1, 1).  The plot shows the input points with error
    bars and the fitted curve sampled from 1 up to max(x) in steps of 0.001.
    The title reports the sum of squared residuals and that sum divided by
    the median of y; a text box lists the fitted parameters, their standard
    errors (square roots of the covariance diagonal), the relative errors,
    and the Pearson/Spearman correlation between fitted and observed values.

    Parameters:
        x, y    -- sequences of x positions and observed values.
        outname -- path passed to savefig.
        yerror  -- y error bars for the observed values.

    Nothing is returned; the figure is closed after saving.
    """
    def func(x, p1, p2, p3, p4):
        return p1 + p2 * x + p3 * x**2 + p4 * x**3

    xdata = np.array(x)
    ydata = np.array(y)
    ymedian = np.median(y)

    popt, pcov, infodict, mesg, ier = curve_fit(func, xdata, ydata, p0=(1, 1, 1, 1), full_output=1)

    # Dense sampling of the fitted curve for drawing.
    xnew = np.arange(1, max(x), 0.001)
    ynew = func(xnew, *popt)

    fig, ax = plt.subplots()
    data = ax.errorbar(x, y, marker='x', yerr=yerror, ls="None")
    fit_line, = ax.plot(xnew, ynew)

    ax.axis([.5, 9.5, 0, max( max(y), max(ynew) ) + 1])
    # Pin the tick positions explicitly so every label is attached to its
    # fraction, rather than relying on whatever ticks the automatic locator
    # happens to generate.
    ax.set_xticks(range(1,10))
    ax.set_xticklabels(['80S', 'poly2', 'poly3', 'poly4', 'poly5', 'poly6', 'poly7', 'poly8', 'cyto'])
    # Explicit handles: a labels-only legend() call pairs labels with artists
    # by implicit collection order, which does not match drawing order when
    # errorbar containers and plain lines are mixed.
    ax.legend([data, fit_line], ['Input', 'Third order polynomial'])

    residuals = sum(infodict['fvec']**2)
    ax.set_title("Sq. Resid.: %5.4f; Res/Median: %5.4f" % (residuals, residuals/ymedian))

    perr = np.sqrt(np.diag(pcov))
    perr_percent = [np.fabs(err / par) for err, par in zip(perr, popt)]
    avg_percent_error = np.mean(perr_percent)
    total_percent_error = sum(perr_percent)
    weighted_perr = sum([pct * np.fabs(par) for pct, par in zip(perr_percent, popt)])

    # Fitted values at the observed x positions, compared against y.
    fitted = [func(xi, *popt) for xi in x]
    prsn = pearsonr(fitted, y)[0]
    sprmn = spearmanr(fitted, y)[0]

    def prt(inp): #"pretty"
        return ["%3.2f" % value for value in inp]

    ax.text(.75, 1, "Parms %s\nerrors %s\n%% error %s\nmean %%: %3.2f sum %%: %3.2f weighted %%: %3.2f pearson: %3.2f spearman: %3.2f" %
            (prt(popt), prt(perr), prt(perr_percent), avg_percent_error, total_percent_error, weighted_perr, prsn, sprmn),
            fontsize=8)
    fig.savefig(outname)
    plt.close(fig)

def plot_dist (x, y, outname, yerror):
    """Plot values with error bars across the nine fractions and save the figure.

    Parameters:
        x, y    -- x positions and values to plot.
        outname -- path passed to savefig.
        yerror  -- y error bars for the values.

    The x axis spans 0.5-9.5 with ticks 1..9 labelled '80S', 'poly2' ...
    'poly8', 'cyto'; the y axis runs from 0 to max(y) + 1.  Nothing is
    returned; the figure is closed after saving.
    """
    fig, ax = plt.subplots()
    data = ax.errorbar(x, y, marker='x', yerr=yerror, ls="None")

    ax.axis([.5, 9.5, 0, max(y) + 1])
    ax.set_xticks(range(1,10))
    ax.set_xticklabels(['80S', 'poly2', 'poly3', 'poly4', 'poly5', 'poly6', 'poly7', 'poly8', 'cyto'])
    # Pass the handle explicitly instead of relying on implicit artist order.
    ax.legend([data], ['Input'])

    fig.savefig(outname)
    plt.close(fig)

def plot_dist_fancy (x, y, outname, yerror, title):
    """Draw a presentation-style distribution plot and save it.

    All points are drawn with error bars, but the visible x range starts at
    2.5, so the first two positions (40S/60S in the full fraction list) are
    left out of view for comparison to clustering.  The y range runs from 0
    to the highest value-plus-error among the visible points.  Ticks 3..len(x)
    are labelled '80S', 'poly2' ... 'poly8', 'cyto'.

    Parameters:
        x, y    -- x positions and values; both are indexed from position 2 on.
        outname -- path passed to savefig.
        yerror  -- y error bars, indexable in parallel with y.
        title   -- figure title.

    Nothing is returned; the figure is closed after saving.
    """
    fig, ax = plt.subplots()
    ax.errorbar(x, y, marker='o', markersize=16, color='k', yerr=yerror, ls="None")

    # leave out 40/60 for comparison to clustering
    visible_y = y[2:]
    visible_tops = [value + err for value, err in zip(visible_y, yerror[2:])]
    ax.axis([2.5, len(x) + 0.5, 0, max(max(visible_y), max(visible_tops))])
    ax.set_xticks(range(3,len(x)+1))
    ax.set_xticklabels(['80S', 'poly2', 'poly3', 'poly4', 'poly5', 'poly6', 'poly7', 'poly8', 'cyto'], size=20, rotation=45)

    # Re-apply the current y tick values as labels to set their font size.
    ylabels = ax.get_yticks().tolist()
    ax.set_yticklabels(ylabels, size=20)

    ax.set_title(title, size=24)
    ax.set_ylabel("TPM", size=24)

    ax.tick_params(which='both', length=8, width=2, pad=10)

    # tick_params expects booleans here; the string 'off' is truthy and
    # would leave the ticks switched on.
    ax.tick_params(
        which='both',      # both major and minor ticks are affected
        bottom=False,      # ticks along the bottom edge are off
        top=False,         # ticks along the top edge are off
        right=False)       # ticks along the right edge are off

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    ax.spines['bottom'].set_linewidth(4)
    ax.spines['left'].set_linewidth(4)

    # Dashed grey vertical rule at x = 10.5.
    ax.axvline(x=10.5, linewidth=5, color='#cccccc', dashes=(4,16), dash_capstyle="round")

    fig.tight_layout()

    fig.savefig(outname)
    plt.close(fig)


def pairwise(t):
    """Lazily yield consecutive, non-overlapping pairs from iterable t.

    pairwise([1, 2, 3, 4]) -> (1, 2), (3, 4).  A trailing unpaired element
    is dropped.  Returns an iterator.
    """
    # itertools.izip exists only on Python 2; on Python 3 the built-in zip
    # is already lazy.  Resolving it here keeps the function usable on both.
    try:
        from itertools import izip as lazy_zip
    except ImportError:
        lazy_zip = zip

    it = iter(t)
    # The same iterator is passed twice, so each output tuple consumes two
    # consecutive items.
    return lazy_zip(it, it)

def chunkwise(t, size=2):
    """Lazily yield consecutive, non-overlapping tuples of `size` items from t.

    chunkwise(range(6), 3) -> (0, 1, 2), (3, 4, 5).  Trailing items that do
    not fill a complete chunk are dropped.  Returns an iterator.
    """
    # itertools.izip exists only on Python 2; on Python 3 the built-in zip
    # is already lazy.  Resolving it here keeps the function usable on both.
    try:
        from itertools import izip as lazy_zip
    except ImportError:
        lazy_zip = zip

    it = iter(t)
    # `size` references to one iterator: each tuple pulls `size` items in turn.
    return lazy_zip(*[it] * size)

def stdout_from_command(command):
    """Run a shell command and return an iterator over its stdout lines.

    The command is started immediately through the shell (shell=True), so
    `command` must come from a trusted source.  Lines are yielded as read
    from the pipe (including their trailing newline) until end of output.

    Once the output is exhausted the child process is waited for, and the
    pipe is closed when iteration ends (normally or early), so neither the
    process nor the file descriptor is leaked.
    """
    p = subprocess.Popen(command,
                         stdout = subprocess.PIPE,
                         shell = True)

    def _lines():
        try:
            for line in iter(p.stdout.readline, b''):
                yield line
            # End of output reached: reap the child.
            p.wait()
        finally:
            # Also runs when the consumer stops iterating early.
            p.stdout.close()

    return _lines()

def safe_open_file(filename):
    """Open `filename` for writing, refusing to overwrite an existing file.

    Returns the open file object.  Exits the program via sys.exit with a
    "FATAL" message if the path already exists or cannot be opened for
    writing.
    """
    if os.path.exists(filename):
        sys.exit("FATAL: file %s exists; cowardly refusing to overwrite." % filename)

    try:
        return open(filename, "w")
    except (IOError, OSError):
        # Only I/O failures are turned into the fatal message; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        sys.exit("FATAL: cannot open file %s for writing." % filename)

def prompt(promptstr):
    """Print `promptstr`, ask whether to continue, and exit on a "no".

    Reads one line from standard input.  If the answer (case-insensitive,
    surrounding whitespace ignored) is "n" or "no", the program exits with
    status 0; any other answer returns normally.
    """
    # raw_input is the Python 2 name; on Python 3 the equivalent is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Single-argument call form prints identically on Python 2 and 3.
    print(promptstr)

    inp = read_line("\nContinue?  [y/n]").strip().lower()

    if inp in ("n", "no"):
        sys.exit(0)

def is_number(s):
    """Return True if `s` can be converted with float(), otherwise False.

    None, and any value for which float() raises ValueError or TypeError,
    gives False.
    """
    if s is not None:
        try:
            float(s)
        except (TypeError, ValueError):
            pass
        else:
            return True
    return False

def merge(d1, d2, merge_fn=lambda x,y:y):
    """
    http://stackoverflow.com/questions/38987/how-can-i-merge-two-python-dictionaries-in-a-single-expression
    Merges two dictionaries, non-destructively, combining
    values on duplicate keys as defined by the optional merge
    function.  The default behavior replaces the values in d1
    with corresponding values in d2.  (There is no other generally
    applicable merge strategy, but often you'll have homogeneous
    types in your dicts, so specifying a merge technique can be
    valuable.)

    Neither input is modified; a new dict is returned.  For a key present
    in both, the result is merge_fn(d1[key], d2[key]).

    Examples:

    >>> d1
    {'a': 1, 'c': 3, 'b': 2}
    >>> merge(d1, d1)
    {'a': 1, 'c': 3, 'b': 2}
    >>> merge(d1, d1, lambda x,y: x+y)
    {'a': 2, 'c': 6, 'b': 4}

    """
    # Start from a shallow copy so d1 itself is left untouched.
    result = dict(d1)
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for k, v in d2.items():
        if k in result:
            result[k] = merge_fn(result[k], v)
        else:
            result[k] = v
    return result