https://github.com/stevendflygare/muscleQNT
Tip revision: 16a7adca009034fca2a6f68a8b83f8003a68962d authored by stevendflygare on 16 March 2015, 17:06:20 UTC
user state change
user state change
Tip revision: 16a7adc
new_analyze4.py
#!/opt/local/bin/python2.7
import sys
import re
from PIL import Image
import scipy.ndimage as si
import numpy as np
import random
import scipy.stats as ss
import argparse
import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt
import itertools
#command line parsing
parser = argparse.ArgumentParser()
parser.add_argument("fiber_file", help="fiber size input file")
parser.add_argument("image_metrics", help="image metrics input file")
parser.add_argument("-c","--conversion",help="pixel to micro-meter conversion (multiplicative factor)",type=float)
parser.add_argument("-rf","--relative",help="use relative frequency instead of raw counts in histogram",action="store_true")
parser.add_argument("-bs","--bin_sizes",help="comma separated list of pixel break values for bins",type=str)
parser.add_argument("-hb",help="number of histogram bins",type=int,default=20)
parser.add_argument("-xl",help="x-axis label",type=str,default="")
parser.add_argument("-yl",help="y-axis label",type=str,default="")
args = parser.parse_args()
def load_fiber_sizes(fiber_file,mice_hash):
f = open(fiber_file,'r')
for line in f:
data = re.split(",",line.strip())
m = re.search("([0-9a-zA-Z]+)_([0-9a-zA-Z]+)_(\d+)\.",data[0])
if m:
if m.group(1) in mice_hash:
mice_hash[m.group(1)][0].append(map(int,data[1:]))
else:
print "problem, mouse not found going to exit: " + m.group(1)
sys.exit(0)
else:
print "problem, image name is not recognized, please check naming scheme of images: " + data[0]
print "exiting..."
sys.exit(0)
f.close()
def create_histogram(mice_hash,hist_name,rf,hb,user_bins,xlabel,ylabel,conversion):
#do permutations to get all possibilities and then let the final histogram be the average
image_names = []
mt_histograms = []
wt_histograms = []
p_index = 0
bins = []
if user_bins:
bins = map(int,re.split(",",user_bins))
hb = len(bins)-1
for k in mice_hash:
image_names.append([])
for im in mice_hash[k][1]:
image_names[-1].append((k,im))
print image_names
for p in itertools.product(*image_names):
mutant = []
wild_type = []
#print p
for i in p:
if mice_hash[i[0]][2] == 0:
wild_type.extend(mice_hash[i[0]][0][mice_hash[i[0]][1].index(i[1])])
else:
mutant.extend(mice_hash[i[0]][0][mice_hash[i[0]][1].index(i[1])])
if p_index == 0:
p_index+=1
wt_hist, tmpbins = np.histogram(wild_type,hb)
if not bins:
bins = tmpbins[:]
if rf:
wt_relfreq = wt_hist/float(sum(wt_hist))
wt_histograms.append(wt_relfreq)
else:
wt_histograms.append(wt_hist)
mt_hist, tmpbins = np.histogram(mutant,bins)
if rf:
mt_relfreq = mt_hist/float(sum(mt_hist))
mt_histograms.append(mt_relfreq)
else:
mt_histograms.append(mt_hist)
else:
wt_hist, tmpbins = np.histogram(wild_type,bins)
if rf:
wt_relfreq = wt_hist/float(sum(wt_hist))
wt_histograms.append(wt_relfreq)
else:
wt_histograms.append(wt_hist)
mt_hist, tmpbins = np.histogram(mutant,bins)
if rf:
mt_relfreq = mt_hist/float(sum(mt_hist))
mt_histograms.append(mt_relfreq)
else:
mt_histograms.append(mt_hist)
#print np.matrix(wt_histograms)
wt_hist = np.mean(wt_histograms,axis=0)
wt_std = np.std(wt_histograms,axis=0)
mt_hist = np.mean(mt_histograms,axis=0)
mt_std = np.std(mt_histograms,axis=0)
#print "making histogram"
#use relative frequencies instead of the density
ind = np.arange(len(wt_hist)) # the x locations for the groups
width = 0.35 # the width of the bars
rects1 = plt.bar(ind, wt_hist, width, color='b', yerr=wt_std, error_kw=dict(elinewidth=4, ecolor='green'))
rects2 = plt.bar(ind+width, mt_hist, width, color='r', yerr=mt_std, error_kw=dict(elinewidth=4, ecolor='pink'))
a = list(wt_hist)
a.extend(mt_hist)
if not rf:
plt.ylim([0, int(max(a)*1.10)])
tick_vals = []
print "\nbins start and end pixel values: "
for i in range(len(bins)):
tick_vals.append(i+1)
if i > 0:
print "\tbin "+str(i)+": "+str(bins[i-1])+"-"+str(bins[i])
#for i in range(len(bins)):
# if i < len(bins)-1:
# if not conversion:
#tick_vals.append(int(np.mean(bins[i:i+2])/100))
# tick_vals.append(int(np.mean(bins[i:i+2])))
# else:
# tick_vals.append(round(np.mean(bins[i:i+2])*conversion,2))
#plt.xticks(ind,tick_vals)
plt.xticks(ind+width,tick_vals)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.legend( (rects1[0], rects2[0]), ('Wild Type', 'Mutant') )
plt.show()
#plt.savefig(hist_name)
plt.clf()
def create_decile_plot(mice_hash,plot_name):
xvals = range(0,105,5)
for m in mice_hash:
if mice_hash[m][2] == 0:
y = np.percentile(mice_hash[m][0],xvals)
plt.plot(xvals,y,"bo-")
for m in mice_hash:
if mice_hash[m][2] == 1:
y = np.percentile(mice_hash[m][0],xvals)
plt.plot(xvals,y,"ro-")
plt.axis([0,110,0,2000])
plt.savefig(plot_name)
plt.clf()
f = open(args.image_metrics,'r')
index = 0
mice = {} #key is mouse id, then the value of each key is [[fibers],[images],mutant/wildtype]
m_slow = {}
m_fast = {}
for line in f:
if index > 0:
data = re.split(",",line.strip())
m = re.search("([0-9a-zA-Z]+)_([0-9a-zA-Z]+)_(\d+)\.",data[0])
if m:
if m.group(1) in mice:
mice[m.group(1)][1].append(m.group(2))
mice[m.group(1)][2] = int(m.group(3))
m_slow[m.group(1)][1].append(m.group(2))
m_slow[m.group(1)][2] = int(m.group(3))
m_fast[m.group(1)][1].append(m.group(2))
m_fast[m.group(1)][2] = int(m.group(3))
else:
mice[m.group(1)] = [[],[],0]
mice[m.group(1)][1].append(m.group(2))
mice[m.group(1)][2] = int(m.group(3))
m_slow[m.group(1)] = [[],[],0]
m_slow[m.group(1)][1].append(m.group(2))
m_slow[m.group(1)][2] = int(m.group(3))
m_fast[m.group(1)] = [[],[],0]
m_fast[m.group(1)][1].append(m.group(2))
m_fast[m.group(1)][2] = int(m.group(3))
else:
print "problem, image name is not recognized, please check naming scheme of images: " + data[0]
print "exiting..."
sys.exit(0)
index+=1
f.close()
load_fiber_sizes(args.fiber_file,mice)
create_histogram(mice,"fiber_histogram.svg",args.relative,args.hb,args.bin_sizes,args.xl,args.yl,args.conversion)
#chi2, p, dof, ex = ss.chi2_contingency([wt_hist,mt_hist])
#print "chi2 test p-value: " + str(p)
#plotarray = []
#plotarray.append(mutant)
#plotarray.append(wild_type)
#plt.boxplot(plotarray)
#plt.xticks(np.arange(3),("","mutant","wild type"))
#plt.savefig("box_plots.png")
#plt.hist(plotarray,20,histtype='bar',color=['red','blue'],label=['Mutant','Wild Type'])
#plt.legend()
#plt.savefig("fiber_histogram.svg")