https://github.com/jttoivon/MODER
Tip revision: c485231e5b468ae509306e1aaeebaa0f3004572d authored by Jarkko Toivonen on 31 March 2020, 18:10:18 UTC
Fixed indexing bug.
Fixed indexing bug.
Tip revision: c485231
to_html.py
#!/usr/bin/python
import getopt
import sys
import re
import subprocess
import os
import numpy
import string
import math
import StringIO
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import ticker
from mpl_toolkits.axes_grid1 import make_axes_locatable
css="""
h1 {
margin-left: 100px;
}
ul {
list-style-type: square;
}
#info {
overflow: auto;
background-color: lightblue;
float: left;
border: solid green;
}
#programinfo {
overflow: auto;
background-color: lightblue;
float: left;
clear: left;
border: solid green;
margin-top: 20px;
margin-bottom: 20px;
padding-right: 20px;
}
#programinfo ul {
max-width: 60em;
}
div#lambdaTable th{
text-align: left;
}
div#factors {
# clear: left;
float: left;
text-align: center;
}
div#factors img{
# clear: left;
# float: left;
padding: 20px;
}
div#flankfactors {
clear: both;
}
#cobs {
clear:left;
float: left;
text-align: center;
// color: red;
}
div#iterations {
float: left;
#text-align: center;
color: red;
}
div#iterations img{
width: 200;
height: 200;
}
div#background {
overflow: auto;
clear: left;
}
table.ppmtable th{
background-color: lightblue;
}
table.ppmtable td,
table.ppmtable th
{
padding: 10px;
border: 1px solid black;
text-align: center;
}
table.ppmtable {
border-collapse: collapse;
border-spacing: 0px;
}
h3.tableheading {
margin-top: 40px;
margin-bottom: 10px;
}
div.rc {
border: 2px solid white;
}
div.normal {
border: 2px solid red;
}
div.logoContainer {vertical-align: middle;}
div.logoButtonContainer {display: inline-block; vertical-align: middle;}
div.normal { text-align: center; cursor: pointer;}
div.rc { text-align: center; cursor: pointer;}
div.logoImageContainer {display: inline-block; vertical-align: middle;}
"""
dna_orients=["HT", "HH", "TT", "TH"]
dna_orient_dict = {"HT" : 0, "HH" : 1, "TT" : 2, "TH" : 3}
rna_orients=["HT", "TH"]
rna_orient_dict = {"HT" : 0, "TH" : 1}
max_logo_width = 500 # This is defined in myspacek40
# Entropy of a probability distribution 'l'
def entropy(l):
sum=0
for f in l:
if f != 0:
try:
sum+=f*math.log(f,2)
except ValueError:
print l
raise
return -sum;
def information_content(l):
return 2-entropy(l)
def matrix_information_content(m):
columns=m.transpose().tolist() # l is list of columns
total_ic = 0.0
for column in columns:
total_ic += information_content(column)
return total_ic
# Normalize a PFM
def normalize(m):
for i in range(0,m.shape[1]):
if sum(m[:,i]) != 0:
m[:,i] /= sum(m[:,i])
return m
def reverse_complement_pwm(m):
result=m.copy()
for c in range(0,m.shape[1]):
for r in range(0,m.shape[0]):
result[m.shape[0]-r-1,m.shape[1]-c-1] = m[r,c]
return result
def matrices_in_orientation(o, p1, p2):
if o == "HT":
return (p1, p2)
elif o == "HH":
return (p1, reverse_complement_pwm(p2))
elif o == "TT":
return (reverse_complement_pwm(p1), p2)
elif o == "TH":
return (reverse_complement_pwm(p1), reverse_complement_pwm(p2))
def find_lines(x, str, pos, count):
resultfile= StringIO.StringIO(string.join(x,""))
resultlist=[]
# read matrix header
while True:
line=resultfile.readline()
if re.match(str, line):
#line = resultfile.readline()
if pos == 0:
resultlist.append(line)
count -= 1
else:
pos -= 1
break
elif line=="":
raise AttributeError, "Tag '%s' not found" % str
while pos > 0 and line != "":
line = resultfile.readline()
pos -= 1
while count > 0:
count -= 1
line = resultfile.readline()
resultlist.append(line)
return resultlist
def readarray(lines):
result=[]
for line in lines:
line = line.rstrip('\n')
tmp=line.split("\t")
result.append(tmp)
return numpy.array(result)
def readmatrix(x):
result=[]
try:
rows,cols=x[0].split("x")
first=1
except ValueError:
first=0 # no header line
for line in x[first:]:
line=line.strip()
# tmp=map(float,line.split('\t'))
tmp=map(float,line.split())
result.append(tmp)
return numpy.array(result)
def compute_expected(pwm1, pwm2, o, d):
m1,m2 = matrices_in_orientation(o, pwm1, pwm2)
k1 = m1.shape[1]
k2 = m2.shape[1]
dimer_len = k1+k2+d
fill=1.00
# Left occurrence
result1 = numpy.empty((4, dimer_len))
result1.fill(fill)
for pos in xrange(0, k1):
result1[:,pos] = m1[:,pos]
# Right occurrence
result2 = numpy.empty((4, dimer_len))
result2.fill(fill)
for pos in xrange(k1+d, dimer_len):
result2[:,pos] = m2[:,pos-(k1+d)]
expected = normalize(result1 * result2)
return expected
# Write results for a cob case
def write_results(cob, o, d, pwm1, pwm2, observed, expected, deviation, last_iteration_output, get_flanks):
k1 = pwm1.shape[1]
k2 = pwm2.shape[1]
if get_flanks:
try:
flank=readmatrix(find_lines(last_iteration_output, "Flank dimer case matrix %s %s %i:" % (cob, o, d), 2, 4))
except AttributeError:
flank=numpy.zeros(expected.shape)
oname="observed.%s.%s.%i.pfm" % (cob, o, d)
ename="expected.%s.%s.%i.pfm" % (cob, o, d)
dname="deviation.%s.%s.%i.dev" % (cob, o, d)
fname="flank.%s.%s.%i.pfm" % (cob, o, d)
writematrixfile(observed, oname)
writematrixfile(expected, ename)
writematrixfile(deviation, dname)
if get_flanks:
writematrixfile(flank, fname)
oname_rc="observed.%s.%s.%i-rc.pfm" % (cob, o, d)
ename_rc="expected.%s.%s.%i-rc.pfm" % (cob, o, d)
dname_rc="deviation.%s.%s.%i-rc.dev" % (cob, o, d)
fname_rc="flank.%s.%s.%i-rc.pfm" % (cob, o, d)
observed_rc=reverse_complement_pwm(observed)
expected_rc=reverse_complement_pwm(expected)
deviation_rc=reverse_complement_pwm(deviation)
if get_flanks:
flank_rc=reverse_complement_pwm(flank)
writematrixfile(observed_rc, oname_rc)
writematrixfile(expected_rc, ename_rc)
writematrixfile(deviation_rc, dname_rc)
if get_flanks:
writematrixfile(flank_rc, fname_rc)
# Forward direction
myrun("myspacek40 %s --logo %s %s" % (myspacek_flags, oname, oname.replace(".pfm", ".svg")))
myrun("myspacek40 %s --logo %s %s" % (myspacek_flags, ename, ename.replace(".pfm", ".svg")))
myrun("myspacek40 %s --difflogo %s %s" % (myspacek_flags, oname, ename)) # Deviation logo
if get_flanks:
if flank.shape[1] <= max_logo_width:
myrun("myspacek40 %s -core=%i,%i,%i --logo %s %s" % (myspacek_flags, k1, k2, d, fname, fname.replace(".pfm", ".svg")))
else:
print "Could not create logo for flanked dimer %s %s %i: too wide logo" % (cob, o, d)
# Reverse complement
myrun("myspacek40 %s --logo %s %s" % (myspacek_flags, oname_rc, oname_rc.replace(".pfm", ".svg")))
myrun("myspacek40 %s --logo %s %s" % (myspacek_flags, ename_rc, ename_rc.replace(".pfm", ".svg")))
myrun("myspacek40 %s --difflogo %s %s" % (myspacek_flags, oname_rc, ename_rc)) # Deviation logo
if get_flanks:
if flank_rc.shape[1] <= max_logo_width:
myrun("myspacek40 %s -core=%i,%i,%i --logo %s %s" % (myspacek_flags, k2, k1, d, fname_rc, fname_rc.replace(".pfm", ".svg")))
else:
print "Could not create logo for flanked dimer %s %s %i: too wide logo" % (cob, o, d)
for rc in ["", "-rc"]:
with open("three.%s.%s.%i%s.html" % (cob, o, d, rc), "w") as f:
oname = "observed.%s.%s.%i%s" % (cob, o, d, rc)
ename = "expected.%s.%s.%i%s" % (cob, o, d, rc)
dname = "deviation.%s.%s.%i%s" % (cob, o, d, rc)
myrun("mv %s.pfm_minus_%s.pfm.svg %s.svg" % (oname, ename, dname))
f.write('<h1>%s %s %i</h1>' % (cob, o, d))
f.write('<figure><figcaption>Observed:</figcaption><a href="%s.pfm"><img src="%s.svg"\></a></figure>' % (oname,oname))
f.write('<figure><figcaption>Expected:</figcaption><a href="%s.pfm"><img src="%s.svg"\></a></figure>' % (ename,ename))
f.write('<figure><figcaption>Deviation:</figcaption><a href="%s.dev"><img src="%s.svg"\></a></figure>' % (dname,dname))
# with open("three.%s.%s.%i-rc.html" % (cob, o, d), "w") as f:
# oname = "observed.%s.%s.%i" % (cob, o, d)
# ename = "expected.%s.%s.%i" % (cob, o, d)
# dname = "deviation.%s.%s.%i" % (cob, o, d)
# myrun("mv %s-rc.pfm_minus_%s-rc.pfm.svg %s-rc.svg" % (oname, ename, dname))
# f.write('<h1>%s %s %i reverse complement</h1>' % (cob, o, d))
# f.write('<figure><figcaption>Observed:</figcaption><img src="%s-rc.svg"\></figure>' % oname)
# f.write('<figure><figcaption>Expected:</figcaption><img src="%s-rc.svg"\></figure>' % ename)
# f.write('<figure><figcaption>Deviation:</figcaption><img src="%s-rc.svg"\></figure>' % (dname))
def get_cob_case(cob, o, d, pwm1, pwm2, last_iteration_output, get_flanks):
expected = compute_expected(pwm1, pwm2, o, d)
try:
deviation=readmatrix(find_lines(last_iteration_output, "Deviation matrix %s %s %i:" % (cob, o, d), 2, 4))
except AttributeError:
deviation=numpy.zeros(expected.shape)
observed = expected + deviation
g = numpy.vectorize(lambda x : max(x,0)) # This cuts negative values to 0
observed = normalize(g(observed)) # Because precision of (about) 6 digits is used, some elements can be slightly negative
write_results(cob, o, d, pwm1, pwm2, observed, expected, deviation, last_iteration_output, get_flanks)
return observed
def seconds_to_hms(seconds):
fraction=seconds-int(seconds)
seconds=int(seconds)
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
use_si_units = True
if use_si_units:
if h != 0:
result = "%dh %02dm %02.2fs" % (h, m, s+fraction)
elif m != 0:
result = "%dm %02.2fs" % (m, s+fraction)
else:
result = "%.2fs" % (s+fraction)
else:
if h != 0:
result = "%d:%02d:%02.2f" % (h, m, s+fraction)
elif m != 0:
result = "%d:%02.2f" % (m, s+fraction)
else:
result = "%.2f" % (s+fraction)
return result
def extract(query, list):
m = re.search(query, list)
try:
value = m.group(1)
except:
value = None
return value
def extract_list(query, list):
return re.findall(query, list)
def logo_container(anchor, image, ending, title=""):
if len(title) > 0:
attr='title="%s"' % title
else:
attr=''
return """<div class="logoContainer">
<div class="logoButtonContainer">
<div class="normal" onclick='myclick(event, "%s")'>+</div>
<div class="rc" onclick='myclick(event, "%s")'>-</div>
</div>
<div class="logoImageContainer" >
<a href="%s"><img class="image" src="%s" %s></a>
</div>
</div>""" % (ending, ending, anchor, image, attr)
# f ends with .svg, and is the filename of the logo
#def print_logo_container(f, logo, ending, title=""):
# f.write(logo_container(logo.replace(".svg", ending), logo, ending, title))
def mycommand(s):
p = subprocess.Popen(s, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(output, error) = p.communicate()
return (p.returncode, output, error)
def make_table_h(f, files, headers, titles=[]):
f.write("<table>")
if len(headers) > 0:
f.write("<tr>")
for h in headers:
f.write("<th>")
f.write(h)
f.write("</th>")
f.write("</tr>")
f.write("<tr>")
if len(titles) != len(files):
titles = [""]*len(files)
for logo, title in zip(files, titles):
link=logo.replace(".svg", ".pfm")
f.write("<td>")
f.write(logo_container(link, logo, ".pfm", title))
f.write("</td>")
f.write("</tr>")
f.write("</table>")
def make_table_v(files, f, header=""):
f.write("<table>")
# if len(header) > 0:
# print "<tr>"
# print "<th>"
# print f
# print "</th>"
# print "</tr>"
for plot in files:
f.write("<tr>")
f.write("<td>")
f.write('<a href="%s"><img src="%s"\></a>' % (plot,plot))
f.write("</td>")
f.write("</tr>")
f.write("</table>")
def make_table_v2(f, files, headers=[], links=[], titles=[]):
if len(headers) != len(files):
headers = [""]*len(files)
if len(links) != len(files):
links = [""]*len(files)
if len(titles) != len(files):
titles = [""]*len(files)
f.write("<table>")
for x,h,l in zip(files, headers, links):
f.write("<tr>")
if len(h) > 0:
f.write("<th>%s</th>" % h)
f.write("<td>")
if l == "":
l=x
f.write('<a href="%s"><img src="%s"\></a>' % (l,x))
f.write("</td>")
f.write("</tr>")
f.write("</table>")
def make_table_v3(f, files, headers, links, ending, titles=[]):
if len(titles) != len(files):
titles = [""]*len(files)
f.write("<table>")
for x,h,link, title in zip(files, headers, links, titles):
f.write("<tr>")
f.write("<th>%s</th>" % h)
f.write("<td>")
f.write(logo_container(link, x, ending, title))
#f.write('<a href="%s"><img src="%s"\></a>' % (l,f))
f.write("</td>")
f.write("</tr>")
f.write("</table>")
def make_table(table, headers, f):
f.write("<table>")
if len(headers) > 0:
f.write("<tr>")
for h in headers:
f.write("<th>")
f.write(h)
f.write("</th>")
f.write("</tr>")
for row in table:
f.write("<tr>")
for c in row:
f.write("<td>")
f.write(str(c))
f.write("</td>")
f.write("</tr>")
f.write("</table>")
def make_better_table(f, table, headers=[], row_headers=[], htmlclass=""):
if len(htmlclass) > 0:
f.write('<table class="%s">' % htmlclass)
else:
f.write("<table>")
if len(headers) > 0:
f.write("<tr>")
for x in headers:
f.write("<th>%s</th>" % x)
f.write("</tr>")
for i, row in enumerate(table):
f.write("<tr>")
if len(row_headers) > 0:
f.write("<th>%s</th>" % row_headers[i])
for c in row:
f.write("<td>")
f.write(c)
f.write("</td>")
f.write("</tr>")
f.write("</table>")
def get_best_cob_cases(cob_codes):
best_cases=[] # ["best.observed.0-0.svg", ...]
best_cases_links=[] # ["best.0-0.html", ...]
best_cases_headers=[] # ["best.observed.0-0.svg", ...]
for c in cob_codes:
best_cases.append("best.observed.%s.svg" % c)
best_cases_links.append("best.%s.html" % c)
try:
best_cases_headers.append(os.readlink("best.observed.%s.svg" % c))
except OSError:
best_cases_headers.append("best.observed.%s.svg(nonexisting)"%c)
return best_cases, best_cases_links, best_cases_headers
def get_lambda_table(results_output):
lambda_headers = factors + cobs + ["Background", "Sum"]
# lambda_table = [ [factors[i], 0.0] for i in xrange(number_of_factors) ] + \
# [ [cobs[i], 0.0] for i in xrange(number_of_cobs) ] +\
# [ ["Background", 0.0], ["Sum", 0.0] ]
bg_lambda = float(extract(r"Background lambda is (.*)", results_output))
temp = extract(r"Fixed lambdas are (.*)", results_output)
monomer_lambdas = eval(temp)
# for i,dummy in enumerate(factors):
# lambda_table[i][1] = monomer_lambdas[i]
cob_lambdas = [0] * number_of_cobs
for i, c in enumerate(cob_codes):
cob_lambdas[i] = float(extract(r"Sum of dimer lambdas of cob table %s is (.*)" % c, results_output))
lambdas = monomer_lambdas + cob_lambdas + [bg_lambda]
lambda_sum = sum(lambdas)
lambdas.append(lambda_sum)
lambda_table2 = zip(lambda_headers, lambdas)
# lambda_table[number_of_factors+number_of_cobs+1][1] = sum([float(lambda_table[i][1]) for i in xrange(0, number_of_factors + number_of_cobs + 1) ])
# print repr(lambda_table)
# print repr(lambda_table2)
# assert lambda_table == lambda_table2
return lambda_table2
def get_info(results_output, full_output, cob_codes):
maxiter = int(extract(r"Maximum number of iterations is (.*)", full_output))
iterations = int(extract(r"EM-algorithm took (.*) = .* iterations", results_output))
Lmin = int(extract(r"Minimum sequence length is (.*)", full_output))
Lmax = int(extract(r"Maximum sequence length is (.*)", full_output))
try:
lines = int(extract(r"Using (.*) sequences", full_output))
except TypeError:
lines = int(extract(r"Read (.*) lines from file",full_output))
command = extract(r"Command line was: (.*)", full_output)
start_time = extract(r"Starting program at (.*)", full_output)
version = extract(r"MODER version (.*)", full_output)
hostname = extract(r"Running on host: (.*)", full_output)
threads = extract(r"Using (.*) openmp threads", full_output)
epsilon = float(extract(r"Epsilon is (.*)", full_output))
excluded = 0
for c in cob_codes:
excluded += len(extract(r"Cob %s excluded cases: \[(.*)\]" % c, results_output).split(", "))
return iterations, maxiter, Lmin, Lmax, lines, epsilon, excluded, command, start_time, version, hostname, threads
def get_seeds(full_output, number_of_factors):
temp=extract_list(r"Fixed seeds are \[(.+)\]", full_output)
temp2=[x.split(", ") for x in temp]
seeds_begin = extract(r"Initial fixed seeds are \[(.+)\]", full_output).split(", ")
#seeds_begin = ["GACCGGAAGCG", "CACCTG"]
try:
seeds_end=temp2[-1]
except IndexError:
seeds_end=seeds_begin
with open("seeds.txt", "w") as f:
f.write("Initial %s\n" % " ".join(seeds_begin))
try:
prev=temp2[0]
except IndexError:
pass
for i, t in enumerate(temp2):
ch=[" "] * number_of_factors
for j in xrange(number_of_factors):
ch[j] = str(j+1) if prev[j] != t[j] else " "
f.write("Round %02i %s\t%s\n" % (i, " ".join(t), " ".join(ch))) # Third field contains a number for each factor that had its seed changes
# compared to the one from the previous iteration
prev = t
return seeds_begin, seeds_end
# This was the old method
# def get_run_time(inputfile):
# timefilename=re.sub("\.out", ".time", inputfile)
# try:
# with open(timefilename, "r") as f:
# timedata = "".join(f.readlines())
# runtime=extract(r"Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (.+)", timedata)
# except IOError:
# runtime="unknown"
# return runtime
def get_run_time(results_output):
temp=extract(r"Whole program took (.+) seconds wall-time", results_output)
try:
s=seconds_to_hms(float(temp))
except TypeError:
s="unknown"
return s
def printmatrix(x, file=sys.stdout, headers=[], colheaders=[], format="%f", sep="\t"):
rows, cols = x.shape
printheaders = len(headers) != 0
printcolheaders = len(colheaders) != 0
assert(printheaders==False or len(headers) == rows)
if printcolheaders:
if printheaders:
file.write(sep)
for j in xrange(cols-1):
file.write("%s" % colheaders[j])
file.write(sep)
file.write("%s" % colheaders[cols-1])
file.write("\n")
for i in range(rows):
if printheaders:
file.write("%s%s" % (headers[i], sep))
file.write(format % x[i,0])
for j in xrange(1,cols):
file.write(sep)
file.write(format % x[i,j])
file.write("\n")
def writematrixfile(x, filename):
with open(filename, "w") as f:
printmatrix(x, f)
# def mycommand(s):
# p = subprocess.Popen(s, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# (output, error) = p.communicate()
# return (p.returncode, output, error)
def myrun(command):
result=os.system("%s 2>&1 > /dev/null" % command)
if result == 0:
print "SUCCESS %s" % command
else:
print "FAILURE %s" % command
def myargmax(a):
return numpy.unravel_index(numpy.argmax(a), a.shape)
def get_monomers(factors, results_output, last_iteration_output):
factor_lengths = [0]*len(factors)
factor_ics = [0]*len(factors)
factor_pwms = [0]*len(factors)
factor_flanked_pwms = [0]*len(factors)
for i, factor in enumerate(factors):
lines=find_lines(results_output, "Fixed matrix %i:" % i, 2, 4)
# with open("%s.pfm" % factor, "w") as f:
with open("monomer.%i.pfm" % i, "w") as f:
f.writelines(lines)
factor_pwms[i] = pwm = readmatrix(lines)
pwm_rc=reverse_complement_pwm(pwm)
# writematrixfile(pwm_rc, "%s-rc.pfm" % factor)
writematrixfile(pwm_rc, "monomer.%i-rc.pfm" % i)
factor_lengths[i] = pwm.shape[1]
factor_ics[i] = matrix_information_content(pwm)
if get_flanks:
lines=find_lines(last_iteration_output, "Flank fixed matrix %i:" % i, 2, 4)
with open("flank-%i.pfm" % i, "w") as f:
f.writelines(lines)
flank_pwm=readmatrix(lines)
factor_flanked_pwms[i] = flank_pwm
flank_pwm_rc=reverse_complement_pwm(flank_pwm)
writematrixfile(flank_pwm_rc, "flank-%i-rc.pfm" % i)
return factor_lengths, factor_ics, factor_pwms, factor_flanked_pwms
def get_dimer_cases(results_output, iterations, last_iteration_output):
for i, cob_factor in enumerate(cob_factors):
number_of_orientations = 1 if use_rna else 3
if cob_factor[0] != cob_factor[1]:
number_of_orientations += 1
tf1, tf2 = cob_factor
# Find the cob table
lines=find_lines(results_output, "Dimer lambdas %s:" % cob_codes[i], 1, number_of_orientations + 1)
temp = readarray(lines)
dmin[i] = int(temp[0,1])
dmax[i] = int(temp[0,-1])
cob_tables[i] = temp[1:,1:].astype(float) # Remove column and row names
#print cob_tables[i]
cob_ic_tables[i]=numpy.zeros(cob_tables[i].shape)
cob_length_tables[i]=numpy.zeros(cob_tables[i].shape) # Lengths of corresponding dimeric WPM
#print len(cob_ic_tables)
#print cob_ic_tables[i].shape
#overlap_table = cob_tables[i][:,0:-dmin[i]] # Only the overlap area of the cob table
try:
oi,di = myargmax(cob_tables[i])
empty=False
best_o = orients[oi] # Maximum lambda cob case is (o,d)
best_d = dmin[i]+di
except ValueError:
empty=True
excluded=extract(r"Cob %s excluded cases: \[(.*)\]" % cob_codes[i], results_output)
if excluded:
excluded=excluded.split(", ")
# for s in excluded:
# o2,d2 = s.split(" ")
# temp[1+orient_dict[o2],1+int(d2)-dmin[i]] = -0.0002 # Gets gray colour in heatmap
# This is for the best case in this cob table
if float(temp[1+orient_dict[best_o],1+int(best_d)-dmin[i]]) > 0.0:
os.system("ln -f -s observed.%s.%s.%i.svg best.observed.%s.svg" % (cob_codes[i], best_o, best_d, cob_codes[i]))
os.system("ln -f -s observed.%s.%s.%i-rc.svg best.observed.%s-rc.svg" % (cob_codes[i], best_o, best_d, cob_codes[i]))
# os.system("ln -f -s expected.%s.%s.%i.svg best.expected.%s.svg" % (cob_codes[i], best_o, best_d, cob_codes[i]))
# os.system("ln -f -s deviation.%s.%s.%i.svg best.deviation.%s.svg" % (cob_codes[i], best_o, best_d, cob_codes[i]))
os.system("ln -f -s three.%s.%s.%s.html best.%s.html" % (cob_codes[i], best_o, best_d, cob_codes[i]))
os.system("ln -f -s three.%s.%s.%s-rc.html best.%s-rc.html" % (cob_codes[i], best_o, best_d, cob_codes[i]))
for row in xrange(0, number_of_orientations):
for d in xrange(dmin[i], dmax[i]+1):
if float(temp[1+row,1+d-dmin[i]]) > 0.00000:
#command="get_cob_case.py %s %i %s %s %i %s" % ("-f" if get_flanks else "", iterations-1, cob_codes[i], orients[row], d, inputfile)
#myrun(command)
dimer_pwm=get_cob_case(cob_codes[i], orients[row], d, monomer_pwms[tf1], monomer_pwms[tf2], results_output, get_flanks)
ic = matrix_information_content(dimer_pwm)
cob_ic_tables[i][row, d-dmin[i]] = ic
cob_length_tables[i][row, d-dmin[i]] = dimer_pwm.shape[1]
with open("cob.%s.cob" % cob_codes[i], "w") as f:
for row in xrange(temp.shape[0]):
f.write("\t".join(temp[row,:]))
f.write("\n")
g = numpy.vectorize(lambda x,y,z : "Lambda %f
IC: %f
Length: %i" % (x,y,z))
cob_titles[i] = g(cob_tables[i], cob_ic_tables[i], cob_length_tables[i])
best_cases_titles[i]=cob_titles[i][oi, di]
def create_monomer_logos(factors, factor_lengths):
# Monomers
for i, f in enumerate(factors):
#os.system("to_logo.sh -n -t %s %s.pfm" % (f, f))
# myrun("myspacek40 -noname -paths --logo %s.pfm %s.svg" % (f, f))
# myrun("myspacek40 -noname -paths --logo %s-rc.pfm %s-rc.svg" % (f, f))
myrun("myspacek40 %s --logo monomer.%i.pfm monomer.%i.svg" % (myspacek_flags, i, i))
myrun("myspacek40 %s --logo monomer.%i-rc.pfm monomer.%i-rc.svg" % (myspacek_flags, i, i))
if get_flanks:
g="flank-%i" % i
if monomer_flanked_pwms[i].shape[1] <= max_logo_width:
myrun("myspacek40 %s -core=%i --logo %s.pfm %s.svg" % (myspacek_flags, factor_lengths[i], g, g))
myrun("myspacek40 %s -core=%i --logo %s-rc.pfm %s-rc.svg" % (myspacek_flags, factor_lengths[i], g, g))
else:
print "Could not create logo for flanked monomer %i: too wide logo" % i
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval),
cmap(numpy.linspace(minval, maxval, n)))
return new_cmap
def float_to_string(f):
if -0.0002 <= f <= 0.0:
return "-"
else:
return '%.0f' % (f*1000)
def make_heatmap(data, drange, title="", outputfile="", fontsize=32.0):
# plt.style.use('ggplot')
linewidth=1.0
# fontsize=32.0
labelfontsize=fontsize*0.6
tickfontsize=fontsize*0.6
# rcParams['axes.titlepad'] = 20 # Padding between the title and the plot, requires recent version of matplotlib
width=data.shape[1]
height=data.shape[0] # number of orientations
fig = plt.figure()
ax = plt.subplot(111)
for y in range(data.shape[0]):
for x in range(data.shape[1]):
# plt.text(x + 0.5, y + 0.5, '%.4f' % data[y, x],
plt.text(x, y, float_to_string(data[y, x]),
horizontalalignment='center',
verticalalignment='center',
)
#plt.colorbar(heatmap)
#cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red","violet","blue"])
# white_colors = [(1, 1, 1), (1, 1, 1)]
# white_cm = matplotlib.colors.LinearSegmentedColormap.from_list("valko", white_colors, N=256)
cmap = plt.get_cmap('YlOrRd')
#m=np.max(data)
# subcmap = cmap
subcmap = truncate_colormap(cmap, 0.0, 0.8)
subcmap.set_under(color=u'white', alpha=None)
# constant_color = plt.cm.Blues(np.linspace(1, 1, 2))
# stacking the 2 arrays row-wise
# colors1 = white_cm(numpy.linspace(0, 1, 256))
# colors2 = plt.cm.Reds(np.linspace(0, 1, 256))
# colors2 = subcmap(numpy.linspace(0, 1, 256))
# combined_colors = numpy.vstack((colors1, colors2))
# combined_cmap = matplotlib.colors.LinearSegmentedColormap.from_list('colormap', combined_colors)
# combined_cmap = truncate_colormap(combined_cmap, -0.0002, 1.0)
plt.imshow(data, vmin=0.0, cmap=subcmap, interpolation='nearest', aspect='equal')
divider = make_axes_locatable(ax)
if title:
plt.title(title, fontsize=fontsize)
plt.yticks(fontsize=tickfontsize)
ax.yaxis.set_ticks(numpy.arange(0,height,1))
number_of_orientations=data.shape[0]
ax.set_yticklabels(orients[0:number_of_orientations])
plt.xticks(fontsize=tickfontsize)
ax.xaxis.set_ticks(numpy.arange(0,width,1))
ax.set_xticklabels(["%i" % i for i in drange])
plt.tick_params(axis='both', which='both', bottom=False, top=False, right=False, left=False)
# These lines will create grid in minor tick, that is, between cells
ax.set_xticks(numpy.arange(-0.5, width, 1), minor=True);
ax.set_yticks(numpy.arange(-0.5, height, 1), minor=True);
# Gridlines based on minor ticks
ax.grid(which='minor', color='black', linestyle='-', linewidth=1)
cax = divider.append_axes("right", size="5%", pad=0.05)
try:
cb=plt.colorbar(cax=cax)
tick_locator = ticker.MaxNLocator(nbins=5)
cb.locator = tick_locator
##cb.ax.yaxis.set_major_locator(matplotlib.ticker.AutoLocator())
cb.update_ticks()
temp=cax.get_yticklabels()
for i,t in enumerate(temp):
#print temp[i].get_text()
# temp[i].set_text("%.0f" % (float(temp[i].get_text())*1000)) # Multiply values in colorbar by 1000
temp[i] = "%.0f" % (float(temp[i].get_text())*1000) # Multiply values in colorbar by 1000
cax.set_yticklabels(temp, fontsize=tickfontsize)
except UnicodeEncodeError: # If labels contain unicode minus, then something went wrong and better not show colorbar
cb.remove()
pass
# cax.yaxis.set_tick_params(labelright=False) # No tick labels in colorbar
if outputfile:
plt.savefig(outputfile, format="svg", bbox_inches="tight")
else:
pass#plt.show()
def visualize_cobs(cobs, cob_codes):
for i, (cob, code) in enumerate(zip(cobs, cob_codes)):
f = "cob.%s" % code
# myrun('heatmap.R -z 8 -c -s -f "%%.3f" -t %s %s.cob' % (f, f))
data=cob_tables[i]
vfunc = numpy.vectorize(lambda x: x if x > 0.0 else -0.0002)
data=vfunc(data)
drange = range(dmin[i], dmax[i]+1)
print "Creating heatmap for %s" % cob
make_heatmap(data, drange, cob, "%s.svg" % f, fontsize=20.0)
# myrun('heatmap.R -z 12 -c -s -i -t %s %s.cob 2> /dev/null > /dev/null' % (cob, f))
# myrun("sed -i '/page/d' %s.svg" % f) # R or its pheatmap package make svg files corrupt. This fixes it.
# myrun('heatmap.R -z 8 -c -s -f "%%.3f" -t %s %s.cob' % (f, f))
###################################################################################
#
# Create distance, information content, lambda, log likelihood and parameter plots
#
###################################################################################
def myplot(data, title="", xlab="", ylab="", ymax=None, headers=[], outputfile=""):
linewidth=2.0
fontsize=32.0
labelfontsize=fontsize*0.6
tickfontsize=fontsize*0.6
fig = plt.figure()
ax = plt.subplot(111)
if title:
plt.title(title, fontsize=fontsize, fontname='sans-serif')
# if ylab=="mll":
# plt.ylabel(ylab, fontsize=labelfontsize, labelpad=20)
# else:
plt.ylabel(ylab, fontsize=labelfontsize)
plt.xlabel(xlab, fontsize=labelfontsize)
plt.xticks(fontsize=tickfontsize)
plt.yticks(fontsize=tickfontsize)
if not ymax is None:
a = ymax*0.05 # Add margin to y-axis
plt.ylim(0.0-a, ymax+a)
ax.plot(data, linewidth=linewidth)
ax.margins(x=0.05) # Add margin to x-axis
plt.grid(True)
if headers:
# Shrink current axis by 30%
box = ax.get_position()
if ylab=="mll": # Make some more room for ylabel because of longer ytick labels
ax.set_position([box.x0+0.10*box.width, box.y0, box.width * 0.7, box.height])
else:
ax.set_position([box.x0, box.y0, box.width * 0.7, box.height])
# Put a legend to the right of the current axis
plt.legend(headers, title="Models", loc='center left', bbox_to_anchor=(1, 0.5))
if outputfile:
plt.savefig(outputfile, format="svg")
else:
plt.show()
def create_graphs(full_output, factors, cobs, cob_codes):
temp=extract_list(r"Log likelihood is (.+)", full_output)
mll_header=["Log likelihood"]
mll_data=[]
with open("mll.txt", "w") as f:
f.write("%s\n" % ("\t".join(mll_header)))
for t in temp:
mll_data.append(t)
f.write("%s\n" % t)
temp=extract_list(r"Total number of parameters is (.+)", full_output)
parameters_header=["Number of parameters"]
parameters_data=[]
with open("parameters.txt", "w") as f:
f.write("%s\n" % ("\t".join(parameters_header)))
for t in temp:
parameters_data.append(t)
f.write("%s\n" % t)
temp=extract_list(r"Intermediate average information content of fixed models: \[(.+)\]", full_output)
ics_header=factors
ics_data=[]
with open("ics.txt", "w") as f:
f.write("%s\n" % ("\t".join(ics_header)))
for t2 in temp:
t = t2.split(", ")
ics_data.append(t)
f.write("%s\n" % "\t".join(t))
distances_header=factors+cobs
distances_data=[]
with open("distances.txt", "w") as f:
cols=len(factors+cobs)
temp=extract_list(r"Fixed distances are \[(.+)\]", full_output)
rows=len(temp)
a=numpy.empty((rows,cols))
for r,t in enumerate(temp):
for c,x in enumerate(t.split(", ")):
a[r,c]=float(x)
for c, cob in enumerate(cob_codes):
query=r"Max distance in deviation %s is (.+)" % cob
#print query
temp=extract_list(query, full_output)
#print temp
for r,t in enumerate(temp):
a[r,len(factors)+c] = float(t)
f.write("%s\n" % ("\t".join(distances_header)))
#printmatrix(a, sys.stdout, headers=[], colheaders=[], format="%f", sep="\t")
printmatrix(a, f, headers=[], colheaders=[], format="%f", sep="\t")
distances_data = a.tolist()
lambdas_header=factors+cobs+["bg"]
lambdas_data=[]
with open("lambdas.txt", "w") as f:
temp=extract_list(r"Intermediate fixed lambdas are \[(.+)\]", full_output)
atemp=[t.split(", ") for t in temp]
#print atemp
a=numpy.array(atemp)
btemp=[]
for cob in cob_codes:
#print "Cob is %s" % cob
temp=extract_list(r"Intermediate sum of dimer lambdas %s is (.+)" % cob, full_output)
btemp.append(temp)
# print btemp
b=numpy.transpose(numpy.array(btemp))
temp=extract_list(r"Intermediate background lambda is (.+)", full_output)
c=numpy.transpose(numpy.array([temp]))
f.write("%s\n" % ("\t".join(lambdas_header)))
if len(cob_codes) > 0:
d=numpy.concatenate((a,b,c), 1)
else:
d=numpy.concatenate((a,c), 1)
#printmatrix(d, format="%s")
printmatrix(d, f, headers=[], colheaders=[], format="%s", sep="\t")
lambdas_data=d.tolist()
myplot(mll_data, "%s log likelihood" % name, "iterations", "mll", headers=lambdas_header, outputfile="mll.svg")
myplot(parameters_data, "%s number of parameters" % name, "iterations", "params", headers=parameters_header, outputfile="parameters.svg")
myplot(ics_data, "%s information content" % name, "iterations", "IC", ymax=2.0, headers=ics_header, outputfile="ics.svg")
myplot(distances_data, "%s convergence" % name, "iterations", "distance", ymax=1.0, headers=distances_header, outputfile="distances.svg")
myplot(lambdas_data, "%s lambdas" % name, "iterations", "lambda", ymax=1.0, headers=lambdas_header, outputfile="lambdas.svg")
# myrun('plot.R -c -s -x iterations -y mll -t "%s log likelihood" mll.txt' % name)
# myrun('plot.R -c -s -x iterations -y params -t "%s number of parameters" parameters.txt' % name)
# myrun('plot.R -c -s -b 2.0 -x iterations -y IC -t "%s information content" ics.txt' % name)
# myrun('plot.R -c -s -b 1.0 -h 0.01 -x iterations -y distance -t "%s distances" distances.txt' % name)
# myrun('plot.R -c -s -b 1.0 -x iterations -y lambda -t "%s lambdas" lambdas.txt' % name)
# This is to locate helper scripts in the same directory as this file
execdir=os.path.abspath(os.path.dirname(sys.argv[0]))
#print "execdir is %s" % execdir
path=os.getenv("PATH")
os.putenv("PATH", path+":"+execdir)
usage="""Usage:
\tto_html.py tf1name,tf2name,... moderoutputfile
Parses the output created by MODER and converts it to a graphical
html page. The first parameter is a comma separated list
of factor names. The second parameter is the name of the
MODER output file.
A directory 'moderoutputfile.report' is created, and
the report will be in file 'moderoutputfile.report/index.html'
"""
try:
optlist, args = getopt.getopt(sys.argv[1:], 'hd', ["help", "debug"])
except getopt.GetoptError as e:
print e
sys.stderr.write(usage)
sys.exit(1)
optdict = dict(optlist)
args = [sys.argv[0]]+ args
debug=False
#print optdict
for o, a in optlist:
if o in ("-h", "--help"):
print usage
sys.exit(0)
elif o in ("-d", "--debug"):
debug=True
print "Debugging on"
else:
sys.stderr.write("Unknown option: %s\n" % o)
sys.stderr.write(usage)
sys.exit(1)
if len(args) == 1:
print usage
sys.exit(0)
elif len(args) != 3:
sys.stderr.write("Error, give two parameters.\n")
sys.stderr.write(usage)
sys.exit(1)
name=args[1]
orig=inputfile=args[2]
if inputfile[1:].count(".") > 0: # Contains a file extension
mydir=re.sub("\.[^.]*?$", ".report", inputfile)
else:
mydir="%s.report" % inputfile
reportfile="%s/index.html" % mydir
inputfile = "../%s" % os.path.basename(inputfile)
try:
os.mkdir(mydir)
except OSError:
pass
os.chdir(mydir)
factors=name.split(',') # For example ['FLI1a', 'FLI1b']
try:
with open(inputfile) as f:
full_output = "".join(f.readlines())
except IOError:
sys.stderr.write("Could not read file %s. Exiting!\n" % orig)
sys.exit(1)
use_rna = extract(r"Use RNA alphabet: (.*)", full_output)
if use_rna == "yes":
use_rna = True
orients = rna_orients
orient_dict = rna_orient_dict
myspacek_flags="-paths -noname -rna"
else:
use_rna = False
orients = dna_orients
orient_dict = dna_orient_dict
myspacek_flags="-paths -noname"
cob_factors=extract(r"Cob combinations are ([0-9,-]*)", full_output)
if cob_factors:
cob_factors=cob_factors.split(",")
else:
cob_factors=[]
cob_factors=[map(int, x.split("-")) for x in cob_factors] # cob_factors=[[0,0], [1,1], [0,1]]
number_of_cobs=len(cob_factors)
factor_codes=set()
for x,y in cob_factors:
factor_codes.add(x)
factor_codes.add(y)
# In the if clause below, if only one name e.g. HNF4A is given and four codes 0,1,2,3 are used in cob types, then form names
# HNF4Aa,HNF4Ab,HNF4Ac,HNF4Ad
if len(factors) == 1 and len(factor_codes) > 1:
new_factors=[factors[0]+chr(ord('a')+x) for x in xrange(max(factor_codes)+1) ]
factors = new_factors
number_of_factors=len(factors)
cobs=['-'.join([factors[x[0]], factors[x[1]]]) for x in cob_factors ] # cobs=["TEAD4-TEAD4", "ERG-ERG", "TEAD4-ERG"]
cob_codes=[ "-".join(map(str,x)) for x in cob_factors ] # cob_codes=["0-0", "1-1", "0-1"]
command="sed -n '/Results/,$p' %s" % inputfile
(ret_val, results_output, error) = mycommand(command)
assert ret_val == 0
######################################################################################################
#
# to_logos
#
cob_tables=[0]*number_of_cobs
cob_ic_tables=[0]*number_of_cobs
cob_length_tables=[0]*number_of_cobs
cob_titles=[0]*number_of_cobs
dmin=[0]*number_of_cobs
dmax=[0]*number_of_cobs
get_flanks = extract(r"Get full flanks: (.*)", full_output) == "yes"
iterations = int(extract(r"EM-algorithm took (.*) = .* iterations", results_output))
command="sed -n '/Round %i/,/^-+$/p' %s" % (iterations-1,inputfile) # Output from last iteration onwards
(ret_val, last_iteration_output, error) = mycommand(command)
monomer_lengths, monomer_ics, monomer_pwms, monomer_flanked_pwms = get_monomers(factors, results_output, last_iteration_output)
best_cases_titles=[0]*number_of_cobs
get_dimer_cases(results_output, iterations, last_iteration_output)
create_monomer_logos(factors, monomer_lengths)
visualize_cobs(cobs, cob_codes)
if debug:
create_graphs(full_output, factors, cobs, cob_codes)
# temp=extract_list(r"Background distribution \(intermed\): \[(.+)\]", full_output)
# bg=[x.split(", ") for x in temp]
# with open("background.txt", "w") as f:
# bg_t=numpy.array(bg).transpose()
# for t in bg_t:
# f.write("%s\n" % ("\t".join(t)))
# myrun('myspacek40 --logo -paths background.txt background.svg')
######################################################################################################
#
# Print html
#
#logo_files = [ s+".svg" for s in factors ] # logo_files=["TEAD4.svg", "ERG.svg"]
logo_files = [ "monomer.%i.svg" % i for i in xrange(number_of_factors) ] # logo_files=["monomer.0.svg", "monomer.1.svg"]
#cob_files = [ s+".svg" for s in cobs ] # cob_files=["TEAD4-TEAD4.svg", "ERG-ERG.svg", "TEAD4-ERG.svg"]
cob_files = [ "cob.%s.svg" % s for s in cob_codes ] # cob_files=["cob.0-0.svg", "cob.1-1.svg", "cob.0-1.svg"]
cob_links = [ "cob.%s.array.html" % s for s in cob_codes ] # ["cob.0-0.array.html", "cob.1-1.array.html", ... ]
best_cases, best_cases_links, best_cases_headers = get_best_cob_cases(cob_codes)
lambda_table = get_lambda_table(results_output)
iterations, maxiter, Lmin, Lmax, lines, epsilon, excluded, command, start_time, version, hostname, threads = get_info(results_output, full_output, cob_codes)
#Background distribution: [0.30201, 0.294127, 0.202849, 0.201013]
bg_dist = map(float, extract(r"Background distribution: \[(.*)\]", results_output).split(', '))
seeds_begin, seeds_end = get_seeds(full_output, number_of_factors)
def get_monomer_modularity(full_output):
temp=extract_list(r"Is monomer pwm learnt purely modularly: \[(.+)\]", full_output)
temp2=[x.split(", ") for x in temp]
return temp2[-1]
monomer_modularity = get_monomer_modularity(full_output)
runtime = get_run_time(results_output)
f = open('index.html', 'w')
f.write("<html>\n")
f.write("<head>\n")
javascript='''
<script type="text/javascript">
function myclick(e, ending)
{
t=e.target;
t.style.borderColor="red";
logo_container = t.parentNode.parentNode;
image_node = logo_container.getElementsByClassName("image")[0];
anchor_node = logo_container.getElementsByTagName("a")[0];
if (t.className=="normal") {
var other=logo_container.getElementsByClassName("rc")[0];
image_node.src = image_node.src.replace("-rc.svg", ".svg");
anchor_node.href= anchor_node.href.replace("-rc".concat(ending), ending);
} else {
var other=logo_container.getElementsByClassName("normal")[0];
if (! image_node.src.endsWith("-rc.svg")) {
image_node.src= image_node.src.replace(".svg", "-rc.svg");
anchor_node.href= anchor_node.href.replace(ending, "-rc".concat(ending));
}
}
other.style.borderColor="white";
}
</script>
'''
f.write(javascript)
f.write('<link rel="stylesheet" href="style.css" type="text/css" />\n')
f.write("<title>%s - %s</title>\n" % (name, re.sub("^../", "", inputfile)))
f.write("</head>\n")
f.write("<body>\n")
f.write("<h1>MODER - MOtif DEtectoR</h1>\n")
f.write("<h2>%s - %s</h2>\n" % (name, re.sub("^../", "", inputfile)))
###################
#
# Print the infobox
f.write('<div id="info">\n')
f.write('<div style="float: left; padding: 20px;">\n')
f.write("<ul>\n")
#print """<li>Result file: <a href="%s" onclick="window.open('%s', 'newwindow', 'width=300, height=250'); return false;">%s</a></li>""" % (inputfile, inputfile, inputfile)
f.write("""<li>Result file: <a href="%s">%s</a></li>""" % (inputfile, re.sub("^../", "", inputfile)))
if Lmin == Lmax:
L="%s" % Lmin
else:
L="%s-%s" % (Lmin, Lmax)
f.write("<li>Data contains %i sequences of length %s </li>" % (lines, L))
f.write("<li>Running time was (wall-clock) %s</li>" % runtime)
if iterations == maxiter:
f.write("<li>EM-algorithm took <span style='color: red;'>%i iterations</span> (max-iter=%i)</li>" % (iterations, maxiter))
else:
f.write("<li>EM-algorithm took %i iterations (max-iter=%i)</li>" % (iterations, maxiter))
f.write("<li>Convergence criterion cutoff is %g</li>" % epsilon)
f.write("<li>Excluded cob cases: %i</li>" % excluded)
f.write("<li>Are monomers learnt modularly: %s</li>" % " ".join(monomer_modularity))
f.write('<li>Initial and final <a href="seeds.txt">consensus sequences</a> of lengths %s:</li>' % (" ".join(map(lambda x : str(len(x)), seeds_begin))))
f.write("<ul>")
f.write('<li style="font-family: monospace;">%s</s>' % (" ".join(seeds_begin)))
f.write('<li style="font-family: monospace;">%s</s>' % (" ".join(seeds_end)))
f.write("</ul>")
f.write("<li>Bg: %s</li>" % (" ".join(["%.2f" % x for x in bg_dist])))
f.write("</ul>")
f.write("</div>\n")
f.write('<div id="lambdaTable" style="float: left; padding: 20px;">\n')
make_table(lambda_table, ["Model", "Lambda"], f)
f.write("</div>\n")
f.write("</div>\n")
###################
#
# Print the factors
monomer_lambdas=[ y for x,y in lambda_table][0:number_of_factors]
# These are for the title attribute of the images
monomer_titles=[ "Lambda: %f
IC: %.2f
Length: %i" % (l,i, length) for l, i, length in zip(monomer_lambdas, monomer_ics, monomer_lengths)]
f.write('<div id="factors">')
f.write('<h2>Monomer motifs</h2>')
make_table_h(f, logo_files, factors, monomer_titles)
f.write("</div>")
###################
#
# Print the cob tables and the best case from each cob table
if number_of_cobs > 0:
f.write('<div id="cobs">')
f.write('<h2>COB tables</h2>')
make_table_v2(f, cob_files, [""]*number_of_cobs, cob_links)
f.write('<h2>Strongest dimeric case from each cob table</h2>')
make_table_v3(f, best_cases, best_cases_headers, best_cases_links, ".html", best_cases_titles)
f.write("</div>")
###############################
#
# Print the factors with flanks
if get_flanks:
f.write('<div id="flankfactors">')
f.write('<h2>Monomer motifs with flanks</h2>')
flank_logo_files=["flank-%i.svg" % i for i in xrange(number_of_factors)]
make_table_v3(f, flank_logo_files, factors, [x.replace(".svg", ".pfm") for x in flank_logo_files], ".pfm", monomer_titles)
f.write("</div>")
###################
#
# Print behaviour as function of iterations
if debug:
f.write('<div id="iterations">')
#(files, headers=[], links=[], f, titles=[])
make_table_v(["distances.svg", "ics.svg", "lambdas.svg", "mll.svg", "parameters.svg"], f=f)
f.write("</div>")
#print '<div id="background">'
#print '<p><em>Note.</em> Background model is a multinomial distribution for mononucleotides. In the below logo each position gives the background distribution of the corresponding EM-iteration.</p>'
#print '<img src="background.svg"\>'
#print '</div>'
citation="""Jarkko Toivonen, Teemu Kivioja, Arttu Jolma, Yimeng Yin, Jussi Taipale, Esko Ukkonen (2018) Modular discovery of monomeric and dimeric
transcription factor binding motifs for large data sets, <i>Nucleic Acids Research</i>,
Volume 46, Issue 8, 4 May 2018, Pages e44."""
bibtex=""
moder_doi="https://doi.org/10.1093/nar/gky027"
f.write('<div id="programinfo">\n')
f.write("<ul>\n")
f.write("<li>Command line was: %s</li>\n" % command)
f.write("<li>Program started at: %s</li>\n" % start_time)
f.write("<li>MODER version: %s</li>\n" % version)
f.write("<li>MODER was run on host: %s</li>\n" % hostname)
f.write("<li>Number of simultaneous threads: %s</li>\n" % threads)
f.write("<li>MODER is available from <a href='https://github.com/jttoivon/MODER'>GitHub</a></li>")
f.write("<li>If you use MODER in your research, please cite: %s <a href='%s'>Link to article.</a>\n</li>" % (citation,moder_doi))
f.write("</ul>\n")
f.write("</div>")
f.write("</body>")
f.write("</html>")
##################################################################################################################
#
# Print the cob.x-y.array.html files that contain the observed, expected and deviation logos for all dimeric cases
#
# dmin=[0]*number_of_cobs
# dmax=[0]*number_of_cobs
# cob_tables=[0]*number_of_cobs
for i, cob_factor in enumerate(cob_factors):
number_of_orientations = 1 if use_rna else 3
if cob_factor[0] != cob_factor[1]:
number_of_orientations += 1
# Find the cob table
# lines=common.find_lines(data, "Dimer lambdas %s:" % cob_codes[i], 1, number_of_orientations + 1)
# temp = common.readarray(lines)
# dmin[i] = int(temp[0,1])
# dmax[i] = int(temp[0,-1])
# cob_tables[i] = temp[1:,1:].astype(float) # Remove column and row names
# g = numpy.vectorize(lambda x : "Lambda %f" % x)
# cob_titles[i] = g(cob_tables[i])
link_table = []
link_expected_table = []
link_deviation_table = []
link_flank_table = []
for row in xrange(0, number_of_orientations):
temp_list=[]
temp_list2=[]
temp_list3=[]
temp_list4=[]
for d in xrange(dmin[i], dmax[i]+1):
if cob_tables[i][row,d-dmin[i]] > 0.00000:
title=cob_titles[i][row, d-dmin[i]]
ending="%s.%s.%i" % (cob_codes[i], orients[row], d)
html="three.%s.html" % ending
# temp_list.append('<a href="three.%s.html"><img src="observed.%s.svg"\></a>' % (ending, ending))
temp_list.append(logo_container(html, "observed.%s.svg" % ending, ".html", title))
temp_list2.append(logo_container(html, "expected.%s.svg" % ending, ".html", title))
temp_list3.append(logo_container(html, "deviation.%s.svg" % ending, ".html", title))
temp_list4.append(logo_container(html, "flank.%s.svg" % ending, ".html", title))
else:
temp_list.append('<p>-</p>')
temp_list2.append('<p>-</p>')
temp_list3.append('<p>-</p>')
temp_list4.append('<p>-</p>')
link_table.append(temp_list)
link_expected_table.append(temp_list2)
link_deviation_table.append(temp_list3)
link_flank_table.append(temp_list4)
with open("cob.%s.array.html" % cob_codes[i], "w") as f:
f.write("<title>%s PPMs - %s</title>\n" % (name, inputfile))
f.write('<link rel="stylesheet" href="style.css" type="text/css" />\n')
f.write(javascript)
f.write('<a href="cob.%s.cob" type="text/plain"><img src="cob.%s.svg"\></a>\n' % (cob_codes[i], cob_codes[i]))
# Display the logos involved in the cob table
f.write('<div id="factors">')
index_set=set(cob_factor) # If both indices are the same, then return just one
make_table_h(f, [logo_files[i] for i2 in index_set], [factors[i] for i3 in index_set], [monomer_titles[i] for i4 in index_set])
f.write("</div>")
f.write('<h3 class="tableheading">Observed Matrices</h3>\n')
make_better_table(f, link_table, [""]+range(dmin[i], dmax[i]+1), orients, htmlclass="ppmtable")
f.write('<h3 class="tableheading">Expected Matrices</h3>\n')
make_better_table(f, link_expected_table, [""]+range(dmin[i], dmax[i]+1), orients, htmlclass="ppmtable")
f.write('<h3 class="tableheading">Deviation Matrices</h3>\n')
make_better_table(f, link_deviation_table, [""]+range(dmin[i], dmax[i]+1), orients, htmlclass="ppmtable")
if get_flanks:
for j in xrange(len(factors)):
f.write('<h3 class="tableheading">Flanked Monomer Matrix %i</h3>\n' % j)
f.write(logo_container("flank-%i.pfm" % j, "flank-%i.svg" % j, ".pfm", monomer_titles[j]))
#f.write('<img src="flank-%i.svg"\>\n' % j)
f.write('<h3 class="tableheading">Flanked Dimer Matrices</h3>\n')
make_better_table(f, link_flank_table, [""]+range(dmin[i], dmax[i]+1), orients, htmlclass="ppmtable")
#myrun("cp %s/style.css ." % execdir)
with open("monomer_weights.txt", "w") as f:
f.write("%s\n" % (",".join(factors)))
f.write("%s\n" % (",".join(map(str, monomer_lambdas))))
with open("style.css", "w") as f:
f.write(css)
print "The report is in file %s" % reportfile