https://github.com/fjruizruano/ngs-protocols
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
rexp_select_contigs.py
#!/usr/bin/python
import re
import operator
from Bio import SeqIO
# Separators used to break a contig description into fields: the literal
# markers "CL" and "Contig", a hyphen ("\055" is the octal escape for "-"),
# or a single space.
FIELD_SEPARATORS = re.compile("CL|Contig|\055| ")


def parse_description(description):
    """Return (cluster, contig, coverage) parsed from a FASTA description.

    The description is split on FIELD_SEPARATORS; fields 1 and 2 are kept
    as strings (cluster id and contig id) and field 4 is converted to a
    float (stored as the contig's coverage value).

    NOTE(review): this presumably targets headers shaped like
    "CL1Contig2 (213-10.5-2245)" -- confirm against the tool that
    produces "out.txt".

    Raises:
        IndexError: the description yields fewer than five fields.
        ValueError: field 4 is not a valid float.
    """
    fields = FIELD_SEPARATORS.split(description)
    return fields[1], fields[2], float(fields[4])


def group_coverage_by_cluster(descriptions):
    """Build {cluster: {contig: coverage}} from an iterable of descriptions.

    A contig that appears more than once within a cluster keeps the
    coverage of its last occurrence.
    """
    clusters = {}
    for description in descriptions:
        cluster, contig, coverage = parse_description(description)
        clusters.setdefault(cluster, {})[contig] = coverage
    return clusters


def select_top_contigs(coverages, divisor=3.0):
    """Return the highest-coverage contigs of one cluster.

    Contigs are visited from highest to lowest coverage and collected
    until their accumulated coverage reaches the cluster total divided by
    `divisor` (one third of the total by default). The contig that
    crosses the threshold is included.

    Args:
        coverages: mapping of contig id -> coverage for a single cluster.
        divisor: the cluster total is divided by this to get the cut-off.

    Returns:
        List of contig ids, ordered by decreasing coverage.
    """
    # Sort ascending and then reverse (instead of sorting with
    # reverse=True) so contigs with equal coverage keep the same relative
    # order the script has always produced.
    ranked = sorted(coverages.items(), key=operator.itemgetter(1))
    ranked.reverse()

    threshold = sum(coverage for _, coverage in ranked) / divisor

    selected = []
    accumulated = 0
    for contig, coverage in ranked:
        if accumulated >= threshold:
            break
        selected.append(contig)
        accumulated += coverage
    return selected


def main():
    """Read "out.txt" (FASTA) and write the selected contig names to "out.list".

    One "CL<cluster>Contig<contig>" name is written per line, clusters in
    ascending numeric order and, within a cluster, contigs in decreasing
    coverage order.
    """
    records = SeqIO.parse("out.txt", "fasta")
    clusters = group_coverage_by_cluster(
        record.description for record in records
    )

    # Cluster ids are converted to int so the output is sorted numerically
    # (CL2 before CL10) rather than lexicographically.
    selection = {}
    for cluster, coverages in clusters.items():
        selection[int(cluster)] = select_top_contigs(coverages)

    # The context manager closes the output file even if a write fails.
    with open("out.list", "w") as out:
        ordered = sorted(selection.items(), key=operator.itemgetter(0))
        for cluster_id, contigs in ordered:
            for contig in contigs:
                out.write("CL%sContig%s\n" % (str(cluster_id), contig))


if __name__ == "__main__":
    main()