https://github.com/fjruizruano/ngs-protocols
Raw File
Tip revision: 39a091d1fa569a7fc717ac73c4b3de07f0a1204d authored by fjruizruano on 03 August 2023, 11:48:27 UTC
adding gfa2fas.py and extract_gfa.py
Tip revision: 39a091d
sequence_ref_alt.py
#!/usr/bin/python

import sys
from Bio import SeqIO

print "Usage: sequence_ref_alt.py FastaFile RefAltFile"

try:
    fas = sys.argv[1]
except:
    fas = raw_input("Introduce FASTA file: ")

try:
    refalt = sys.argv[2]
except:
    refalt = raw_input("Introduce Ref-Alt File")

eq = {0:"N",1:"N",2:"A",3:"C",4:"T",5:"G"}

sequences = SeqIO.parse(open(fas), "fasta")

ra_data = open(refalt).readlines()

ra_dict = {}

for line in ra_data:
    info = line.split()
    name = info[0]
    posi = int(info[1])-1
    ref = int(info[2])
    ref = eq[ref]
    alt = int(info[3])
    alt = eq[alt]
    if name in ra_dict:
        ra_dict[name][posi] = [ref,alt]
    else:
        ra_dict[name] = {posi:[ref,alt]}

w = open("sequences_ref_alt.fasta", "w")

for s in sequences:
    name = str(s.id)
    seq_ref = list(str(s.seq))
    seq_alt = list(str(s.seq))
    if name in ra_dict:
        for el in ra_dict[name]:
            nucs = ra_dict[name][el]
            seq_ref[el] = nucs[0]
            seq_alt[el] = nucs[1]
    w.write(">%s_ref\n%s\n" % (name,"".join(seq_ref)))
    w.write(">%s_alt\n%s\n" % (name,"".join(seq_alt)))


w.close()
back to top