https://github.com/JuliaFlynn/PacBio_barcode_assocation
Tip revision: 29eac92475a9ff8e24fb390986c865b504c03f51 authored by JuliaFlynn on 26 May 2022, 22:22:57 UTC
Update README.md
Update README.md
Tip revision: 29eac92
pacbio7b.pl
#!/usr/bin/perl
#
# inputs 01_bc_ORF_association file (ordered alphabetically by bc seq)
# outputs bc_ORF organized relative to mutations to parental sequence
# updated to organize by amino acid then codon
use POSIX ;
use List::Util qw(first) ;
# aatoi(AA)
# Convert a one-letter amino-acid code (case-insensitive) into its numeric
# index: the 20 residues in alphabetical order of their letter (A=0 ... Y=19)
# followed by the stop symbol "*" (=20).
# Returns 99 for anything that is not exactly one of those 21 characters.
sub aatoi {
    my $residue = uc($_[0]) ;
    # The position of a letter inside this string is its index.
    my $ordered = "ACDEFGHIKLMNPQRSTVWY*" ;
    my $slot = -1 ;
    # Only single characters can match; the length guard also keeps the
    # empty string (which index() would report at position 0) out.
    if (length($residue) == 1) {
        $slot = index($ordered, $residue) ;
    }
    return $slot < 0 ? 99 : $slot ;
}
# itoaa(INDEX)
# Convert a numeric index into a one-letter amino-acid code: 0..19 map to
# A C D E F G H I K L M N P Q R S T V W Y and 20 maps to the stop symbol "*".
# Any value that is not numerically equal to one of 0..20 yields "Z".
sub itoaa {
    my $wanted = $_[0] ;
    my @letters = split(//, "ACDEFGHIKLMNPQRSTVWY*") ;
    my $letter = "Z" ;
    # Compare numerically against every slot (rather than subscripting the
    # array) so fractional or negative arguments still fall through to "Z".
    for my $slot (0 .. $#letters) {
        $letter = $letters[$slot] if $wanted == $slot ;
    }
    $letter ;
}
# itocodon(INDEX)
# Convert a codon index (1..64) into its lower-case three-letter codon.
# Codons are numbered in alphabetical order over the bases a, c, g, t:
# 1 = "aaa", 2 = "aac", ... 64 = "ttt".
# Returns undef when INDEX is not a whole number between 1 and 64.
sub itocodon {
    my $index = $_[0] ;
    my $codon ;
    if ($index >= 1 && $index <= 64 && $index == int($index)) {
        # Read (index - 1) as a three-digit base-4 number; each digit
        # selects one base.
        my @bases = qw(a c g t) ;
        my $offset = $index - 1 ;
        $codon = $bases[int($offset / 16)]
               . $bases[int($offset / 4) % 4]
               . $bases[$offset % 4] ;
    }
    $codon ;
}
# codontoi(CODON)
# Convert a codon into its index 1..64. Only the first three characters of
# the argument are considered, case-insensitively. Codons are numbered in
# alphabetical order over the bases a, c, g, t: "aaa" = 1 ... "ttt" = 64.
# Returns undef when those characters are not three of a/c/g/t.
sub codontoi {
    my $triplet = substr(lc($_[0]), 0, 3) ;
    my $rank ;
    if (length($triplet) == 3) {
        # Each base contributes one base-4 digit (a=0, c=1, g=2, t=3);
        # index() gives -1 for any other character.
        my ($first, $second, $third) =
            map { index("acgt", $_) } split(//, $triplet) ;
        if ($first >= 0 && $second >= 0 && $third >= 0) {
            $rank = 16 * $first + 4 * $second + $third + 1 ;
        }
    }
    $rank ;
}
# codontoaa(CODON)
# Translate a codon into a one-letter amino-acid code, with "*" for the
# three stop codons taa, tag and tga. Only the first three characters of
# the argument are considered, case-insensitively.
# Returns undef when those characters are not three of a/c/g/t.
sub codontoaa {
    my $triplet = substr(lc($_[0]), 0, 3) ;
    # One residue per codon, listed in alphabetical codon order
    # (aaa, aac, aag, aat, aca, ... ttt); four codons per group.
    my $translation = "KNKN" . "TTTT" . "RSRS" . "IIMI"
                    . "QHQH" . "PPPP" . "RRRR" . "LLLL"
                    . "EDED" . "AAAA" . "GGGG" . "VVVV"
                    . "*Y*Y" . "SSSS" . "*CWC" . "LFLF" ;
    my $aa ;
    if (length($triplet) == 3) {
        # Each base is one base-4 digit (a=0, c=1, g=2, t=3); together they
        # give the codon's offset into the translation string.
        my ($first, $second, $third) =
            map { index("acgt", $_) } split(//, $triplet) ;
        if ($first >= 0 && $second >= 0 && $third >= 0) {
            $aa = substr($translation, 16 * $first + 4 * $second + $third, 1) ;
        }
    }
    $aa ;
}
# ----------------------------------------------------------------------
# Main program.
#
# Usage: script.pl input_file output_file
#
# Reads a comma-separated barcode/ORF association file and writes a table
# with one row per (position, amino acid, codon) listing the barcodes whose
# ORF carries exactly that single codon change, plus one WILDTYPE row for
# barcodes with no change.
#
# Input fields used (0-based): 0 = barcode, 2 = number of mutated codons,
# 4 = codon description. Field 3 (amino-acid description) is not needed.
# Lines with two or more mutations are ignored.
# ----------------------------------------------------------------------
use strict ;
use warnings ;

if (@ARGV != 2) {
    # A usage error is a failure: report on STDERR with a non-zero status.
    print STDERR "usage: script.pl input_file output_file \n" ;
    exit 1 ;
}
my ($infile, $outfile) = @ARGV ;

# NEED TO EDIT THIS FOR DIFFERENT GENES
my $npositions = 241 ;

# $bc_list[$position][$codon_index] holds the barcodes seen for that codon
# at that position as one string in which every barcode is preceded by a
# comma. $bc_list[0][0] collects the wildtype barcodes.
my @bc_list ;
for my $position (0 .. $npositions) {
    $bc_list[$position] = [ ("") x 65 ] ;
}

# Number of input lines that could not be interpreted and were skipped.
my $nflaggedbc = 0 ;

open(my $in_fh, '<', $infile)
    or die "cannot open input file '$infile': $!\n" ;
while (my $line = <$in_fh>) {
    # Strip the line ending, tolerating CRLF files so that a trailing "\r"
    # cannot end up inside the mutant codon.
    $line =~ s/\r?\n\z// ;
    next if $line =~ /\A\s*\z/ ;

    my @spline = split(/,/, $line) ;
    my $bc = $spline[0] ;

    # The mutation count must really be a number. Without this check a
    # header line or truncated record compares equal to 0 and its first
    # field would be reported as a wildtype barcode.
    my $nmut ;
    if (defined $spline[2] && $spline[2] =~ /\A\s*(\d+)\s*\z/) {
        $nmut = $1 ;
    }
    if (!defined $nmut) {
        warn "line $.: mutation count is not a number, line skipped: $line\n" ;
        $nflaggedbc++ ;
        next ;
    }

    if ($nmut == 0) {
        # wildtype sequence, store for output
        $bc_list[0][0] .= "," . $bc ;
    }
    elsif ($nmut == 1) {
        # one codon differs from parental sequence, store for output
        my $cod_desc = defined $spline[4] ? $spline[4] : "" ;

        # The position is every digit found in the codon description.
        (my $aa_position = $cod_desc) =~ s/[^0-9]//g ;
        # Progress trace on STDOUT, kept from the original script.
        print "aapos $aa_position\n" ;

        # The mutant codon is the last three characters of the description.
        my $mutcod = length($cod_desc) >= 3 ? substr($cod_desc, -3) : "" ;
        my $jmut = codontoi($mutcod) ;

        # Reject records that cannot be placed in the table. Previously a
        # record with no position and an unrecognised codon resolved to
        # $bc_list[0][0] and was reported as wildtype.
        if ($aa_position eq ""
            || $aa_position < 1
            || $aa_position > $npositions
            || !defined $jmut) {
            warn "line $.: cannot place codon change '$cod_desc', line skipped\n" ;
            $nflaggedbc++ ;
            next ;
        }
        $bc_list[$aa_position][$jmut] .= "," . $bc ;
    }
}
close($in_fh) ;
print STDERR "$nflaggedbc input line(s) skipped\n" if $nflaggedbc ;

# The order of rows within a position is the same for every position:
# amino acids in the order below, and within an amino acid its codons by
# increasing codon index. Work it out once instead of once per position.
my @aa_list = qw(* W Y F L M I V C A G P S T N Q H R K D E) ;
my @row_order ;
for my $aa (@aa_list) {
    for my $j (1 .. 64) {
        my $cur_cod = uc(itocodon($j)) ;
        my $cur_aa = codontoaa($cur_cod) ;
        push @row_order, [ $j, $cur_aa, $cur_cod ] if $cur_aa eq $aa ;
    }
}

open(my $out_fh, '>', $outfile)
    or die "cannot open output file '$outfile': $!\n" ;
print $out_fh "Position, aa, codon, bc_list\n" ;
# Each stored barcode already carries its leading comma.
print $out_fh "WILDTYPE, WILDTYPE, WILDTYPE" . $bc_list[0][0] . "\n" ;
for my $position (1 .. $npositions) {
    for my $row (@row_order) {
        my ($j, $cur_aa, $cur_cod) = @$row ;
        print $out_fh "$position,$cur_aa,$cur_cod" . $bc_list[$position][$j] . "\n" ;
    }
}
# Buffered write errors only surface when the handle is closed.
close($out_fh)
    or die "error writing output file '$outfile': $!\n" ;