https://github.com/JuliaFlynn/PacBio_barcode_assocation
Tip revision: 29eac92475a9ff8e24fb390986c865b504c03f51 authored by JuliaFlynn on 26 May 2022, 22:22:57 UTC
Update README.md
Update README.md
Tip revision: 29eac92
pacbio6b.pl
#!/usr/bin/perl
#
sub codontoaa {
my($q,$aa,$codon) ;
$codon = lc($_[0]) ;
$codon = substr($codon,0,3) ;
if ($codon eq "aaa") {$aa = "K"} ;
if ($codon eq "aac") {$aa = "N"} ;
if ($codon eq "aag") {$aa = "K"} ;
if ($codon eq "aat") {$aa = "N"} ;
if ($codon eq "aca") {$aa = "T"} ;
if ($codon eq "acc") {$aa = "T"} ;
if ($codon eq "acg") {$aa = "T"} ;
if ($codon eq "act") {$aa = "T"} ;
if ($codon eq "aga") {$aa = "R"} ;
if ($codon eq "agc") {$aa = "S"} ;
if ($codon eq "agg") {$aa = "R"} ;
if ($codon eq "agt") {$aa = "S"} ;
if ($codon eq "ata") {$aa = "I"} ;
if ($codon eq "atc") {$aa = "I"} ;
if ($codon eq "atg") {$aa = "M"} ;
if ($codon eq "att") {$aa = "I"} ;
if ($codon eq "caa") {$aa = "Q"} ;
if ($codon eq "cac") {$aa = "H"} ;
if ($codon eq "cag") {$aa = "Q"} ;
if ($codon eq "cat") {$aa = "H"} ;
if ($codon eq "cca") {$aa = "P"} ;
if ($codon eq "ccc") {$aa = "P"} ;
if ($codon eq "ccg") {$aa = "P"} ;
if ($codon eq "cct") {$aa = "P"} ;
if ($codon eq "cga") {$aa = "R"} ;
if ($codon eq "cgc") {$aa = "R"} ;
if ($codon eq "cgg") {$aa = "R"} ;
if ($codon eq "cgt") {$aa = "R"} ;
if ($codon eq "cta") {$aa = "L"} ;
if ($codon eq "ctc") {$aa = "L"} ;
if ($codon eq "ctg") {$aa = "L"} ;
if ($codon eq "ctt") {$aa = "L"} ;
if ($codon eq "gaa") {$aa = "E"} ;
if ($codon eq "gac") {$aa = "D"} ;
if ($codon eq "gag") {$aa = "E"} ;
if ($codon eq "gat") {$aa = "D"} ;
if ($codon eq "gca") {$aa = "A"} ;
if ($codon eq "gcc") {$aa = "A"} ;
if ($codon eq "gcg") {$aa = "A"} ;
if ($codon eq "gct") {$aa = "A"} ;
if ($codon eq "gga") {$aa = "G"} ;
if ($codon eq "ggc") {$aa = "G"} ;
if ($codon eq "ggg") {$aa = "G"} ;
if ($codon eq "ggt") {$aa = "G"} ;
if ($codon eq "gta") {$aa = "V"} ;
if ($codon eq "gtc") {$aa = "V"} ;
if ($codon eq "gtg") {$aa = "V"} ;
if ($codon eq "gtt") {$aa = "V"} ;
if ($codon eq "taa") {$aa = "*"} ;
if ($codon eq "tac") {$aa = "Y"} ;
if ($codon eq "tag") {$aa = "*"} ;
if ($codon eq "tat") {$aa = "Y"} ;
if ($codon eq "tca") {$aa = "S"} ;
if ($codon eq "tcc") {$aa = "S"} ;
if ($codon eq "tcg") {$aa = "S"} ;
if ($codon eq "tct") {$aa = "S"} ;
if ($codon eq "tga") {$aa = "*"} ;
if ($codon eq "tgc") {$aa = "C"} ;
if ($codon eq "tgg") {$aa = "W"} ;
if ($codon eq "tgt") {$aa = "C"} ;
if ($codon eq "tta") {$aa = "L"} ;
if ($codon eq "ttc") {$aa = "F"} ;
if ($codon eq "ttg") {$aa = "L"} ;
if ($codon eq "ttt") {$aa = "F"} ;
$aa ;
}
sub itoaa {
my($i,$aa);
$i = $_[0];
$aa="X";
if ($i == 0) {$aa="*"} ;
if ($i == 1) {$aa="W"} ;
if ($i == 2) {$aa="Y"} ;
if ($i == 3) {$aa="F"} ;
if ($i == 4) {$aa="L"} ;
if ($i == 5) {$aa="I"} ;
if ($i == 6) {$aa="V"} ;
if ($i == 7) {$aa="M"} ;
if ($i == 8) {$aa="C"} ;
if ($i == 9) {$aa="A"} ;
if ($i == 10) {$aa="G"} ;
if ($i == 11) {$aa="P"} ;
if ($i == 12) {$aa="S"} ;
if ($i == 13) {$aa="T"} ;
if ($i == 14) {$aa="N"} ;
if ($i == 15) {$aa="Q"} ;
if ($i == 16) {$aa="D"} ;
if ($i == 17) {$aa="E"} ;
if ($i == 18) {$aa="H"} ;
if ($i == 19) {$aa="K"} ;
if ($i == 20) {$aa="R"} ;
$aa ;
}
sub aatoi {
my($i,$aa);
$aa = $_[0];
$i=-1;
if ($aa eq "*") {$i=0} ;
if ($aa eq "W") {$i=1} ;
if ($aa eq "Y") {$i=2} ;
if ($aa eq "F") {$i=3} ;
if ($aa eq "L") {$i=4} ;
if ($aa eq "I") {$i=5} ;
if ($aa eq "V") {$i=6} ;
if ($aa eq "M") {$i=7} ;
if ($aa eq "C") {$i=8} ;
if ($aa eq "A") {$i=9} ;
if ($aa eq "G") {$i=10} ;
if ($aa eq "P") {$i=11} ;
if ($aa eq "S") {$i=12} ;
if ($aa eq "T") {$i=13} ;
if ($aa eq "N") {$i=14} ;
if ($aa eq "Q") {$i=15} ;
if ($aa eq "D") {$i=16} ;
if ($aa eq "E") {$i=17} ;
if ($aa eq "H") {$i=18} ;
if ($aa eq "K") {$i=19} ;
if ($aa eq "R") {$i=20} ;
$i ;
}
if ($#ARGV!=1) {
print "usage: script.pl infile outfile\n";
exit;
}
$infile = $ARGV[0];
$outfile = $ARGV[1];
$wtdna = "ATGGCTAGTGAAACTTTTGAATTTCAAGCTGAAATTACTCAGTTGATGAGTTTGATCATCAACACCGTCTATTCTAACAAGGAAATTTTCTTGAGAGAACTGATATCTAATGCCTCGGATGCGCTAGATAAAATTAGATACAAATCTTTGTCTGATCCAAAGCAATTGGAAACAGAACCAGATCTCTTTATTAGAATCACTCCAAAGCCAGAGCAAAAAGTTTTGGAAATCAGAGATTCTGGTATTGGTATGACCAAGGCTGAATTGATTAATAACTTGGGTACCATTGCCAAGTCTGGTACCAAAGCCTTCATGGAAGCTCTATCTGCTGGTGCCGATGTATCCATGATTGGTCAATTCGGTGTTGGTTTTTACTCTTTATTCTTAGTTGCCGACAGAGTTCAGGTTATTTCAAAGAGCAACGACGACGAACAATACATCTGGGAATCTAACGCTGGTGGTTCTTTCACTGTTACTCTAGACGAAGTTAATGAAAGAATTGGTAGGGGTACCATCTTGAGGTTATTCTTGAAAGATGACCAATTGGAGTACTTGGAAGAAAAGAGAATAAAGGAAGTTATCAAGAGACATTCTGAGTTCGTGGCCTACCCAATCCAATTAGTCGTCACCAAGGAAGTTGAAAAGGAAGTTCCAATTCCAGAAGAAGAAAAGAAAGACGAGGAAAAGAAGGATGAGGAAAAGAAGGATGAAGACGACAAGAAACCAAAACTCGAGGAAGTCGATGAAGAAGAGGAAAAGAAGCCAAAGACGAAAAAAGTTAAAGAAGAAGTTCAAGAGATAGAAGAACTAAACAAGACTAAGCCTTTGTGGACTAGAAACCCATCTGATATCACTCAAGAAGAATACAATGCTTTCTATAAGTCTATTTCAAACGACTGGGAAGACCCATTGTACGTTAAGCATTTCTCCGTTGAAGGTCAATTGGAATTTAGAGCTATCTTATTCATTCCAAAGAGAGCACCATTCGACTTGTTTGAGAGTAAAAAGAAGAAGAATAATATCAAGTTGTACGTTCGTCGTGTTTTCATCACTGATGAAGCTGAAGACTTGATTCCAGAGTGGTTATCTTTCGTCAAGGGTGTTGTTGACTCTGAGGATTTACCATTGAATTTGTCCAGAGAAATGTTACAACAAAATAAGATCATGAAGGTTATTAGAAAGAACATTGTCAAAAAGTTGATTGAAGCCTTCAACGAAATTGCTGAAGACTCTGAACAATTTGAAAAGTTCTACTCGGCTTTCTCCAAAAATATCAAGTTGGGTGTACATGAAGATACCCAAAACAGGGCTGCTTTGGCTAAGTTGTTACGTTACAACTCTACCAAGTCCGTAGATGAGTTGACTTCCTTAACTGATTACGTTACCAGAATGCCAGAACACCAAAAGAACATCTACTACATCACTGGTGAATCTCTAAAGGCTGTCGAAAAGTCTCCATTTTTGGATGCCTTGAAGGCTAAAAACTTCGAGGTTTTGTTCTTGACCGACCCAATTGATGAATACGCCTTCACTCAATTGAAGGAATTCGAAGGTAAAACTTTGGTTGACATTACTAAAGATTTCGAATTGGAAGAAACTGACGAAGAAAAAGCTGAAAGAGAGAAGGAGATCAAAGAATATGAACCATTGACCAAAGCTTTGAAAGAAATTTTGGGTGACCAAGTGGAGAAAGTTGTTGTTTCTTACAAATTGCTAGATGCCCCAGCTGCTATCAGAACTGGTCAATTTGGTTGGTCTGCTAACATGGAAAGAATCATGAAGGCTCAAGCCTTGAGAGACTCTTCCATGTCCTCCTACATGTCTTCCAAGAAGACTTTCGAAATTTCTCCAAAATCTCCAATTATCAAGGAATTGAAAAAGAGAGTTGACGAAGGTGGTGCTCAAGACAAGACTGTCAAGGACTTGACTAAGTTATTATATGAAACTGCTTTGTTGACTTCCGGCTTCAGTTTGGACGAACCAACTTCCTTTGCATCAAGAATTAACAGATTGATCTCTTTAGGCCTGAACATTGATGAGGATGAAGAAACAGAGACTGCTCCAGAAGCATCCACCGCAGCTCCGGTTGAAGAGGTTCCAGCTGACACCGAAATGGAAGAGGTAGATTAA" ;
open(INF, $infile) ;
open(OUTF, ">$outfile") ;
open(REJECTF, ">reject5.out") ;
open(SUMF, ">sum5.out") ;
for ($i=0; $i<709; $i++) {
for ($j=0; $j<21; $j++) {
$nbc[$i][$j]=0;
$nreads[$i][$j]=0;
}
}
while ($line = <INF>) {
chomp($line) ;
@spline = split (/,/, $line) ;
$mutaa = $spline[3] ;
$l=length($spline[1]) ;
if ($l<6) {
$tempreads = substr($spline[1],0,1) ;
} else {
$tempreads = substr($spline[1],0,2) ;
}
if ($spline[2]==0) {
$nbc[0][0]++;
$nreads[0][0]=$nreads[0][0]+$tempreads;
} elsif ($spline[2]==1) {
$l=length($mutaa) ;
$aa=substr($mutaa,$l-1,1) ;
$i=&aatoi($aa) ;
$pos=substr($mutaa,1,$l-2) ;
$nbc[$pos][$i]++;
$nreads[$pos][$i]=$nreads[$pos][$i]+$tempreads;
} else {
print REJECTF "$line\n" ;
}
}
print OUTF "Number of barcodes\n" ;
print OUTF "aa\," ;
for ($i=1; $i<306; $i++) {
print OUTF "$i\," ;
}
print OUTF "\n" ;
for ($j=0; $j<21; $j++) {
$aa=&itoaa($j) ;
print OUTF "$aa\," ;
for ($i=1; $i<709; $i++) {
print OUTF "$nbc[$i][$j]\," ;
}
print OUTF "\n" ;
}
print OUTF "\n" ;
print OUTF "\n" ;
print OUTF "\n" ;
print OUTF "Number of barcode reads\n" ;
print OUTF "aa\," ;
for ($i=1; $i<709; $i++) {
print OUTF "$i\," ;
}
print OUTF "\n" ;
for ($j=0; $j<21; $j++) {
$aa=&itoaa($j) ;
print OUTF "$aa\," ;
for ($i=1; $i<306; $i++) {
print OUTF "$nreads[$i][$j]\," ;
}
print OUTF "\n" ;
}
print OUTF "\n" ;
print OUTF "\n" ;
print OUTF "\n" ;
print OUTF "WT reads\n" ;
print OUTF "Number of WT barcodes $nbc[0][0]\n" ;
print OUTF "Number of WT reads $nreads[0][0]\n" ;
#check for WT synonyms
print OUTF "\n" ;
print OUTF "\n" ;
print OUTF "\n" ;
print OUTF "WT synonyms\n" ;
print OUTF "position\, barcodes\, reads\n" ;
for ($i=0; $i<709; $i++) {
$ntpos = $i*3 ;
$wtcodon = substr($wtdna,$ntpos,3) ;
$wtaa = &codontoaa($wtcodon) ;
$iwtaa = &aatoi($wtaa) ;
$aapos = $i+1 ;
print OUTF "$aapos\, $nbc[$aapos][$iwtaa]\, $nreads[$aapos][$iwtaa]\n" ;
}
close(INF) ;
close(OUTF) ;
close(REJECTF) ;
close(SUMF) ;