Raw File
process_bowtie_output.pl
#!/usr/bin/perl
use warnings;
use strict;

#--------------------------------------------------------------------------#
#  Author: Meng Wu, the Gordon Lab, Washington University in St. Louis     #
#                                                                          #
#  File:   process_bowtie_output.pl                                        #
#  Date:   2014-09-05                                                      #
#  Version: 1.30                                                           #
#                                                                          #
#  Usage:                                                                  #
#       see Usage in the code                                              #
#                                                                          #
#  Contact: mengwu@wustl.edu                                               #
#--------------------------------------------------------------------------#


#Take the bowtie output with the parameter as "-m 1 -l 17 -n 1 -a --best --strata"

if (!@ARGV) {
  print "\nUsage: perl $0 <bowtie_output_file> \n";
  exit;
}

open IN, "$ARGV[0]";



my $total_count=0;
my $count; 

my $insertion_F;
my $insertion_R;
my %sites;     #$sites{sampleID} = number of insertion locations for that sample
my $chromosome;

while (my $line=<IN>) {
  if ($line !~/^#/){
    my @data = split (/\t/, $line);
    $total_count++;
    $chromosome=$data[2];
    my $length=length $data[4];
    my $site;
    $count->{$chromosome}->{$data[2]}++;
    if ($data[1] eq "+"){ #set insertion location of "T" in "TA" top-strand dinucleotide
	$site = $data[3]+$length-1;
      #add read to count of forward reads for that site in that sample
	$insertion_F->{$chromosome}->{$site}++;
    }
    if ($data[1] eq "-"){
	$site=$data[3]+1;
        $insertion_R->{$chromosome}->{$site}++;
   }
  }
}

my $coverage;
my $sites;
my $sample;



foreach my $key (keys %$insertion_F){ #for each sample in %insertion_F
  if ($key=~m/(.*)/){
   open OUT, ">$ARGV[0]_processed.txt_".$1;
   foreach my $location (keys %{$insertion_F->{$key}}) {
    push @{$sites{$key}},$location;
    print OUT ">$key\t$location\t$insertion_F->{$key}->{$location}\t";
    my $sum = $insertion_F->{$key}->{$location};
    if (exists ($insertion_R->{$key}->{$location})){
      print OUT "$insertion_R->{$key}->{$location}\t";
      $sum = $sum + $insertion_R->{$key}->{$location};
      delete $insertion_R->{$key}->{$location};
    } else {
      print OUT "0\t";
    }
    print OUT "$sum\n";  
   }
  }
} 
foreach my $key (keys %$insertion_R){
 if ($key=~m/(.*)/){
   open OUT, ">>$ARGV[0]_processed.txt_".$1;
   foreach my $location (keys %{$insertion_R->{$key}}) {
   push @{$sites{$key}},$location;
   print OUT ">$key\t$location\t0\t$insertion_R->{$key}->{$location}\t$insertion_R->{$key}->{$location}\n";
   }
 }
}


back to top