https://github.com/Ettwiller/TSS
Raw File
Tip revision: 5382267dc89460b2ec30e33508dff4479c71c3ef authored by Laurence Ettwiller on 14 February 2019, 18:51:53 UTC
updated to new samtools requirement
Tip revision: 5382267
bam2firstbasebam.pl
#!/usr/bin/perl

use strict;
use warnings;
use Getopt::Long qw(GetOptions);


#this program is primary used for visualizing the TSS on IGV or other genome browser with the read being narrow down to 1 bp resolution. 
# INPUT :
#bam file X.bam (preferably generated through local alignment such as Bowtie --local)
#OUTPUT :
# X_start.bam and X_start.bam.bai

my $bamfile;
my $genome;
my $lib_type = "F";
my $out;

my $error_message = "USAGE : perl $0 --bam bamfile --genome genome.fai --lib_type FR --out bam_start.bam \n";




GetOptions ("bam=s" => \$bamfile,    # numeric
	    "genome=s"   => \$genome,
	    "lib_type=s" => \$lib_type,
	    "out=s" => \$out
    ) or die $error_message;

if (!$bamfile || !$genome || !$out ) {die "USAGE : perl $0 --bam bamfile --genome genome.fai --out outfile.bam \n";}
if ($lib_type ne "FR" && $lib_type ne "RF" && $lib_type ne "F"){ die $error_message;
}

my $resulting_bam;
if ($lib_type eq "F") #single read library with R1 being the most 5' end of the transcripts. 
{
    $resulting_bam = $bamfile;
}
elsif ($lib_type eq "FR")
{
    my $tmp = "R1";
    #extract R1 from bam
    $resulting_bam = $tmp.".bam";
    my $library_command = "samtools view -f64 -b $bamfile | samtools sort -o $resulting_bam -";
    system($library_command);
}
elsif ($lib_type eq "RF")
{
    my $tmp = "R2";
    #extract R1 from bam
    $resulting_bam = $tmp.".bam";
    my $library_command = "samtools view -f128 -b $bamfile | samtools sort -o $resulting_bam -";
    
    system($library_command);
}






my $generic = $bamfile;
$generic =~ s/\.bam//;
$generic =~ s/.*\///g;

print STDERR "Generating the bam files - be patient it may take a while - \n";


my $file_tmp = "bamtmp";my $bed = "bedtmp";my $newbam = $out;
$newbam =~ s/\.bam//;
#first convert your bam to bed. 
my $command = "bedtools bamtobed -cigar  -i $resulting_bam > $file_tmp"; 

print STDERR "$command\n";
system($command);
parse_bed($file_tmp, $bed);

#then go back to bam again - sort it at the same time. 
my $newbam_withbam = $newbam.".bam";
my $command2 = "bedtools bedtobam -i $bed -g $genome | samtools sort -o $newbam_withbam -";

my $command3 = "samtools index $newbam_withbam";

#excecute the commands

system($command2);
system($command3);

#removing tmp files that are not necessary anymore. 
unlink($file_tmp);
unlink($bed);

 
sub parse_bed {
    my ($file, $bed)=@_;
    #read the tmp file
    open (FILE, $file) or die;
    #write the first mapped postion into a bed new bed file 
    open (OUT, ">$bed") or die; 
    
    foreach my $line (<FILE>)
    {
	chomp $line;
	my $start;
	my @tmp = split /\t/, $line;
	my $chr = $tmp[0];
	my $orientation = $tmp[5];
	#if the orientation is + take the start of the read
	if ($orientation eq "+")
	{
	    $start = $tmp[1];
	    $tmp[2] = $start+1;
	}
	#if the orientation is - , take the end of the read
	else{
	    $start = $tmp[2];
	    $tmp[1] = $start-1;
	}
	#fixe up some other issues. 
	$tmp[4] =1;
	$tmp[6] ="1M";
	
	my $new_line = join("\t", @tmp);
	print OUT "$new_line\n";
    }
    close FILE;
    close OUT;
}
back to top