https://github.com/Ettwiller/TSS
Raw File
Tip revision: 5382267dc89460b2ec30e33508dff4479c71c3ef authored by Laurence Ettwiller on 14 February 2019, 18:51:53 UTC
updated to new samtools requirement
Tip revision: 5382267
organized_by_TSS_type.pl
#!/usr/bin/perl
use strict;
use Getopt::Long qw(GetOptions);

my $error_sentence = "USAGE : perl $0 --tss tss_file (bed or ftg) --genome genome_file (fasta) OPTIONAL : --detail 0 or 1 (default 0)";
my $TSS;
my $genome;
my $detail = 0; #0 or 1

GetOptions ("tss=s" => \$TSS, 
    "genome=s" => \$genome,
    "detail=s" => \$detail
    ) or die "$error_sentence";

if (!$TSS || !$genome) {die "$error_sentence\n";}


#my $TSS = "/mnt/home/ettwiller/laurence/projects/ira_cap/FINAL_PAPER/TSS/TSS_enriched_cluster_5.gtf";
#my $genome = "/mnt/home/ettwiller/laurence/projects/ira_cap/3_ecoli_RNase_inhibitors/genome/ecoli_genome_and_controls_new.fasta";

#create an index file if the file does not exist. Do nothing if exist. 
my $fai = $genome.".fai";
if (-e $fai) {
    print STDERR "$fai exist\n";
}

else {
    my $fai_command = "samtools faidx $genome";
    system($fai_command);
}
if ($detail==1)
{
    print "-1+1\tscore(RRS)\tenrichment\n";
}


my $tmp = "tmp";
my $tmp1 = "tmp1";


my $out = $TSS;
$out =~ s/.*\///g;
$out =~ s/\.gtf//;
$out = "tmp_".$out.".fasta";
#my $c = "bedtools intersect -u  -a $TSS -b ecoli_genome.bed > $tmp";
#print "$c\n";
#system($c);

my $command = " bedtools slop -i $TSS -g $fai  -s -l 1 -r 0 | bedtools getfasta  -s -fi $genome -bed - -fo $out";

#print "$command\n";
system($command);
my $out2 = $out;

my $command1 = "grep -v \">\" $out > $tmp1";
#print "$command1\n";
system($command1);

my $command2 = "paste tmp tmp1 > $out2";
#print "$command2\n";
system($command2);
my %final;
my $total =0;
open (FILE, $out2);
foreach my $line (<FILE>)
{
    chomp $line;
    my @tmp = split /\t/, $line;
    my $id = $tmp[8];
    my @tmp2 = split /\_/, $id;
    my $enrichment = $tmp2[-1];
    my $score = $tmp[5];
    my $nt = $tmp[9];
    if ($detail ==1)
    {
	print "$nt\t$score\t$enrichment\n";
    }
    $total ++;
    $final{$nt}++;

}
if ($detail == 0)
{
    foreach my $nt (sort {$final{$b}<=>$final{$a}} keys %final)
    {
	my $percent = ($final{$nt}*100)/$total;
	my $rounded = sprintf "%.2f", $percent;
	print "$nt\t$rounded \%\n";
    }
}



unlink($tmp);
unlink($tmp1);
unlink($out2);
back to top