https://github.com/STAR-Fusion/STAR-Fusion
Raw File
Tip revision: 67fa7801b8fa6759e98366689370ec920caa57ee authored by Brian Haas on 17 June 2015, 13:17:52 UTC
update, use fasta retriever instead of cdbyank
Tip revision: 67fa780
STAR-Fusion
#!/usr/bin/env perl

# contributed by Brian Haas, Broad Institute, 2015

use strict;
use warnings;
use Carp;
use Cwd;
use FindBin;
use lib ("$FindBin::Bin/lib");
use Pipeliner;
use Set::IntervalTree;

use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);

my $UTILDIR = "$FindBin::Bin/util";


=recommended_STAR_settings

  # From Stransky et al. 2014   PMID: 25204415

   STAR --genomeDir Hg19.fa_star_index \
        --readFilesIn left.fq right.fq \
        --outSAMstrandField intronMotif \
        --outFilterIntronMotifs RemoveNoncanonicalUnannotated \
        --outReadsUnmapped None --chimSegmentMin 15 \
        --chimJunctionOverhangMin 15 \
        --alignMatesGapMax 200000 \
        --alignIntronMax 200000 \
        --runThreadN 4 \
        --outSAMtype BAM SortedByCoordinate 

=cut



## Options
my $out_prefix = "star-fusion";
my $chimeric_junction_file;
my $chimeric_out_sam;
my $ref_GTF = "$FindBin::Bin/resources/gencode.v19.annotation.gtf.exons.gz";
my $ref_cdna = "$FindBin::Bin/resources/gencode.v19.annotation.gtf.exons.cdna.gz";
my $help_flag;
my $MIN_NOVEL_JUNCTION_SUPPORT = 10;
my $MIN_ALT_PCT_JUNCTION = 10.0;
my $Evalue = 1e-3;
my $tmpdir = "/tmp";
my $verbose_level = 1;

my $usage = <<__EOUSAGE__;

###################################################################################
#
#  Required:
#
#    --chimeric_out_sam|S <string>      Chimeric.out.sam file
#
#    --chimeric_junction|J <string>     Chimeric.out.junction file
#
#  Optional:
#
#    --ref_GTF|G <string>                  reference annotation GTF file (ie. gencode.gtf)
#
#    --ref_cdna|C <string>                 reference cDNA sequences fasta file (generated specially based on gtf -see docs)
#
#    --min_novel_junction_support <int>    default: 10  (minimum of 10 junction reads required if breakpoint
#                                                        lacks involvement of only reference junctions)
#
#    --min_alt_pct_junction <float>        default: 10.0  (10% of the dominant isoform junction support)
#
#    --out_prefix|O <string>               output file prefix (default: $out_prefix)
#   
#    -E <float>                            E-value threshold for blast searches (default: 0.001)
#
#    --tmpdir <string>                     file for temporary files (default: /tmp)
#
#    --verbose_level <int>                 verbosity (default: $verbose_level, max=2)
#
#    --no_filter                           do not filter predictions.
#
###################################################################################


__EOUSAGE__

    ;


my $no_filter = 0;


&GetOptions ( 'help|h' => \$help_flag,
              
              'chimeric_out_sam|S=s' => \$chimeric_out_sam,
              'chimeric_junction|J=s' => \$chimeric_junction_file,
              'ref_GTF|G=s' => \$ref_GTF,
              'ref_cdna|C=s' => \$ref_cdna,
              
              'min_novel_junction_support=i' => \$MIN_NOVEL_JUNCTION_SUPPORT,
              'min_alt_pct_junction=f' => \$MIN_ALT_PCT_JUNCTION,
              'out_prefix|O=s' => \$out_prefix,

              'E=f' => \$Evalue,
              'tmpdir=s' => \$tmpdir,
              'verbose_level=i' => \$verbose_level,
    
              'no_filter' => \$no_filter,
    );


if ($help_flag) {
    die $usage;
}
unless ($chimeric_out_sam && $chimeric_junction_file) {
    die $usage;
}
unless (-s $ref_GTF) {
    die "Error, cannot locate reference annotation file: $ref_GTF";
}
unless ( -s $ref_cdna || $no_filter ) {
    die "Error, cannot locate reference cdna file: $ref_cdna";
}


my @required_progs = qw(makeblastdb blastn STAR);

if (&missing_required_program_installed(@required_progs)) {
    
    die "Error, required program(s) are missing. Be sure they are installed and available via your PATH setting";
    
}



main: {
     
    my $pipeliner = new Pipeliner(-verbose => $verbose_level);
    
    ## predict fusions

    my $cmd = "$UTILDIR/STAR-Fusion.predict "
        . " -S $chimeric_out_sam "
        . " -J  $chimeric_junction_file "
        . " -G $ref_GTF "
        . " --min_novel_junction_support $MIN_NOVEL_JUNCTION_SUPPORT "
        . " --min_alt_pct_junction $MIN_ALT_PCT_JUNCTION "
        . " -O $out_prefix ";
    
    $pipeliner->add_commands(new Command($cmd, "STAR-Fusion.predict.ok"));

    my $predicted_fusions_file = "$out_prefix.fusion_candidates.preliminary";
    
    unless ($no_filter) {
           
        ## filter fusions
        
        $cmd = "$UTILDIR/STAR-Fusion.filter "
            . " --fusion_preds $predicted_fusions_file "
            . " --ref_cdna $ref_cdna "
            . " -E $Evalue "
            . " --tmpdir $tmpdir "
            . " > $out_prefix.fusion_candidates.final ";
        
        $pipeliner->add_commands(new Command($cmd, "STAR-Fusion.filter.ok"));
        
    }
    $pipeliner->run();
    
    if ($no_filter) {
         
        print STDERR "\n\n *** Note: --no_filtering in effect, so outputs have not been filtered for likely false positives.\n";
        print STDERR "\n\tSee output: $out_prefix.fusion_candidates.preliminary\n\n";
    }
    else {
        
        print STDERR "\n\n\t* Process complete.  See output: $out_prefix.fusion_candidates.final\n\n\n";
    }
    
    exit(0);
    
}

####
sub process_cmd {
    my ($cmd) = @_;
    
    print STDERR "CMD: $cmd\n";
    
    my $ret = system($cmd);
    
    if ($ret) {
        die "Error, CMD: $cmd died with ret $ret";
    }

    return;
}


####
sub missing_required_program_installed {
    my (@required_progs) = @_;

    my $missing = 0;
    
    foreach my $prog (@required_progs) {

        my $path = `which $prog`;
        chomp $path;
        unless ($path && $path =~ /\w/) {
            $missing++;
            print STDERR "Error, cannot locate required program: $prog\n";
        }
        else {
            print STDERR "-found prog $prog at $path\n";
        }
    }
    
    return($missing);
}

back to top