https://github.com/sanger-pathogens/ABACAS2
Raw File
Tip revision: ab46cf0f354178ebcb83f801a493b3c09f69e0d8 authored by ssjunnebo on 08 May 2017, 15:38:02 UTC
Merge pull request #3 from ssjunnebo/master
Tip revision: ab46cf0
abacas2.parallel.sh
#!/bin/bash
# Copyright (c) 2011-2015 Genome Research Ltd.
# Author: Thomas D. Otto <tdo@sanger.ac.uk>
#
# This file is part of ABACAS2.
#
# ABACAS2 is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3 of the License, or (at your option) any later
# version.
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

reference=$1
contig=$2
ABA_MIN_LENGTH=$3
ABA_MIN_IDENTITY=$4
doBlast=$5
MemBig=$6

PERL5LIB=$PERL5LIB:/nfs/pathogen003/tdo/Tools/ABACAS2
export PERL5LIB

if [ ! -f "$contig" ] ; then
	echo "

                 *** Abacas II. ***       For any distrubation with this program, please don't blame Sammy!

usage:
abacas2.sh <reference> <Contig to order> optinal: <min aligment length> <Identity cutoff> <doblast: 0/1> <Bsub RAM>

reference:           Fasta (or multi-fasta) against which the contigs should be orders
contig:              Contigs or query that should be ordered
Min aligment length: Threshold for the length, when an alignment lenght is significant. (default 200)
Identity cutoff:     Threshold for identity to place contigs. (default 95)
Do Blast:            Does a blast for the act. (default 1)
Bsub RAM:            KB used for the abacas2 run in the second stage. (default: 6000) Attention, if run of farm2, more than 12000 shouldn't be used, as it should go to hugemem.

Further parameters:
ABA_CHECK_OVERLAP=1; export ABA_CHECK_OVERLAP # this will try to overlap contigs
ABA_splitContigs=1; export ABA_splitContigs # this parameter will split contigs. This is good to split the orign, and to find rearrangement. A split contigs has the suffix _i (i the part)
ABA_WORD_SIZE  # sets the word size. This is critical for speed issues in nucmer. default is 20

Advanced (for pipeline usage):
ABACAS_WAIT_START    Will give the first job of abacas a name
ABACAS_LAST_START    Will give the last job a name.
"

exit;
fi

if [ -z "$ABA_MIN_LENGTH" ] ; then
	ABA_MIN_LENGTH=200;
fi
if [ -z "$ABA_MIN_IDENTITY" ] ; then
	ABA_MIN_IDENTITY=95;
fi
if [ -z "$MemBig" ]; then
   MemBig=4500
fi
if [ -z "$ABA_CHECK_OVERLAP" ] ; then
        ABA_CHECK_OVERLAP=0;
        export ABA_CHECK_OVERLAP
fi
if [ -z "$doBlast" ]; then
   doBlast=1
fi

### for pipelining
tmp=$$
if [ ! -z $ABACAS_WAIT_START ] ; then
   ABACAS_WAIT_START=" -w $ABACAS_WAIT_START "
fi
if [ -z $ABACAS_LAST_START ] ; then
   ABACAS_LAST_START="stage3.$tmp"
fi


if [ "$ABA_MIN_IDENTITY" -gt "99" ] ; then
	echo "Your identity might be too high $ABA_MIN_IDENTITY > 99 "
	exit ;
fi

sed 's/|/_/g' $reference > Ref.$tmp
reference=Ref.$tmp
ln -s $contig Contigs.$tmp
contig=Contigs.$tmp

export ABA_MIN_LENGTH ABA_MIN_IDENTITY contig reference


tmp=$$
bsub $ABACAS_WAIT_START -J"stage1.$tmp" \
  -R "select[type==X86_64 && mem > 4000] rusage[mem=4000]" -M4000 \
  -o abacas2.o -e abacas2.e \
  "abacas2.runComparison.sh $reference $contig"


#do ordering, construct job dependencies
i=0
BSUB_W=""
for x in `grep '>' $reference | perl -nle '/>(\S+)/;print $1' `;
do
  ((i++))
  bsub -w"stage1.$tmp" -J"stage2.$tmp.$i" \
    -R "select[type==X86_64 && mem > $MemBig] rusage[mem=$MemBig]" \
    -M"$MemBig" -o output2.$x.o -e output2.$x.e \
    "abacas2.doTilingGraph.pl $x.coords  $contig Res";
  BSUB_W="${BSUB_W}ended(stage2.${tmp}.${i}) && "
done
# cut off last '&&'
BSUB_W=`echo "$BSUB_W" | sed 's/&& $//'`
echo "bsub: $BSUB_W"

if [ "$doBlast" == "1" ] ; then
  bsub -w "$BSUB_W" -J"stage4.$tmp.blast" -o blast.started.o \
    -e blast.started.e  -R "select[type==X86_64 && mem > 500] rusage[mem=500]" \
    -M500  "abacas2.doblast.sh $reference Res"
fi

#do bin
bsub -w "$BSUB_W" -J"$ABACAS_LAST_START" \
  -R "select[type==X86_64 && mem > 6000] rusage[mem=6000]" -M6000 \
  -o output2.bin.o -e output2.bin.e \
  "~tdo/Bin/abacas2.bin.sh $contig Res.abacasBin.fna && grep -v '>'  Res.abacasBin.fna | awk 'BEGIN {print \">Bin.union\"} {print}' > Res.abacasBin.oneSeq.fna_ && cat Res*fna > Genome.abacas.fasta && bam.correctLineLength.sh Genome.abacas.fasta ";


back to top