#!/usr/bin/env bash #### Set up SCRIPTNAME=`basename $0` function error() { local PARENT_LINENO="$1" local MESSAGE="$2" local CODE="${3:-1}" if [[ -n "$MESSAGE" ]] ; then echo "$SCRIPTNAME: Error on or near line ${PARENT_LINENO}: ${MESSAGE}; exiting with status ${CODE}" else echo "$SCRIPTNAME: Error on or near line ${PARENT_LINENO}; exiting with status ${CODE}" fi exit "${CODE}" } trap 'error ${LINENO}' ERR # setting exit on error and inheriting of ERR trap set -eE die() { if [ -n "$@" ]; then echo "$@" >&2 else echo "FAILED" >&2 fi exit 1 } #### Arguments GSIZE="3e9" QVALUE="0.01" # help message help() { echo "usage: $SCRIPTNAME +" echo "" echo "mandatory arguments:" echo "-n NAME name of the experiment; mandatory" echo "" echo "optional arguments:" echo "--keep-temp keep temporary files" echo "-g GSIZE approximate size of the genome (for MACS); default = $GSIZE" echo "-q QVALUE q-value cutoff (for MACS); default = $QVALUE" echo "-h|--help shows this message and exit" } if [ $# -eq 0 ]; then help exit fi while true; do case "$1" in -n) NAME="$2"; shift 2;; -g) GSIZE="$2"; shift 2;; -q) QVALUE="$2"; shift 2;; --keep-temp) KEEP_TEMP="--keep-temp"; shift 1;; -h|--help) help; exit 0;; --) shift 1; break;; -*) die "unrecognized option: $1";; *) break;; esac done if [ -z "$NAME" ]; then die "NAME is not specified (try -h for details)" fi BAMS=("$@") #### Run echo "Calling peaks" macs_pos_dir="${NAME}.pos.macs" macs_neg_dir="${NAME}.neg.macs" if [[ ${#BAMS[@]} -gt 1 ]]; then CAT_CMD="samtools cat -o -" else CAT_CMD="cat" fi $CAT_CMD "${BAMS[@]}" | samtools view | head -100 > peek.sam TAG_SIZE=$(( $(cut -f 10 peek.sam | wc -c) / $(cat peek.sam | wc -l) )) macs2 callpeak -t <($CAT_CMD "${BAMS[@]}" | samtools view -F 16 -b - | bedtools bamtobed -i - -split) \ --outdir "${macs_pos_dir}" -s $TAG_SIZE --nomodel -f BED -g "$GSIZE" \ -n "${NAME}.pos" -q "$QVALUE" & pos_pid="$!" macs2 callpeak -t <($CAT_CMD "${BAMS[@]}" | samtools view -f 16 -b - | bedtools bamtobed -i - -split) \ --outdir "${macs_neg_dir}" -s $TAG_SIZE --nomodel -f BED -g "$GSIZE" \ -n "${NAME}.neg" -q "$QVALUE" & neg_pid="$!" wait $pos_pid wait $neg_pid echo "Combining peaks" cat <(sed "s/\t\.\t/\t+\t/" "${macs_pos_dir}/${NAME}.pos_peaks.narrowPeak") \ <(sed "s/\t\.\t/\t-\t/" "${macs_neg_dir}/${NAME}.neg_peaks.narrowPeak") \ > ${NAME}_peaks.narrowPeak if [ -z "${KEEP_TEMP}" ]; then rm -rv "${macs_pos_dir}" "${macs_neg_dir}" fi echo "Done"