https://github.com/arjunrajlaboratory/RajLabSeqTools
Raw File
Tip revision: c8b8c79b2ec9c1bd9eb7ced427bb2aec25f19506 authored by Benjamin Emert on 26 March 2020, 17:37:11 UTC
Updated reorganizeBasespaceFiles.py to better parse samples with same first index (e.g. sample 1 and sample10)
Tip revision: c8b8c79
unzipAndConcatenateZippedFastq.sh
#!/bin/bash
#
# Decompress the gzipped FASTQ files of every sample, concatenate them into a
# single FASTQ per read direction, and submit a cluster job (bsub) that
# gzip-compresses each concatenated file.
#
# Usage:
#   unzipAndConcatenateZippedFastq.sh ZIPFILEDIRECTORY OUTFASTQDIRECTORY [paired|single]
#
#   ZIPFILEDIRECTORY   directory holding one sub-directory per sample; each
#                      sub-directory contains that sample's *.gz files
#   OUTFASTQDIRECTORY  output root; results are written to
#                      OUTFASTQDIRECTORY/raw/SAMPLE/SAMPLE_R1.fastq (plus
#                      SAMPLE_R2.fastq when the third argument is "paired")
#   third argument     "paired" also concatenates the R2 files; any other
#                      value is handled as single-end
#
# SAMPLE is the name of the first file in a sample sub-directory, cut at the
# first '_'.
#
# Exit status: 0 on success, 1 if any sample failed, 2 on a usage error.

ZIPFILEDIRECTORY=$1
OUTFASTQDIRECTORY=$2
PAIRED_OR_SINGLE_END_FRAGMENTS=$3

# An empty ZIPFILEDIRECTORY would turn the sample glob below into "/*" and
# an empty OUTFASTQDIRECTORY would write to "/raw", so refuse to run.
if [[ -z "$ZIPFILEDIRECTORY" || -z "$OUTFASTQDIRECTORY" ]]; then
    echo "usage: ${0##*/} ZIPFILEDIRECTORY OUTFASTQDIRECTORY [paired|single]" >&2
    exit 2
fi

if [[ ! -d "$ZIPFILEDIRECTORY" ]]; then
    echo "error: input directory not found: $ZIPFILEDIRECTORY" >&2
    exit 1
fi

echo "you selected the pipeline settings for" "$PAIRED_OR_SINGLE_END_FRAGMENTS" "end reads"

# With nullglob a pattern that matches nothing expands to nothing instead of
# being passed on literally to gunzip/cat/rm.
shopt -s nullglob

#######################################
# Concatenate the decompressed FASTQ files of one read direction.
# Arguments:
#   $1 - sample directory containing the decompressed files
#   $2 - read tag selecting the files ("R1" or "R2")
#   $3 - path of the concatenated FASTQ to create
# Returns:
#   0 on success; non-zero if no file matches or a step fails.
#######################################
merge_reads() {
    local sample_dir=$1 read_tag=$2 merged=$3
    local -a parts=("$sample_dir"/*"$read_tag"*fastq)

    if (( ${#parts[@]} == 0 )); then
        echo "error: no $read_tag fastq files found in $sample_dir" >&2
        return 1
    fi

    # Write under a temporary name first: the caller treats an existing
    # output file as finished, so a truncated file must never get that name.
    if ! cat -- "${parts[@]}" > "$merged.partial"; then
        echo "error: failed to write $merged" >&2
        rm -f -- "$merged.partial"
        return 1
    fi

    # The decompressed copies are only removed once the result is in place.
    mv -- "$merged.partial" "$merged" && rm -- "${parts[@]}"
}

#######################################
# Submit a gzip job for a concatenated FASTQ if it exists uncompressed.
# Arguments:
#   $1 - path of the FASTQ file
# Returns:
#   0 if there was nothing to submit, otherwise the status of bsub.
#######################################
submit_gzip() {
    local fastq=$1

    [[ -e "$fastq" ]] || return 0
    bsub gzip "$fastq"
}

#######################################
# Decompress, concatenate and queue compression for one sample directory.
# Globals:
#   OUTFASTQDIRECTORY, PAIRED_OR_SINGLE_END_FRAGMENTS (read)
# Arguments:
#   $1 - sample directory
# Returns:
#   0 on success or when the sample was already processed; 1 on failure.
#######################################
process_sample() {
    local sample_dir=$1
    local -a entries=("$sample_dir"/*)
    local first_name sample out_dir fastq_r1 fastq_r2 gz status=0

    if (( ${#entries[@]} == 0 )); then
        echo "warning: skipping empty directory $sample_dir" >&2
        return 0
    fi

    first_name=${entries[0]##*/}
    sample=${first_name%%_*}  # Cuts filename string after first '_'
    if [[ -z "$sample" ]]; then
        echo "error: cannot derive a sample name from $first_name" >&2
        return 1
    fi

    out_dir=$OUTFASTQDIRECTORY/raw/$sample
    fastq_r1=$out_dir/${sample}_R1.fastq
    fastq_r2=$out_dir/${sample}_R2.fastq

    if ! mkdir -p -- "$out_dir"; then
        echo "error: cannot create $out_dir" >&2
        return 1
    fi

    # Skip samples that were already concatenated. The .gz check covers a
    # rerun after the compression job has replaced the plain FASTQ.
    if [[ ! -e "$fastq_r1" && ! -e "$fastq_r1.gz" ]]; then
        echo "Working on $sample"
        for gz in "$sample_dir"/*.gz; do
            if ! gunzip -c -- "$gz" > "${gz%.gz}"; then
                echo "error: failed to decompress $gz" >&2
                rm -f -- "${gz%.gz}"
                return 1
            fi
        done

        merge_reads "$sample_dir" R1 "$fastq_r1" || return 1

        if [[ "$PAIRED_OR_SINGLE_END_FRAGMENTS" == "paired" ]]; then
            merge_reads "$sample_dir" R2 "$fastq_r2" || return 1
        fi
    fi

    # compress fastq after concatenation, since STAR can read compressed FASTQ files.
    # submit this final compression task to cluster since compression takes a while
    submit_gzip "$fastq_r1" || status=1
    if [[ "$PAIRED_OR_SINGLE_END_FRAGMENTS" == "paired" ]]; then
        submit_gzip "$fastq_r2" || status=1
    fi

    return "$status"
}

# Paths are used as given instead of cd-ing into each sample directory, so
# relative ZIPFILEDIRECTORY / OUTFASTQDIRECTORY keep resolving against the
# directory the script was started from.
failed=0
for dirname in "$ZIPFILEDIRECTORY"/*; do
    [[ -d "$dirname" ]] || continue
    if ! process_sample "$dirname"; then
        echo "error: processing failed for $dirname" >&2
        failed=1
    fi
done

exit "$failed"

back to top