Raw File
#!/bin/bash
rawdata1=../rawdata/scatac-seq/HGC20191230001-0003_lane7
rawdata2=../rawdata/scatac-seq/HGC20191230001-0003_lane8
name1=${rawdata1}/L7_md5sum.check.out
name2=${rawdata2}/L8_md5sum.check.out
metadata=../metadata/scatac-seq
reference=../metadata/reference/hg19_insert
result=../result/scatac-seq

pre_mapping()
{
    fastp -w 4 -c -h ${metadata}/${1}.fastp.html -j ${metadata}/${1}.fastp.json \
    -i ${2}/${1}_R1_001.fastq.gz -o ${metadata}/${1}_r1.fq.gz \
    -I ${2}/${1}_R2_001.fastq.gz -O ${metadata}/${1}_r2.fq.gz 
}

mapping()
{
    bowtie2 -p 20 -t --mm --very-sensitive -x ${reference} \
    -1 ${metadata}/${1}_r1.fq.gz \
    -2 ${metadata}/${1}_r2.fq.gz | samtools view -bS - > ${metadata}/${1}.bam
    samtools view -f 2 ${metadata}/${1}.bam | samtools sort -n -o ${metadata}/${1}_nsort.bam -
    samtools fixmate -rm ${metadata}/${1}_nsort.bam ${metadata}/${1}_nsort_fix.bam
    samtools sort -o ${metadata}/${1}_fix_sort.bam ${metadata}/${1}_nsort_fix.bam
    samtools index ${metadata}/${1}_fix_sort.bam
}

post_mapping()
{
    picard MarkDuplicates I=${metadata}/${1}_fix_sort.bam \
    O=${metadata}/${1}_sort_mdup.bam M=${metadata}/${1}_mdup_metrics.txt
    samtools view -F 1804 -f 2 -q 1 -u ${metadata}/${1}_sort_mdup.bam | samtools sort  \
    -o ${metadata}/${1}_rmdup_sort.bam -
    samtools index ${metadata}/${1}_rmdup_sort.bam
    samtools view -h ${metadata}/${1}_rmdup_sort.bam | grep -v -P '\tchrM\t' | samtools view \
    -b - > ${metadata}/${1}_sort_rm.bam
    samtools index ${metadata}/${1}_sort_rm.bam
}

get_coverage()
{
    bamCoverage -b ${metadata}/${1}_sort_rm.bam \
    -o ${metadata}/${1}.SeqDepthNorm.bw -e -p 20 \
    --binSize 25
}

single_cell()
{
    for line in `cat ${1}|grep HW`
    do
        if [[ ${line} =~ R1 ]]
        then
            echo ${line%_R1_001.fastq*}
            prefix=${line%_R1_001.fastq*}
            pre_mapping "${prefix}" "${2}"
            mapping "${prefix}"
            post_mapping "${prefix}"
            get_coverage "${prefix}"
        fi
    done
}

bulk_fastq_merge()
{
    ulimit -Sn 10000
    cat ${rawdata1}/*_R1_*.fastq.gz ${rawdata2}/*_R1_*.fastq.gz > ${metadata}/bulk_R1.fastq.gz
    cat ${rawdata1}/*_R2_*.fastq.gz ${rawdata2}/*_R2_*.fastq.gz > ${metadata}/bulk_R2.fastq.gz
}

bulk_bam_merge()
{
    ulimit -Sn 10000
    samtools merge ${metadata}/*HW*_sort.bam > ${metadata}/bulk_sort.bam
}

peak_calling()
{
    macs2 callpeak -t ${metadata}/${1}_sort_rm.bam -n ${1} --outdir ${metadata} -g hs -f BAM --nomodel --shift -100 --extsize 200 -B --SPMR
    bedGraphToBigWig ${metadata}/${1}_treat_pileup.bdg ../metadata/reference/hg19_insert.chromsize ${metadata}/${1}.bw
}

quality_report()
{
    ataqv --threads 20 --peak-file ${metadata}/${1}_peaks.narrowPeak human ${metadata}/${1}_sort_mdup.bam --metrics-file ${metadata}/${1}.ataqv.json.gz
    mkarv ${metadata}/${1} ${metadata}/${1}.ataqv.json.gz
}

# mapping and get coverage
single_cell "${name1}" "${rawdata1}" > "${result}/L7_single_cell.log" 2>&1 &
single_cell "${name2}" "${rawdata2}" > "${result}/L8_single_cell.log" 2>&1 &
# get bulk and produce quality control
bulk_bam_merge > "${result}/bulk_bam_merge.log" 2>&1 &
post_mapping "bulk" > "${result}/bulk_post_mapping.log" 2>&1 
peak_calling "bulk" > "${result}/bulk_peak_calling.log" 2>&1
quality_report "bulk" > "${result}/quality_report.log" 2>&1 &
back to top