https://github.com/CFSAN-Biostatistics/snp-pipeline
Tip revision: 065e93a7d966c8534b77b6d9de26dd350cebc35d authored by Justin Payne on 05 May 2023, 03:45:32 UTC
bugfixes, trying to improve Conda support
bugfixes, trying to improve Conda support
Tip revision: 065e93a
Dockerfile
# Base image: Amazon Corretto 8 provides the Java runtime used by the
# jar-based tools (VarScan, Picard, GATK) installed further down.
FROM amazoncorretto:8

# MAINTAINER is deprecated; author information is carried as OCI labels.
LABEL org.opencontainers.image.authors="Justin Payne <justin.payne@fda.hhs.gov>" \
      org.opencontainers.image.source="https://github.com/CFSAN-Biostatistics/snp-pipeline"
# Scratch directory: every download/build step below runs from /tmp.
WORKDIR /tmp/

# Build toolchain, development headers and basic utilities, installed in a
# single layer. The yum cache is purged in the same layer so it does not
# persist in the image.
RUN yum groupinstall -y 'Development Tools' \
    && yum install -y \
        bzip2-devel \
        gcc-c++ \
        git \
        hostname \
        make \
        ncurses-devel \
        python3 \
        python3-devel \
        tar \
        wget \
        which \
        xz-devel \
        zlib-devel \
    && yum clean all
# Versions of the bundled tools. Each one can be overridden at build time,
# for example:
#   docker build --build-arg SAMTOOLS_VER=<version> .
# https://docs.docker.com/engine/reference/builder/#using-arg-variables
ARG BCFTOOLS_VER
ARG BOWTIE2_VER
ARG HTSLIB_VER
ARG GATK_VER
ARG PICARD_VER
ARG SAMTOOLS_VER
ARG SRATOOLKIT_VER
ARG VARSCAN_VER

# Persist the selected versions in the image environment. The `:-` form
# falls back to the default when the build argument is unset or empty.
ENV BCFTOOLS_VER=${BCFTOOLS_VER:-1.8} \
    BOWTIE2_VER=${BOWTIE2_VER:-2.5.1} \
    HTSLIB_VER=${HTSLIB_VER:-1.3.2} \
    GATK_VER=${GATK_VER:-3.8-1-0-gf15c1c3ef} \
    PICARD_VER=${PICARD_VER:-2.27.5} \
    SAMTOOLS_VER=${SAMTOOLS_VER:-1.8} \
    SRATOOLKIT_VER=${SRATOOLKIT_VER:-2.8.1} \
    VARSCAN_VER=${VARSCAN_VER:-2.3.9}
# The steps below stream each source archive from wget straight into tar.
# Without pipefail the exit status of a pipe is that of its last command only,
# so a failed download could go unnoticed. Run every following RUN under bash
# with pipefail so any failing stage aborts the build.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

#install bowtie2
# Each tool is unpacked into the current directory, then built and installed
# from inside a subshell so the working directory of the RUN is not changed.
RUN wget -qO - "https://github.com/BenLangmead/bowtie2/archive/v${BOWTIE2_VER}.tar.gz" | tar xz \
    && (cd "bowtie2-${BOWTIE2_VER}" && make && make install)

#install htslib, samtools and bcftools
RUN wget -qO - "https://github.com/samtools/htslib/releases/download/${HTSLIB_VER}/htslib-${HTSLIB_VER}.tar.bz2" | tar xj \
    && (cd "htslib-${HTSLIB_VER}" && make && make install)
RUN wget -qO - "https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2" | tar xj \
    && (cd "samtools-${SAMTOOLS_VER}" && make && make install)
RUN wget -qO - "https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VER}/bcftools-${BCFTOOLS_VER}.tar.bz2" | tar xj \
    && (cd "bcftools-${BCFTOOLS_VER}" && make && make install)
#install pip for python3
# The bootstrap script is removed and pip's download cache is disabled in the
# same layer, so neither is baked into the image.
# NOTE(review): the unversioned get-pip.py tracks the latest pip release and
# rejects Python versions that release no longer supports; if the base image's
# python3 is older, switch to https://bootstrap.pypa.io/pip/<X.Y>/get-pip.py.
# Confirm against the python3 shipped by the base image.
RUN wget -q https://bootstrap.pypa.io/get-pip.py \
    && python3 get-pip.py --no-cache-dir \
    && rm -f get-pip.py
#install VarScan (first of the prebuilt tools: VarScan, SRA Toolkit, GATK, Picard)
# The jar is fetched over HTTPS and written directly to its final location,
# so no second copy is left behind in the working directory.
RUN wget -q -O /usr/bin/VarScan.jar \
        "https://downloads.sourceforge.net/project/varscan/VarScan.v${VARSCAN_VER}.jar"
# RUN wget https://www.niehs.nih.gov/research/resources/assets/docs/artsrcchocolatecherrycake031915linuxtgz.tgz -q \
# && tar -zxf /tmp/artsrcchocolatecherrycake031915linuxtgz.tgz \
# && cd /tmp/art_src_ChocolateCherryCake_Linux \
# && ./configure \
# && make \
# && make install \
# && cd /tmp/
#install fastq-dump from the SRA Toolkit
# Only the versioned fastq-dump binary is kept; the downloaded archive and the
# unpacked toolkit tree are deleted in the same layer so they do not add to
# the image size. The archive is fetched over HTTPS.
RUN wget -q -O "/tmp/sratoolkit.${SRATOOLKIT_VER}-ubuntu64.tar.gz" \
        "https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRATOOLKIT_VER}/sratoolkit.${SRATOOLKIT_VER}-ubuntu64.tar.gz" \
    && tar -zxf "/tmp/sratoolkit.${SRATOOLKIT_VER}-ubuntu64.tar.gz" -C /tmp \
    && cp "/tmp/sratoolkit.${SRATOOLKIT_VER}-ubuntu64/bin/fastq-dump.${SRATOOLKIT_VER}" /usr/bin/fastq-dump \
    && rm -rf "/tmp/sratoolkit.${SRATOOLKIT_VER}-ubuntu64" \
              "/tmp/sratoolkit.${SRATOOLKIT_VER}-ubuntu64.tar.gz"
#install GATK
# Example archive URL for the default version:
# https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2
# The archive is saved under an explicit name (rather than whatever name the
# server suggests) so the tar step always finds it. Only GenomeAnalysisTK.jar
# is kept; the archive and unpacked tree are removed in the same layer.
RUN wget -q -O "/tmp/GenomeAnalysisTK-${GATK_VER}.tar.bz2" \
        "https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-${GATK_VER}.tar.bz2" \
    && tar -jxf "/tmp/GenomeAnalysisTK-${GATK_VER}.tar.bz2" -C /tmp \
    && cp "/tmp/GenomeAnalysisTK-${GATK_VER}/GenomeAnalysisTK.jar" /usr/bin/GenomeAnalysisTK.jar \
    && rm -rf "/tmp/GenomeAnalysisTK-${GATK_VER}" \
              "/tmp/GenomeAnalysisTK-${GATK_VER}.tar.bz2"
#install Picard
# The release jar is written directly to its final location, so no duplicate
# copy is left in the working directory.
RUN wget -q -O /usr/bin/picard.jar \
        "https://github.com/broadinstitute/picard/releases/download/${PICARD_VER}/picard.jar"
#install snp-pipeline and snp-mutator
# --no-cache-dir keeps pip's download/wheel cache out of the image layers.
RUN pip install --no-cache-dir numpy biopython snp-mutator

# Copy the build context into /src and install the package found there.
WORKDIR /src/
COPY ./ /src/
RUN pip install --no-cache-dir .
# Runtime environment, written in key=value form (the space-separated
# `ENV key value` form is legacy syntax). Values are unchanged.
# NOTE(review): the native tools are installed with `make install`, so the
# /tmp/<tool>-<version>/bin entries appended to PATH may not be needed.
# Confirm before removing them.
ENV PATH="$PATH:/tmp/samtools-$SAMTOOLS_VER/bin:/tmp/bcftools-$BCFTOOLS_VER/bin:/tmp/bowtie2-$BOWTIE2_VER/bin"
# Jar files made available to Java through the class path.
ENV CLASSPATH="/usr/bin/VarScan.jar:/usr/bin/picard.jar:/usr/bin/GenomeAnalysisTK.jar"
# NOTE(review): presumably the number of CPU cores the pipeline may use; confirm.
ENV NUMCORES=4
#Test snp_pipeline
# Build-time smoke test: run the pipeline on the bundled lambda virus data set
# and compare three result files against the bundled expected results. Any
# difference makes `diff -q` exit non-zero, which fails the image build.
# Both data sets are fetched with the same `cfsan_snp_pipeline data`
# subcommand, rather than mixing it with the separate
# copy_snppipeline_data.py script.
WORKDIR /test/
RUN cfsan_snp_pipeline data lambdaVirusInputs testLambdaVirus \
    && cd testLambdaVirus \
    && cfsan_snp_pipeline run -s samples reference/lambda_virus.fasta \
    && cfsan_snp_pipeline data lambdaVirusExpectedResults expectedResults \
    && diff -q snplist.txt expectedResults/snplist.txt \
    && diff -q snpma.fasta expectedResults/snpma.fasta \
    && diff -q referenceSNP.fasta expectedResults/referenceSNP.fasta
# The container runs run_snp_pipeline.sh; arguments given to `docker run`
# are passed to that script. Exec (JSON-array) form is used, so the script is
# started directly rather than through a shell.
ENTRYPOINT ["run_snp_pipeline.sh"]
# Default argument when none is supplied on the command line
# (presumably prints the script's usage/help; confirm).
CMD ["-h"]