https://github.com/jvivian/one_off_scripts
Raw File
Tip revision: 3ad04be99cd01e6a047c1b530cc8a1de82bd862e authored by John Vivian on 02 February 2017, 01:31:18 UTC
Refactor SRA pipeline to use faster method than fastq-dump
Tip revision: 3ad04be
create-test-inputs.py
#!/usr/bin/env python2.7
import argparse
import os
import shutil
import subprocess
import sys


def make_reference(ref_path, chrom):
    """
    Extract a single sequence from a FASTA reference into a new file.

    The output is named ``<chrom>.<file name of ref_path>`` and is written to
    the same directory as ``ref_path``. Running this also leaves the samtools
    FASTA index for ``ref_path`` on disk.

    :param str ref_path: Path to the reference FASTA
    :param str chrom: Name of the sequence to extract (e.g. 'chr6')
    :raises subprocess.CalledProcessError: if a samtools call exits non-zero
    """
    # Prefix only the file name: prefixing the whole path would turn
    # 'data/hg19.fa' into the bogus 'chr6.data/hg19.fa'.
    out_path = os.path.join(os.path.dirname(ref_path), chrom + '.' + os.path.basename(ref_path))
    # Index before the output file is created so that an indexing failure
    # does not leave an empty output file behind.
    subprocess.check_call(['samtools', 'faidx', ref_path])
    with open(out_path, 'w') as f:
        subprocess.check_call(['samtools', 'faidx', ref_path, chrom], stdout=f)


def make_gtf(gtf_path, chrom):
    """
    Write the records of a GTF file that belong to one chromosome.

    A line is kept when its first tab-separated column equals ``chrom``
    exactly, so 'chr6' does not pick up 'chr60' or 'chr6_random'. Comment
    lines (starting with '#') are therefore not copied. The output is named
    ``<chrom>.<file name of gtf_path>`` and is written to the same directory
    as ``gtf_path``.

    :param str gtf_path: Path to the GTF
    :param str chrom: Sequence name to keep (e.g. 'chr6')
    :raises ValueError: if no line of the GTF belongs to ``chrom``
    """
    # Prefix only the file name: prefixing the whole path would turn
    # 'data/genes.gtf' into the bogus 'chr6.data/genes.gtf'.
    out_path = os.path.join(os.path.dirname(gtf_path), chrom + '.' + os.path.basename(gtf_path))
    # Work on bytes so the copy is exact regardless of the file's encoding.
    wanted = chrom.encode('utf-8') + b'\t'
    kept = 0
    with open(gtf_path, 'rb') as src, open(out_path, 'wb') as dst:
        for line in src:
            if line.startswith(wanted):
                dst.write(line)
                kept += 1
    if not kept:
        # An empty test input is never useful; fail loudly instead of
        # leaving an empty file around.
        os.remove(out_path)
        raise ValueError('No records for {} found in {}'.format(chrom, gtf_path))


def make_bam(bam_path, chrom):
    """
    Extract the alignments of one chromosome from a BAM into a new BAM.

    The output is named ``<chrom>.<file name of bam_path>`` and is written to
    the same directory as ``bam_path``. Running this also leaves the samtools
    index for ``bam_path`` on disk.

    :param str bam_path: Path to the BAM
    :param str chrom: Region to extract (e.g. 'chr6')
    :raises subprocess.CalledProcessError: if a samtools call exits non-zero
    """
    # Prefix only the file name: prefixing the whole path would turn
    # 'data/sample.bam' into the bogus 'chr6.data/sample.bam'.
    out_path = os.path.join(os.path.dirname(bam_path), chrom + '.' + os.path.basename(bam_path))
    # Index before the output file is created so that an indexing failure
    # does not leave an empty output file behind.
    subprocess.check_call(['samtools', 'index', bam_path])
    with open(out_path, 'wb') as f:
        # -b: emit BAM, -h: include the header in the output.
        subprocess.check_call(['samtools', 'view', '-b', '-h', bam_path, chrom], stdout=f)


def truncate_fastq(fastq_path, num_reads=5000):
    """
    Copy the first ``num_reads`` reads of a FASTQ file into a new file.

    Reads are assumed to occupy four lines each, so the first
    ``4 * num_reads`` lines are copied verbatim (20,000 lines by default).
    The output is named ``trunc.<file name of fastq_path>`` and is written to
    the same directory as ``fastq_path``. A shorter input is copied whole; a
    ``num_reads`` of zero or less yields an empty output.

    :param str fastq_path: Path to the (uncompressed) FASTQ
    :param int num_reads: Number of reads to keep
    """
    lines_to_keep = 4 * num_reads
    # Prefix only the file name: prefixing the whole path would turn
    # 'data/r1.fastq' into the bogus 'trunc.data/r1.fastq'.
    out_path = os.path.join(os.path.dirname(fastq_path), 'trunc.' + os.path.basename(fastq_path))
    # Binary mode keeps the copy byte-exact and avoids any decoding errors.
    with open(fastq_path, 'rb') as src, open(out_path, 'wb') as dst:
        for line_number, line in enumerate(src):
            if line_number >= lines_to_keep:
                break
            dst.write(line)


def make_vcf(vcf_path, chrom):
    """
    Extract the variants of one chromosome from a VCF into a new VCF.

    vcftools writes ``<out>.recode.vcf`` and ``<out>.log``; the recoded file
    is renamed to ``<chrom>.<file name of vcf_path>`` (in the same directory
    as ``vcf_path``) and the by-products are removed.

    :param str vcf_path: Path to the VCF
    :param str chrom: Chromosome to keep (e.g. 'chr6')
    :raises subprocess.CalledProcessError: if vcftools exits non-zero
    """
    # Prefix only the file name: prefixing the whole path would turn
    # 'data/calls.vcf' into the bogus 'chr6.data/calls.vcf'.
    out_path = os.path.join(os.path.dirname(vcf_path), chrom + '.' + os.path.basename(vcf_path))
    subprocess.check_call(['vcftools', '--vcf', vcf_path, '--chr', chrom, '--recode', '--out', out_path])
    shutil.move(out_path + '.recode.vcf', out_path)
    # Clean up vcftools by-products. Removal is tolerant of missing files:
    # NOTE(review): the '.vcfidx' index next to the input is presumably only
    # produced by some vcftools versions, so its absence must not be an error.
    for leftover in (out_path + '.log', vcf_path + '.vcfidx'):
        if os.path.exists(leftover):
            os.remove(leftover)


def main():
    """
    Author: John Vivian

    Make test inputs used for continuous integration (or other things)

    Dependencies
    ------------
    Samtools: apt-get install samtools
    VCFtools: apt-get install vcftools
    """
    # NOTE: the docstring above doubles as the --help description, so it is
    # user-facing text rather than just documentation.
    parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=argparse.RawTextHelpFormatter)
    subparsers = parser.add_subparsers(dest='command')
    # --chr belongs to the top-level parser, so it has to be given before the
    # sub-command on the command line.
    parser.add_argument('--chr', type=str, default='chr6', help='Determine chromosome to use')
    ref = subparsers.add_parser('reference', help='Generate a test reference (chr6)')
    gtf = subparsers.add_parser('gtf', help='Generate a test GTF file (chr6)')
    bam = subparsers.add_parser('bam', help='Generate a test Bam (chr6)')
    trunc = subparsers.add_parser('truncate-fastq', help='Truncates a fastq to 5,000 reads')
    vcf = subparsers.add_parser('vcf', help='Generate a test VCF (chr6)')
    # Add commands
    ref.add_argument('reference', type=str, help='Path to reference')
    gtf.add_argument('gtf', type=str, help='Path to GTF')
    bam.add_argument('bam', type=str, help='Path to bam')
    trunc.add_argument('fastqs', type=str, nargs='+', help='Path to fastq(s)')
    vcf.add_argument('vcf', type=str, help='Path to VCF')

    # Show the full help when invoked without arguments. This must happen
    # before parse_args(): where the sub-command is mandatory (Python 2.7),
    # parse_args() exits with a terse usage error and the check would never
    # be reached.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    if args.command == 'reference':
        make_reference(args.reference, args.chr)
    elif args.command == 'gtf':
        make_gtf(args.gtf, args.chr)
    elif args.command == 'bam':
        make_bam(args.bam, args.chr)
    elif args.command == 'truncate-fastq':
        for fastq in args.fastqs:
            truncate_fastq(fastq)
    elif args.command == 'vcf':
        make_vcf(args.vcf, args.chr)
    else:
        # Reached when options were given without a sub-command on
        # interpreters whose argparse treats the sub-command as optional.
        parser.error('a sub-command is required')


# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()
back to top