https://github.com/jvivian/one_off_scripts
Tip revision: 3ad04be99cd01e6a047c1b530cc8a1de82bd862e authored by John Vivian on 02 February 2017, 01:31:18 UTC
Refactor SRA pipeline to use faster method than fastq-dump
Refactor SRA pipeline to use faster method than fastq-dump
Tip revision: 3ad04be
split_interleaved.py
#!/usr/bin/env python2.7
"""
Credits to Ian Fiddes
Splits an interleaved (R1 and R2) fastq stream from a samtools pipe into separate gzipped R1 and R2 fastq files.
For use with samtools (> version 1.0)
Example of how to use this script:
samtools bamshuf -uO foo.bam tmp | samtools bam2fq -s /dev/null - | ./split_interleaved.py R1.fq.gz R2.fq.gz
"""
import argparse
import itertools
import gzip
import sys
def main():
    """Split interleaved FASTQ lines from stdin into two gzipped files.

    Takes exactly two positional command-line arguments: the output path
    for the first mate (R1) and the output path for the second mate (R2).
    Standard input is consumed eight lines at a time; the first four lines
    of each group are written to the first file and the last four lines to
    the second file.

    Raises:
        ValueError: if the input ends with a final group of fewer than
            eight lines. Every complete group before it has already been
            written, and both output files are closed before the error
            propagates.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('files', nargs=2)
    args = parser.parse_args()
    left_path, right_path = args.files

    # gzip handles take bytes, so read raw bytes from stdin too. On
    # Python 3 the byte stream is sys.stdin.buffer; on Python 2 sys.stdin
    # already yields byte strings and has no .buffer attribute.
    lines = iter(getattr(sys.stdin, "buffer", sys.stdin))

    with gzip.open(left_path, "wb") as left_handle, \
            gzip.open(right_path, "wb") as right_handle:
        while True:
            # Pull one read pair (8 lines) at a time so memory use stays
            # constant regardless of input size.
            group = list(itertools.islice(lines, 8))
            if not group:
                break
            if len(group) < 8:
                # A zip-based grouper would drop these lines silently;
                # fail loudly so truncated input is noticed.
                raise ValueError(
                    "Interleaved FASTQ input ended mid-pair: expected 8 "
                    "lines per read pair, got %d in the final group"
                    % len(group))
            left_handle.write(b"".join(group[:4]))
            right_handle.write(b"".join(group[4:]))
# Run the splitter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()