https://github.com/galaxyproject/dunovo
Tip revision: 365a6a816f72fd29412dfe229926bc6aa178e498 authored by Nick on 16 February 2022, 00:32:38 UTC
VERSION: Oops, it should still be 3.0.2.
VERSION: Oops, it should still be 3.0.2.
Tip revision: 365a6a8
safety-not-guaranteed.py
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import errno
import logging
import sys
from bfx import getreads
# The ascii values that represent a 0 PHRED score.
QUAL_OFFSETS = {'sanger':33, 'solexa':64}
USAGE = '$ %(prog)s -q [phred score] reads.fa > reads.fq'
DESCRIPTION = """Convert FASTA to FASTQ."""
EPILOG = """WARNING: There is no quality information in a FASTA, so this must use a dummy value
for the quality scores. This is inherently false/artifactual information, so keep that in mind when
using the output in downstream tools."""
def make_argparser():
parser = argparse.ArgumentParser(usage=USAGE, description=DESCRIPTION, epilog=EPILOG)
parser.add_argument('input', metavar='reads.fa', nargs='?', type=argparse.FileType('r'),
default=sys.stdin,
help='Input FASTA reads. Omit to read from stdin.')
parser.add_argument('-o', '--output', metavar='reads.fq', type=argparse.FileType('w'),
default=sys.stdout,
help='Write output FASTQ reads to this file. If not given, will print to stdout.')
parser.add_argument('-q', '--phred-score', type=int,
help='The quality score to give to all bases. There is no meaningful quality score we can '
'automatically give, so you will have to specify an artificial one. A good choice is 40, '
'the maximum score normally output by sequencers.')
parser.add_argument('-F', '--qual-format', choices=('sanger', 'solexa'), default='sanger',
help='FASTQ quality score format. Sanger scores are assumed to begin at \'{}\' ({}). '
'Default: %(default)s.'.format(QUAL_OFFSETS['sanger'], chr(QUAL_OFFSETS['sanger'])))
parser.add_argument('-a', '--to-fasta', action='store_true',
help='Convert FASTQ to FASTA instead. This assumes the input is FASTQ.')
parser.add_argument('-l', '--log', type=argparse.FileType('w'), default=sys.stderr,
help='Print log messages to this file instead of to stderr. Warning: Will overwrite the file.')
volume = parser.add_mutually_exclusive_group()
volume.add_argument('-Q', '--quiet', dest='volume', action='store_const', const=logging.CRITICAL,
default=logging.WARNING)
volume.add_argument('-v', '--verbose', dest='volume', action='store_const', const=logging.INFO)
volume.add_argument('-D', '--debug', dest='volume', action='store_const', const=logging.DEBUG)
return parser
def main(argv):
parser = make_argparser()
args = parser.parse_args(argv[1:])
logging.basicConfig(stream=args.log, level=args.volume, format='%(message)s')
if args.phred_score:
qual_start = QUAL_OFFSETS[args.qual_format]
if qual_start+args.phred_score > 126:
fail('Error: PHRED score ({}) is too large.'.format(args.fastq_out))
qual_char = chr(qual_start+args.phred_score)
elif not args.to_fasta:
fail('Error: --phred-score is required unless --to-fasta is given.')
if args.to_fasta:
fastq_to_fasta(args.input, args.output)
else:
fasta_to_fastq(args.input, args.output, qual_char)
if args.input is not sys.stdin and not args.input.closed:
args.input.close()
if args.output is not sys.stdout and not args.output.closed:
args.output.close()
def fasta_to_fastq(fasta_file, fastq_file, qual_char):
for read in getreads.getparser(fasta_file, filetype='fasta'):
quals = qual_char * len(read.seq)
fastq_file.write('@{0}\n{1}\n+\n{2}\n'.format(read.name, read.seq, quals))
def fastq_to_fasta(fastq_file, fasta_file):
for read in getreads.getparser(fastq_file, filetype='fastq'):
fasta_file.write('>{0}\n{1}\n'.format(read.name, read.seq))
def fail(message):
logging.critical(message)
if __name__ == '__main__':
sys.exit(1)
else:
raise Exception('Unrecoverable error')
if __name__ == '__main__':
try:
sys.exit(main(sys.argv))
except IOError as ioe:
if ioe.errno != errno.EPIPE:
raise