https://github.com/biocore/American-Gut
Tip revision: 68fd6d4b2fa6aeb5b4f5272c6f1006defe5b160e authored by Daniel McDonald on 17 March 2017, 17:29:27 UTC
Merge pull request #217 from ElDeveloper/conflict-fix
Merge pull request #217 from ElDeveloper/conflict-fix
Tip revision: 68fd6d4
ag
#!/usr/bin/env python
import os
import json
from gzip import open as gzopen
import click
from biom import load_table
from americangut.taxtree import (build_tree_from_taxontable,
sample_rare_unique,
build_persample_tree_from_taxontable)
@click.group()
def ag():
    """Root command group; sub-groups and commands are attached to it."""
@ag.group()
def taxtree():
    """Sub-group of ``ag`` that holds the taxonomy tree commands."""
@taxtree.command()
@click.option('--table', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM taxon table')
@click.option('--rare-threshold', required=False, default=0.01,
              help='The threshold to consider an OTU rare')
def rareunique(table, rare_threshold):
    """Print a tab-separated listing of rare and unique taxa per sample.

    A header line is written first, followed by one line per taxon in the
    form: sample id, 'rare' or 'unique', taxon.
    """
    taxon_table = load_table(table)
    tree, all_taxa = build_tree_from_taxontable(taxon_table)

    row_format = "%s\t%s\t%s"
    click.echo("#SampleID\ttype\ttaxon")

    results = sample_rare_unique(tree, None, all_taxa, rare_threshold)
    # The second element of each result is not needed for this report.
    for sample_id, _ignored, rare_taxa, unique_taxa in results:
        # All rare taxa of a sample are emitted before its unique taxa.
        for label, taxa in (('rare', rare_taxa), ('unique', unique_taxa)):
            for taxon in taxa:
                click.echo(row_format % (sample_id, label, taxon))
@taxtree.command()
@click.option('--table', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM taxon table')
@click.option('--output', required=True,
              type=click.Path(exists=False, writable=True))
def persample_json(table, output):
    """Write one gzipped JSON tree per sample into a new output directory.

    The directory given by --output is created (with any missing parents)
    and receives one '<sample>.json.gz' file for every (sample, tree) pair
    built from the input table.

    Raises OSError if the output directory already exists.
    """
    # Deliberately no "exist ok" handling: os.makedirs fails on an existing
    # directory, so results of an earlier run are never mixed with new ones.
    os.makedirs(os.path.abspath(output))

    taxon_table = load_table(table)
    for samp, tree in build_persample_tree_from_taxontable(taxon_table):
        destination = os.path.join(output, '%s.json.gz' % samp)

        # gzip streams opened with 'w'/'wb' are binary. json.dumps returns
        # text, so it must be encoded explicitly; writing the str directly
        # raises TypeError on Python 3.
        payload = json.dumps(tree, indent=2).encode('utf-8')

        with gzopen(destination, 'wb') as fp:
            fp.write(payload)
@ag.command()
@click.argument('accession-metadata', required=True, nargs=-1)
def sample_to_accession(accession_metadata):
    """Dump {accession: [sample]} and {sample: accession}

    Each ACCESSION_METADATA file is read as tab-separated text. The
    accession is the file name without its final extension, and the sample
    ID is the first column of every line that does not start with '#'.

    The mappings are written to 'accession_to_sample.json' and
    'sample_to_accession.json' in the current working directory.
    """
    samples_by_accession = {}
    accession_by_sample = {}

    for path in accession_metadata:
        accession = os.path.basename(path).rsplit('.', 1)[0]
        samples = samples_by_accession[accession] = []

        with open(path) as fp:
            for line in fp:
                if line.startswith('#'):
                    continue

                # Strip the line terminator before splitting; otherwise a
                # line without a tab keeps its newline in the sample ID and
                # a blank line registers a bogus "\n" sample.
                sample = line.rstrip('\r\n').split('\t', 1)[0]
                if not sample:
                    continue

                samples.append(sample)
                accession_by_sample[sample] = accession

    with open('accession_to_sample.json', 'w') as fp:
        json.dump(samples_by_accession, fp)

    with open('sample_to_accession.json', 'w') as fp:
        json.dump(accession_by_sample, fp)
# Dispatch to the root click group only when run as a script, so importing
# this module does not trigger command-line parsing.
if __name__ == "__main__":
    ag()