https://github.com/biocore/American-Gut
Raw File
Tip revision: 68fd6d4b2fa6aeb5b4f5272c6f1006defe5b160e authored by Daniel McDonald on 17 March 2017, 17:29:27 UTC
Merge pull request #217 from ElDeveloper/conflict-fix
Tip revision: 68fd6d4
ag
#!/usr/bin/env python

import os
import json
from gzip import open as gzopen

import click
from biom import load_table

from americangut.taxtree import (build_tree_from_taxontable,
                                 sample_rare_unique,
                                 build_persample_tree_from_taxontable)


@click.group()
def ag():
    """Top-level command group for the American Gut utilities."""
    pass


@ag.group()
def taxtree():
    """Commands that work on trees built from a BIOM taxon table."""
    pass


@taxtree.command()
@click.option('--table', required=True,
              type=click.Path(exists=True, dir_okay=False),
              help='The input BIOM taxon table')
@click.option('--rare-threshold', required=False, default=0.01,
              help='The threshold to consider an OTU rare')
def rareunique(table, rare_threshold):
    """Print the rare and unique taxa of each sample as tab-separated rows.

    A header line is emitted first, followed by one row per
    (sample, type, taxon) where type is either "rare" or "unique".
    """
    taxon_table = load_table(table)
    tree, all_taxa = build_tree_from_taxontable(taxon_table)

    click.echo("#SampleID\ttype\ttaxon")

    # No table is handed to sample_rare_unique: its second argument is None.
    results = sample_rare_unique(tree, None, all_taxa, rare_threshold)

    # The second element of each result is not used by this command.
    for sample_id, _, rare_taxa, unique_taxa in results:
        # Rare taxa are always reported before unique taxa for a sample.
        for label, taxa in (('rare', rare_taxa), ('unique', unique_taxa)):
            for taxon in taxa:
                click.echo("%s\t%s\t%s" % (sample_id, label, taxon))


@taxtree.command()
@click.option('--table', required=True, type=click.Path(exists=True,
                                                        dir_okay=False),
              help='The input BIOM taxon table')
@click.option('--output', required=True, type=click.Path(exists=False,
                                                         writable=True))
def persample_json(table, output):
    """Write one gzipped JSON tree per sample into a new output directory.

    The output directory is created by this command; os.makedirs raises
    OSError if it already exists. Each sample's tree is stored as
    "<sample>.json.gz" inside that directory.
    """
    os.makedirs(os.path.abspath(output))
    table = load_table(table)
    for samp, tree in build_persample_tree_from_taxontable(table):
        path = os.path.join(output, '%s.json.gz' % samp)
        # A gzip stream opened in binary mode only accepts bytes on
        # Python 3, so the JSON text is encoded explicitly. json.dumps
        # escapes non-ASCII by default, which keeps the encode step safe on
        # Python 2 as well.
        with gzopen(path, 'wb') as fp:
            fp.write(json.dumps(tree, indent=2).encode('utf-8'))


@ag.command()
@click.argument('accession-metadata', required=True, nargs=-1)
def sample_to_accession(accession_metadata):
    """Dump {accession: [sample]} and {sample: accession}

    Each input file is named after its accession (the file name up to the
    last dot). The first tab-separated column of every line that does not
    start with "#" is taken as a sample ID. Blank lines and lines with an
    empty first column are skipped.

    The mappings are written to accession_to_sample.json and
    sample_to_accession.json in the current working directory.
    """
    accession_to_sample = {}
    # Named differently from the function to avoid shadowing it locally.
    sample_lookup = {}

    for f in accession_metadata:
        accession = os.path.basename(f).rsplit('.', 1)[0]
        # Registered up front so a file without samples still appears in
        # the output with an empty list.
        samples = accession_to_sample[accession] = []
        with open(f) as fp:
            for line in fp:
                if line.startswith('#'):
                    continue
                # Strip the line ending first: otherwise a line without a
                # tab would yield a sample ID that ends in a newline.
                sample = line.rstrip('\r\n').split('\t', 1)[0]
                if not sample:
                    continue
                samples.append(sample)
                sample_lookup[sample] = accession

    with open('accession_to_sample.json', 'w') as fp:
        json.dump(accession_to_sample, fp)

    with open('sample_to_accession.json', 'w') as fp:
        json.dump(sample_lookup, fp)


# Dispatch to the click command group only when run as a script.
if __name__ == '__main__':
    ag()
back to top