Raw File
cleanup.py
#!/usr/bin/env python

# Description text passed to argparse and shown when the script prints its help.
helptext = '''Cleanup Script for HybSeqPipeline

The structured directory system generated by HybSeqPipeline generates a lot of files.
Many of these files are not needed for downstream analysis, and can be safely removed.

The options will allow you to specify which of the unneeded files you wish to delete.
'''

import argparse,os,sys,shutil

def list_sub_dirs(parentdir):
    '''Return the names of the directories directly inside parentdir.

    Only immediate children are reported, as bare names rather than full
    paths. Symbolic links that point to directories are included.

    Raises OSError if parentdir does not exist, is not a directory, or
    cannot be read.
    '''
    # Listing the directory directly lets a bad path surface as OSError.
    # next(os.walk(...)) would instead leak StopIteration, because os.walk
    # silently ignores listing errors and simply yields nothing.
    return [
        name
        for name in os.listdir(parentdir)
        if os.path.isdir(os.path.join(parentdir, name))
    ]

def remove_velvet():
    '''Delete every directory in the current working directory whose name starts with 'velvet'.

    Regular files with a matching name are left alone. Each matching
    directory is removed together with its contents.
    '''
    def is_velvet_dir(name):
        return name.startswith("velvet") and os.path.isdir(name)

    # Build the full list of targets first, then delete them.
    doomed = list(filter(is_velvet_dir, os.listdir(".")))
    for path in doomed:
        shutil.rmtree(path)

def remove_spades():
    '''Delete every directory in the current working directory whose name ends with 'spades'.

    Regular files with a matching name are left alone. Each matching
    directory is removed together with its contents.
    '''
    # Decide what to delete before deleting anything.
    targets = []
    for entry in os.listdir("."):
        if not entry.endswith("spades"):
            continue
        if os.path.isdir(entry):
            targets.append(entry)

    for target in targets:
        shutil.rmtree(target)

def main():
    '''Command-line entry point: run remove_spades() in every gene directory.

    Parses the single positional argument ``prefix``, changes the working
    directory to it, and calls remove_spades() inside each of its immediate
    subdirectories. When called without arguments the help text is printed
    and the process exits with status 1. The process also exits with status
    1 when the prefix directory cannot be entered or listed.
    '''
    parser = argparse.ArgumentParser(description=helptext,formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("prefix",metavar="prefix",help="Directory generated by HybSeqPipeline.")

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()

    # Only the filesystem calls belong in the try body: an OSError raised by
    # print() (e.g. a closed stdout pipe) must not be reported as a missing
    # directory.
    try:
        os.chdir(args.prefix)
        gene_dirs = list_sub_dirs('.')
    except OSError:
        print("Directory '{}' does not exist!".format(args.prefix))
        sys.exit(1)

    print("Found {} gene directories".format(len(gene_dirs)))

    # Remember the absolute location of the prefix directory. Returning with
    # os.chdir("..") is wrong when a gene directory is a symbolic link,
    # because ".." then resolves to the parent of the link target.
    base_dir = os.getcwd()
    for gene in gene_dirs:
        os.chdir(os.path.join(base_dir, gene))
        try:
            remove_spades()
        finally:
            # Always return to the prefix directory, even if removal failed.
            os.chdir(base_dir)
        
        

# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
back to top