https://github.com/vejnar/LabxPipe
Tip revision: adf29e860d11a9d1d3ecb06afff81bbf46207d62 authored by vejnar on 19 April 2023, 15:50:13 UTC
Add --suffix to extract command
Add --suffix to extract command
Tip revision: adf29e8
lxpipe_extract.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright © 2023 Charles E. Vejnar
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://www.mozilla.org/MPL/2.0/.
#
"""Extract & rename project output files"""
import argparse
import json
import os
import sys
import labxdb
import labxpipe.utils
def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path) as f:
        return json.load(f)


def _split_extension(fname, extensions):
    """Split *fname* into a ``(base, extension)`` pair.

    Each entry of *extensions* is tried in order; the first one found
    anywhere in *fname* splits the name at its last occurrence, so everything
    from that occurrence to the end of the name is returned as the extension.
    NOTE(review): this presumably keeps a known extension together with a
    trailing suffix such as a compression extension; confirm against
    labxpipe.utils.all_exts.

    If no entry matches, the name is split at its last dot. A name without
    any dot is returned whole with an empty extension.
    """
    for ext in extensions:
        idx = fname.rfind(ext)
        if idx != -1:
            return fname[:idx], fname[idx:]
    idx = fname.rfind('.')
    if idx == -1:
        # No dot at all: slicing with -1 would chop the last character off
        # the base and use it as the extension.
        return fname, ''
    return fname[:idx], fname[idx:]


def main(argv=None):
    """Extract & rename project output files.

    Builds a configuration from JSON config file(s), command-line options and
    the pipeline JSON file, optionally fetches short labels from LabxDB, then
    renames every existing ``<path_output>/<ref>/<step>/<filename>`` to
    ``<path_extract>/<name><extension>``, where ``<name>`` is derived from the
    label and/or the reference.

    Args:
        argv: Complete argument vector, program name included. Defaults to
            ``sys.argv``. When its second element is ``'extract'`` the script
            is assumed to have been started through a wrapper command and
            that element is skipped.

    Returns:
        1 if the pipeline file does not exist, if ``--files`` is not a list
        of step,filename pairs, or if two files would get the same output
        name; None otherwise.
    """
    if argv is None:
        argv = sys.argv

    # Started from wrapper?
    prog = os.path.basename(argv[0])
    if len(argv) > 1 and argv[1] == 'extract':
        argv_parser = argv[2:]
        prog += ' extract'
    else:
        argv_parser = argv[1:]

    # Parse arguments
    parser = argparse.ArgumentParser(prog=prog, description='Extract & rename project output files.')
    parser.add_argument('-c', '--pipeline', dest='path_pipeline', action='store', required=True, help='Path to pipeline')
    parser.add_argument('-f', '--files', dest='files', action='store', required=True, help='Files to extract (comma separated list of step,filename pairs)')
    parser.add_argument('-o', '--path_extract', dest='path_extract', action='store', default='.', help='Path to output')
    parser.add_argument('-l', '--label', dest='use_label', action='store_true', help='Use label to rename')
    parser.add_argument('-r', '--reference', dest='use_reference', action='store_true', help='Use reference to rename')
    parser.add_argument('-w', '--lowercase', dest='lowercase', action='store_true', help='Lowercase filename')
    parser.add_argument('-x', '--suffix', dest='suffix', action='store_true', help='Suffix input filename to output filename')
    parser.add_argument('-e', '--extensions', dest='search_extensions', action='store', help='File extensions to search (comma separated)')
    parser.add_argument('-d', '--dry_run', dest='dry_run', action='store_true', help='Dry run')
    parser.add_argument('--path_config', dest='path_config', action='store', help='Path to config')
    parser.add_argument('--http_url', '--labxdb_http_url', dest='labxdb_http_url', action='store', help='Database HTTP URL')
    parser.add_argument('--http_login', '--labxdb_http_login', dest='labxdb_http_login', action='store', help='Database HTTP login')
    parser.add_argument('--http_password', '--labxdb_http_password', dest='labxdb_http_password', action='store', help='Database HTTP password')
    parser.add_argument('--http_path', '--labxdb_http_path', dest='labxdb_http_path', action='store', help='Database HTTP path')
    parser.add_argument('--http_db', '--labxdb_http_db', dest='labxdb_http_db', action='store', help='Database HTTP DB')
    args = parser.parse_args(argv_parser)

    # Get config (JSON single file or all files in path_config)
    config = {}
    paths = []
    if args.path_config is None:
        if 'HTS_CONFIG_PATH' in os.environ:
            paths.append(os.environ['HTS_CONFIG_PATH'])
        elif 'XDG_CONFIG_HOME' in os.environ:
            paths.append(os.path.join(os.environ['XDG_CONFIG_HOME'], 'hts'))
    else:
        paths.append(args.path_config)
    for path in paths:
        if os.path.isdir(path):
            # Files are merged in sorted order: later files override earlier ones
            for f in sorted(os.listdir(path)):
                if f.endswith('.json'):
                    config = {**config, **_load_json(os.path.join(path, f))}
        elif os.path.isfile(path):
            config = {**config, **_load_json(path)}

    # Input local config from args
    for a, v in vars(args).items():
        # An argument overrides the config file only when it differs from its default
        if v is not None and (a not in config or v != parser.get_default(a)):
            config[a] = v
        if a == 'search_extensions':
            config[a] = [] if v is None else [r.strip() for r in v.split(',')]

    # Load config: Project (keys of the pipeline file override all previous ones)
    if not os.path.exists(config['path_pipeline']):
        print('ERROR: Pipeline file not found')
        return 1
    config = {**config, **_load_json(config['path_pipeline'])}

    # Parse files argument
    raw_files = args.files.split(',')
    # Explicit check rather than assert: asserts are removed when running with -O
    if len(raw_files) % 2 != 0:
        print('ERROR: --files must be a list of pairs step,filename comma separated')
        return 1
    # Split files into pairs
    files = list(zip(raw_files[0::2], raw_files[1::2]))

    # Default rename uses label
    if args.use_label is False and args.use_reference is False:
        args.use_label = True

    # Get info
    if 'db' in config['ref_info_source']:
        # Init. DBLink
        if 'labxdb_http_path' not in config and 'labxdb_http_db' not in config:
            if 'labxdb_http_path_seq' in config:
                config['labxdb_http_path'] = config['labxdb_http_path_seq']
            else:
                config['labxdb_http_db'] = 'seq'
        dbl = labxdb.DBLink(config.get('labxdb_http_url'), config.get('labxdb_http_login'), config.get('labxdb_http_password'), config.get('labxdb_http_path'), config.get('labxdb_http_db'))
        # Create ref_infos
        config['ref_infos'] = {}
        # Get info from run level
        if 'run_refs' in config:
            for run_ref in config['run_refs']:
                run = dbl.get('run/get-ref/' + run_ref)[0][0]
                replicate = dbl.get('replicate/get-ref/' + run['replicate_ref'])[0][0]
                config['ref_infos'][run_ref] = {'label_short': replicate['label_short']}
        # Get info from replicate level
        if 'replicate_refs' in config:
            for replicate_ref in config['replicate_refs']:
                replicate = dbl.get('replicate/get-ref/' + replicate_ref)[0][0]
                config['ref_infos'][replicate_ref] = {'label_short': replicate['label_short']}

    # Create move list
    moves = []
    extensions = labxpipe.utils.all_exts + config['search_extensions']
    for ref in labxpipe.utils.get_first_key(('run_refs', 'replicate_refs'), config):
        for step, fname in files:
            path_in = os.path.join(config['path_output'], ref, step, fname)
            # Missing input files are skipped silently
            if not os.path.exists(path_in):
                continue
            fname_base, fname_ext = _split_extension(fname, extensions)
            if args.use_label:
                label = labxpipe.utils.label2var(config['ref_infos'][ref]['label_short'])
            if args.use_label and args.use_reference:
                name = f'{label}_{ref}'
            elif args.use_label:
                name = label
            elif args.use_reference:
                name = ref
            if args.suffix:
                name += f'_{fname_base}'
            if args.lowercase:
                name = name.lower()
            path_out = os.path.join(config['path_extract'], name + fname_ext)
            moves.append((path_in, path_out))

    # Check move filenames are unique (before moving anything)
    seen = set()
    for path_in, path_out in moves:
        if path_out in seen:
            print(f"ERROR: output name {path_out} isn't unique. Consider options -l/-r")
            return 1
        seen.add(path_out)

    # Move
    for path_in, path_out in moves:
        if config['dry_run']:
            print(f'Move {path_in} -> {path_out}')
        else:
            os.rename(path_in, path_out)
if __name__ == '__main__':
    # Run the command-line entry point and hand its return value to the
    # interpreter as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
