# https://github.com/jvivian/one_off_scripts
# Raw File
# Tip revision: 3ad04be99cd01e6a047c1b530cc8a1de82bd862e authored by John Vivian on 02 February 2017, 01:31:18 UTC
# Refactor SRA pipeline to use faster method than fastq-dump
# Tip revision: 3ad04be
# upload_files_encrypted_to_s3.py
#!/usr/bin/env python2.7
# John Vivian
# 9-18-15
"""
Upload (copy) the files in a directory to S3, with or without encryption
"""
import argparse
import hashlib
import os
import shutil
import subprocess
import sys
import tempfile
from urlparse import urlparse


def generate_unique_key(master_key_path, url):
    """
    Derive a per-file encryption key from the master key and the file's URL.

    The key is the SHA-256 digest of the master key bytes followed by the URL,
    so the same master key and URL always produce the same key.

    :param str master_key_path: Path to a file holding exactly 32 bytes of master key
    :param str url: URL of the object the key is derived for
    :return: The 32-byte derived key
    :raises ValueError: If the master key file does not hold exactly 32 bytes
    """
    # Binary mode: the key is raw bytes and must not go through newline
    # translation or text decoding.
    with open(master_key_path, 'rb') as f:
        master_key = f.read()
    if len(master_key) != 32:
        # The key itself is deliberately left out of the message so that it
        # cannot leak into tracebacks or logs.
        raise ValueError('Invalid Key! Must be 32 characters. '
                         'Path: {}, Length: {}'.format(master_key_path, len(master_key)))
    # Hash bytes only: a text URL is encoded explicitly instead of relying on
    # implicit str/unicode coercion.
    if not isinstance(url, bytes):
        url = url.encode('utf-8')
    # A SHA-256 digest is always 32 bytes, so no length check is needed here.
    return hashlib.sha256(master_key + url).digest()


def main():
    """
    Upload files with/without encryption to s3

    Every regular file directly inside the given directory (sub-directories
    are skipped) is uploaded to the given S3 path with `s3am upload --resume`.
    All uploads are started in parallel. If a master key is supplied, each
    file is encrypted with its own key, derived from the master key and the
    file's https://s3-us-west-2.amazonaws.com/ URL.

    Exits with a non-zero status if any upload fails.
    """
    # NOTE: the docstring above doubles as the --help description.
    parser = argparse.ArgumentParser(description=main.__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d', '--directory', required=True, help='Directory for files to upload')
    parser.add_argument('-k', '--master-key', default=None, help='Path to master key')
    parser.add_argument('-s', '--s3-path', required=True, help='S3 path to upload: i.e. s3://bucket/dir')
    args = parser.parse_args()
    # Parse and check s3 path. parser.error() is used instead of assert so the
    # check survives `python -O` and the user gets a usage message.
    s3_url = urlparse(args.s3_path)
    if s3_url.scheme != 's3':
        parser.error('s3 path is in an incorrect format. s3://bucket/dir. \n{}'.format(args.s3_path))
    url_base = 'https://s3-us-west-2.amazonaws.com/'
    # Only regular files directly inside the directory are uploaded
    files = []
    for name in os.listdir(args.directory):
        path = os.path.abspath(os.path.join(args.directory, name))
        if os.path.isfile(path):
            files.append(path)
    # Derived keys are secrets: keep them in a private temporary directory
    # (mkdtemp creates it readable by the current user only) rather than the
    # working directory, and remove it once every upload has finished.
    key_dir = tempfile.mkdtemp(prefix='s3am-keys-') if args.master_key else None
    uploads = []  # (file path, running s3am process)
    failed = []
    try:
        for fpath in files:
            fname = os.path.basename(fpath)
            # S3AM base call
            command = ['s3am', 'upload', '--resume']
            if args.master_key:
                # The key is derived from this URL, so its construction must
                # stay byte-for-byte stable or existing objects can no longer
                # be decrypted.
                url = os.path.join(url_base, s3_url.netloc, s3_url.path.lstrip('/'), fname)
                new_key = generate_unique_key(args.master_key, url)
                print('New encryption key formed from url: {}'.format(url))

                key_fname = os.path.join(key_dir, fname + '.key')
                with open(key_fname, 'wb') as f_out:
                    f_out.write(new_key)

                command.extend(['--sse-key-file', key_fname])

            command.extend(['file://{}'.format(fpath),
                            os.path.join(args.s3_path, fname)])
            print('Command: {}\n'.format(' '.join(command)))
            uploads.append((fpath, subprocess.Popen(command)))
    finally:
        try:
            # Reap every process that was started, even if spawning a later
            # one raised, and record which uploads did not succeed.
            failed = [fpath for fpath, proc in uploads if proc.wait() != 0]
        finally:
            # Key files may only be removed after the processes reading them exit
            if key_dir is not None:
                shutil.rmtree(key_dir, ignore_errors=True)

    if failed:
        # Surface upload failures through the exit status instead of discarding them
        sys.exit('{} of {} uploads failed:\n{}'.format(len(failed), len(uploads), '\n'.join(failed)))


# Run the upload only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
# back to top