from github import Github, BadCredentialsException
from peyotl.nexson_syntax import write_as_json
from peyotl.phylesystem import Phylesystem
from peyotl.collections import TreeCollectionStore
from peyotl.utility import read_config as read_peyotl_config
from peyotl.utility import get_config as get_peyotl_config
from ConfigParser import SafeConfigParser
from datetime import datetime
import tempfile
import logging
import json
import os

def get_private_dir(request):
    app_name = request.application
    leader = request.env.web2py_path
    return '%s/applications/%s/private' % (leader, app_name)

def atomic_write_json_if_not_found(obj, dest, request):
    if os.path.exists(dest):
        return False
    dir = get_private_dir(request)
    handle, tmpfn = tempfile.mkstemp(suffix='.json', dir=dir, text=True)
    # mkstemp opens the file and returns a file descriptor, 
    #   but we are using write_as_json to open with the right encoding
    os.close(handle)
    write_as_json(obj, tmpfn, indent=2, sort_keys=True)
    if os.path.exists(dest):
        # another writer created dest while we were writing; discard our temp copy
        os.remove(tmpfn)
        return False
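    # Note: os.rename is atomic on POSIX filesystems as long as tmpfn and dest live
    # on the same filesystem (both are normally under this app's private directory).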
    os.rename(tmpfn, dest)
    return True

def compose_push_to_github_url(request, resource_id):
    if resource_id is None:
        return '{p}://{d}/{a}/push/v1'.format(p=request.env.wsgi_url_scheme,
                                              d=request.env.http_host,
                                              a=request.application)
    return '{p}://{d}/{a}/push/v1/{r}'.format(p=request.env.wsgi_url_scheme,
                                           d=request.env.http_host,
                                           a=request.application,
                                           r=resource_id)
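
# Example (hypothetical host and app name): for an app named 'phylesystem' served
# over HTTPS at api.example.org, compose_push_to_github_url(request, 'ot_123') would
# return 'https://api.example.org/phylesystem/push/v1/ot_123'.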

# this allows us to raise HTTP(...)
from gluon import *
_PHYLESYSTEM = None
def get_phylesystem(request):
    global _PHYLESYSTEM
    if _PHYLESYSTEM is not None:
        return _PHYLESYSTEM
    from gitdata import GitData
    repo_parent, repo_remote, git_ssh, pkey, git_hub_remote, max_filesize, max_num_trees = read_phylesystem_config(request)
    peyotl_config, cfg_filename = read_peyotl_config()
    if 'phylesystem' not in peyotl_config.sections():
        peyotl_config.add_section('phylesystem')
    peyotl_config.set('phylesystem', 'max_file_size', max_filesize)  # override peyotl config with the phylesystem-api max filesize
    push_mirror = os.path.join(repo_parent, 'mirror')
    pmi = {
        'parent_dir': push_mirror,
        'remote_map': {
            'GitHubRemote': git_hub_remote,
            },
        }
    mirror_info = {'push':pmi}
    conf = get_conf_object(request)
    a = {}
    try:
        new_study_prefix = conf.get('apis', 'new_study_prefix')
        a['new_study_prefix'] = new_study_prefix
    except:
        pass
    _PHYLESYSTEM = Phylesystem(repos_par=repo_parent,
                               git_ssh=git_ssh,
                               pkey=pkey,
                               git_action_class=GitData,
                               mirror_info=mirror_info,
                               **a)
    _LOG = get_logger(request, 'ot_api')
    _LOG.debug('[[[[[[ repo_nexml2json = {}'.format(_PHYLESYSTEM.repo_nexml2json))
    return _PHYLESYSTEM

_TREE_COLLECTION_STORE = None
def get_tree_collection_store(request):
    global _TREE_COLLECTION_STORE
    if _TREE_COLLECTION_STORE is not None:
        return _TREE_COLLECTION_STORE
    _LOG = get_logger(request, 'ot_api')
    _LOG.debug("getting _TREE_COLLECTION_STORE...")
    from gitdata import GitData  #TODO?
    repo_parent, repo_remote, git_ssh, pkey, git_hub_remote, max_filesize = read_collections_config(request)
    _LOG.debug("  repo_parent={}".format(repo_parent))
    _LOG.debug("  repo_remote={}".format(repo_remote))
    _LOG.debug("  git_ssh={}".format(git_ssh))
    _LOG.debug("  pkey={}".format(pkey))
    _LOG.debug("  git_hub_remote={}".format(git_hub_remote))
    push_mirror = os.path.join(repo_parent, 'mirror')
    pmi = {
        'parent_dir': push_mirror,
        'remote_map': {
            'GitHubRemote': git_hub_remote,
            },
        }
    mirror_info = {'push':pmi}
    conf = get_conf_object(request)
    import pprint
    _LOG.debug("  conf sections:")
    # pformat of the parser object itself would only show its repr; dump the parsed
    # sections/values instead so the settings are actually visible in the log
    _LOG.debug(pprint.pformat(dict((s, dict(conf.items(s))) for s in conf.sections())))
    a = {}
    try:
        # any keyword args to pass along from config?
        #new_study_prefix = conf.get('apis', 'new_study_prefix')
        #a['new_study_prefix'] = new_study_prefix
        pass
    except:
        pass
    _TREE_COLLECTION_STORE = TreeCollectionStore(repos_par=repo_parent,
                                                 git_ssh=git_ssh,
                                                 pkey=pkey,
                                                 git_action_class=GitData, #TODO?
                                                 mirror_info=mirror_info,
                                                 **a)
    _LOG.debug('assumed_doc_version = {}'.format(_TREE_COLLECTION_STORE.assumed_doc_version))
    return _TREE_COLLECTION_STORE


def get_failed_push_filepath(request, doc_type=None):
    filenames_by_content_type = {'nexson': "PUSH_FAILURE_nexson.json",
                                 'collection': "PUSH_FAILURE_collection.json",
                                 'favorites': "PUSH_FAILURE_favorites.json"}
    content_type = doc_type or request.vars.get('doc_type', 'nexson')
    failure_filename = filenames_by_content_type[content_type]
    return os.path.join(get_private_dir(request), failure_filename)
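
# Usage sketch (illustrative only, not taken from this module): record a failed-push
# report without clobbering one that is already present.
#   dest = get_failed_push_filepath(request, doc_type='nexson')
#   atomic_write_json_if_not_found({'date': str(datetime.now())}, dest, request)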

def get_conf_object(request):
    app_name = request.application
    conf = SafeConfigParser(allow_no_value=True)
    localconfig_filename = os.path.join(get_private_dir(request), "localconfig")
    if os.path.isfile(localconfig_filename):
        conf.readfp(open(localconfig_filename))
    else:
        filename = os.path.join(get_private_dir(request), "config")
        conf.readfp(open(filename))
    return conf

def read_phylesystem_config(request):
    """Load settings for managing the main Nexson docstore"""
    conf = get_conf_object(request)
    repo_parent   = conf.get("apis","repo_parent")
    repo_remote = conf.get("apis", "repo_remote")
    try:
        git_ssh     = conf.get("apis", "git_ssh")
    except:
        git_ssh = 'ssh'
    try:
        pkey        = conf.get("apis", "pkey")
    except:
        pkey = None
    try:
        git_hub_remote = conf.get("apis", "git_hub_remote")
    except:
        git_hub_remote = 'git@github.com:OpenTreeOfLife'
    try:
        max_filesize = conf.get("filesize", "peyotl_max_file_size")
    except:
        max_filesize = '20000000'
    try:
        max_num_trees = conf.get("filesize", "validation_max_num_trees")
    except:
        max_num_trees = 65
    try:
        max_num_trees = int(max_num_trees)
    except ValueError:
        raise HTTP(400, json.dumps({"error": 1, "description": 'max number of trees per study in config is not an integer'}))
    return repo_parent, repo_remote, git_ssh, pkey, git_hub_remote, max_filesize, max_num_trees
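
# Example (hypothetical values) of the config entries read above; only repo_parent and
# repo_remote are required, the rest fall back to the defaults in the except clauses:
#   [apis]
#   repo_parent = /home/opentree/phylesystem/shards
#   repo_remote = git@github.com:OpenTreeOfLife/phylesystem-1.git
#   git_ssh = ssh
#   pkey = /home/opentree/.ssh/opentree
#   git_hub_remote = git@github.com:OpenTreeOfLife
#
#   [filesize]
#   peyotl_max_file_size = 20000000
#   validation_max_num_trees = 65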

def read_collections_config(request):
    """Load settings for a minor repo with shared tree collections"""
    conf = get_conf_object(request)
    collections_repo_parent   = conf.get("apis","collections_repo_parent")
    collections_repo_remote = conf.get("apis", "collections_repo_remote")
    try:
        git_ssh     = conf.get("apis", "git_ssh")
    except:
        git_ssh = 'ssh'
    try:
        pkey        = conf.get("apis", "pkey")
    except:
        pkey = None
    try:
        git_hub_remote = conf.get("apis", "git_hub_remote")
    except:
        git_hub_remote = 'git@github.com:OpenTreeOfLife'
    try:
        max_filesize = conf.get("filesize", "collections_max_file_size")
    except:
        max_filesize = '20000000'
    return collections_repo_parent, collections_repo_remote, git_ssh, pkey, git_hub_remote, max_filesize

def read_favorites_config(request):
    """Load settings for a minor repo with per-user 'favorites' information"""
    conf = get_conf_object(request)
    favorites_repo_parent   = conf.get("apis","favorites_repo_parent")
    favorites_repo_remote = conf.get("apis", "favorites_repo_remote")
    try:
        git_ssh     = conf.get("apis", "git_ssh")
    except:
        git_ssh = 'ssh'
    try:
        pkey        = conf.get("apis", "pkey")
    except:
        pkey = None
    try:
        git_hub_remote = conf.get("apis", "git_hub_remote")
    except:
        git_hub_remote = 'git@github.com:OpenTreeOfLife'
    return favorites_repo_parent, favorites_repo_remote, git_ssh, pkey, git_hub_remote

def read_logging_config(request):
    conf = get_conf_object(request)
    try:
        level = conf.get("logging", "level")
        if not level.strip():
            level = 'WARNING'
    except:
        level = 'WARNING'
    try:
        logging_format_name = conf.get("logging", "formatter")
        if not logging_format_name.strip():
            logging_format_name = 'NONE'
    except:
        logging_format_name = 'NONE'
    try:
        logging_filepath = conf.get("logging", "filepath")
        if not logging_filepath.strip():
            logging_filepath = None
    except:
        logging_filepath = None
    return level, logging_format_name, logging_filepath
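
# Example (hypothetical) [logging] section; blank or missing values fall back to
# WARNING / NONE / stderr as handled above:
#   [logging]
#   level = DEBUG
#   formatter = RICH
#   filepath = /var/log/opentree/ot_api.log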

def authenticate(**kwargs):
    """Verify that we received a valid Github authentication token

    This method takes keyword arguments and, if author_name and/or
    author_email are provided, uses them to override the values
    associated with the given token.

    Returns a dict holding the GitHub 'login' plus the author 'name' and 'email'.

    This method will return HTTP 400 if the auth token is not present
    or if it is not valid, i.e. if PyGithub throws a BadCredentialsException.

    """
    # this is the GitHub API auth-token for a logged-in curator
    auth_token   = kwargs.get('auth_token','')

    if not auth_token:
        raise HTTP(400,json.dumps({
            "error": 1,
            "description":"You must provide an auth_token to authenticate to the OpenTree API"
        }))
    gh           = Github(auth_token)
    gh_user      = gh.get_user()
    auth_info = {}
    try:
        auth_info['login'] = gh_user.login
    except BadCredentialsException:
        raise HTTP(400,json.dumps({
            "error": 1,
            "description":"You have provided an invalid or expired authentication token"
        }))

    auth_info['name'] = kwargs.get('author_name')
    auth_info['email'] = kwargs.get('author_email')

    # use the GitHub OAuth token to get a name/email if not specified;
    # we don't provide these as default values above because they would
    # trigger API calls regardless of whether author_name/author_email were specified

    if auth_info['name'] is None:
        auth_info['name'] = gh_user.name
    if auth_info['email'] is None:
        auth_info['email'] = gh_user.email
    return auth_info
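
# Usage sketch (token and returned values are hypothetical); name/email fall back to
# the GitHub profile when the caller does not supply them:
#   auth_info = authenticate(auth_token='<oauth token>')
#   # -> {'login': 'example-curator', 'name': 'Example Curator', 'email': 'curator@example.org'}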


_LOGGING_LEVEL_ENVAR="OT_API_LOGGING_LEVEL"
_LOGGING_FORMAT_ENVAR="OT_API_LOGGING_FORMAT"
_LOGGING_FILE_PATH_ENVAR = 'OT_API_LOG_FILE_PATH'

def _get_logging_level(s=None):
    if s is None:
        return logging.NOTSET
    # map the (case-insensitive) config/envar string onto a logging level,
    # defaulting to NOTSET for unrecognized values
    level_map = {"NOTSET": logging.NOTSET,
                 "DEBUG": logging.DEBUG,
                 "INFO": logging.INFO,
                 "WARNING": logging.WARNING,
                 "ERROR": logging.ERROR,
                 "CRITICAL": logging.CRITICAL}
    return level_map.get(s.upper(), logging.NOTSET)

def _get_logging_formatter(s=None):
    if s is None:
        s = 'NONE'
    else:
        s = s.upper()
    rich_formatter = logging.Formatter("[%(asctime)s] %(filename)s (%(lineno)d): %(levelname) 8s: %(message)s")
    simple_formatter = logging.Formatter("%(levelname) 8s: %(message)s")
    raw_formatter = logging.Formatter("%(message)s")
    default_formatter = None
    logging_formatter = default_formatter
    if s == "RICH":
        logging_formatter = rich_formatter
    elif s == "SIMPLE":
        logging_formatter = simple_formatter
    else:
        logging_formatter = None
    if logging_formatter is not None:
        logging_formatter.datefmt = '%H:%M:%S'
    return logging_formatter

def get_logger(request, name="ot_api"):
    """
    Returns a logger with name set as given, and configured
    to the level given by the environment variable _LOGGING_LEVEL_ENVAR.
    """

#     package_dir = os.path.dirname(module_path)
#     config_filepath = os.path.join(package_dir, _LOGGING_CONFIG_FILE)
#     if os.path.exists(config_filepath):
#         try:
#             logging.config.fileConfig(config_filepath)
#             logger_set = True
#         except:
#             logger_set = False
    logger = logging.getLogger(name)
    if len(logger.handlers) == 0:
        if request is None:
            level = _get_logging_level(os.environ.get(_LOGGING_LEVEL_ENVAR))
            logging_formatter = _get_logging_formatter(os.environ.get(_LOGGING_FORMAT_ENVAR))
            logging_filepath = os.environ.get(_LOGGING_FILE_PATH_ENVAR)
        else:
            level_str, logging_format_name, logging_filepath = read_logging_config(request)
            logging_formatter = _get_logging_formatter(logging_format_name)
            level = _get_logging_level(level_str)

        logger.setLevel(level)
        if logging_filepath is not None:
            log_dir = os.path.split(logging_filepath)[0]
            if log_dir and not os.path.exists(log_dir):
                os.makedirs(log_dir)
            ch = logging.FileHandler(logging_filepath)
        else:
            ch = logging.StreamHandler()
        ch.setLevel(level)
        ch.setFormatter(logging_formatter)
        logger.addHandler(ch)
    return logger

def log_time_diff(log_obj, operation='', prev_time=None):
    '''If prev_time is not None, logs (at debug level) to 
    log_obj the difference between now and the naive datetime 
    object prev_time.
    `operation` is a string describing what events were timed.
    The current time is returned to allow for several 
    calls with the form
       x = log_time_diff(_LOG, 'no op', x)
       foo()
       x = log_time_diff(_LOG, 'foo', x)
       bar()
       x = log_time_diff(_LOG, 'bar', x)
    '''
    n = datetime.now()
    if prev_time is not None:
        td = n - prev_time
        t = td.total_seconds()
        log_obj.debug('Timed operation "{o}" took {t:f} seconds'.format(o=operation, t=t))
    return n

def get_oti_base_url(request):
    conf = get_conf_object(request)
    oti_base_url = conf.get("apis", "oti_base_url")
    if oti_base_url.startswith('//'):
        # Prepend scheme to a scheme-relative URL
        oti_base_url = "https:" + oti_base_url
    return oti_base_url

def get_oti_domain(request):
    oti_base = get_oti_base_url(request)
    s = oti_base.split('/')
    assert len(s) > 2
    return '/'.join(s[:3])
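
# Example (hypothetical value): with oti_base_url = '//api.example.org/v3/studies' in
# the config, get_oti_base_url returns 'https://api.example.org/v3/studies' and
# get_oti_domain returns 'https://api.example.org'.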

def get_collections_api_base_url(request):
    conf = get_conf_object(request)
    base_url = conf.get("apis", "collections_api_base_url")
    if base_url.startswith('//'):
        # Prepend scheme to a scheme-relative URL
        base_url = "https:" + base_url
    return base_url

def get_favorites_api_base_url(request):
    conf = get_conf_object(request)
    base_url = conf.get("apis", "favorites_api_base_url")
    if base_url.startswith('//'):
        # Prepend scheme to a scheme-relative URL
        base_url = "https:" + base_url
    return base_url