import json
import logging
import os
import tempfile
from ConfigParser import Error as ConfigParserError
from ConfigParser import SafeConfigParser
from datetime import datetime

from github import Github, BadCredentialsException
from peyotl.collections import TreeCollectionStore
from peyotl.nexson_syntax import write_as_json
from peyotl.phylesystem import Phylesystem
from peyotl.utility import get_config as get_peyotl_config
from peyotl.utility import read_config as read_peyotl_config
def get_private_dir(request):
    """Return the path of the current application's ``private`` directory.

    The path is assembled from ``request.env.web2py_path`` and
    ``request.application``.
    """
    application = request.application
    web2py_root = request.env.web2py_path
    path_parts = (web2py_root, application)
    return '%s/applications/%s/private' % path_parts
def atomic_write_json_if_not_found(obj, dest, request):
    """Write `obj` as JSON to `dest` unless `dest` already exists.

    The JSON is first written to a temporary file in the application's
    private directory and then renamed onto `dest`, so readers never see a
    partially written file.

    Returns True if `dest` was created by this call, and False if `dest`
    already existed (either before writing or by the time the temporary
    file was ready). The temporary file is removed whenever it is not
    renamed onto `dest`, including when serialization raises.
    """
    if os.path.exists(dest):
        return False
    private_dir = get_private_dir(request)
    handle, tmpfn = tempfile.mkstemp(suffix='.json', dir=private_dir, text=True)
    # mkstemp opens the file and returns a file descriptor,
    # but we are using write_as_json to open with the right encoding
    os.close(handle)
    renamed = False
    try:
        write_as_json(obj, tmpfn, indent=2, sort_keys=True)
        # Re-check: another writer may have created dest while we serialized.
        if os.path.exists(dest):
            return False
        os.rename(tmpfn, dest)
        renamed = True
        return True
    finally:
        if not renamed:
            # Do not leave orphaned temp files behind in the private dir.
            try:
                os.remove(tmpfn)
            except OSError:
                pass
def compose_push_to_github_url(request, resource_id):
    """Build the URL of this application's ``push/v1`` endpoint.

    The scheme and host are taken from ``request.env`` and the application
    name from ``request.application``. When `resource_id` is not None it is
    appended as a final path component.
    """
    url_fields = dict(p=request.env.wsgi_url_scheme,
                      d=request.env.http_host,
                      a=request.application)
    if resource_id is not None:
        return '{p}://{d}/{a}/push/v1/{r}'.format(r=resource_id, **url_fields)
    return '{p}://{d}/{a}/push/v1'.format(**url_fields)
# this allows us to raise HTTP(...)
from gluon import *
_PHYLESYSTEM = None


def get_phylesystem(request):
    """Return the module-wide Phylesystem instance, creating it on first use.

    On the first call the instance is built from the settings returned by
    read_phylesystem_config(request) and cached in the module-level
    `_PHYLESYSTEM`; later calls return the cached object and ignore
    `request`.

    If the config has an ``[apis] new_study_prefix`` option it is passed to
    the Phylesystem constructor; a missing section or option is not an
    error.
    """
    global _PHYLESYSTEM
    if _PHYLESYSTEM is not None:
        return _PHYLESYSTEM
    from gitdata import GitData
    # Names with a leading underscore are returned by the config reader
    # but are not needed here.
    (repo_parent, _repo_remote, git_ssh, pkey, git_hub_remote,
     max_filesize, _max_num_trees) = read_phylesystem_config(request)
    peyotl_config, _cfg_filename = read_peyotl_config()
    if 'phylesystem' not in peyotl_config.sections():
        peyotl_config.add_section('phylesystem')
    # Override the peyotl config with the phylesystem-api max file size.
    peyotl_config.set('phylesystem', 'max_file_size', max_filesize)
    push_mirror = os.path.join(repo_parent, 'mirror')
    pmi = {
        'parent_dir': push_mirror,
        'remote_map': {
            'GitHubRemote': git_hub_remote,
        },
    }
    mirror_info = {'push': pmi}
    conf = get_conf_object(request)
    a = {}
    try:
        a['new_study_prefix'] = conf.get('apis', 'new_study_prefix')
    except ConfigParserError:
        # Optional setting: fall back to the Phylesystem default.
        pass
    _PHYLESYSTEM = Phylesystem(repos_par=repo_parent,
                               git_ssh=git_ssh,
                               pkey=pkey,
                               git_action_class=GitData,
                               mirror_info=mirror_info,
                               **a)
    _LOG = get_logger(request, 'ot_api')
    _LOG.debug('[[[[[[ repo_nexml2json = {}'.format(_PHYLESYSTEM.repo_nexml2json))
    return _PHYLESYSTEM
_TREE_COLLECTION_STORE = None


def get_tree_collection_store(request):
    """Return the module-wide TreeCollectionStore, creating it on first use.

    The first call reads the settings via read_collections_config(request),
    logs them at debug level, builds the store and caches it in the
    module-level `_TREE_COLLECTION_STORE`. Later calls return the cached
    object.
    """
    global _TREE_COLLECTION_STORE
    if _TREE_COLLECTION_STORE is None:
        log = get_logger(request, 'ot_api')
        log.debug("getting _TREE_COLLECTION_STORE...")
        from gitdata import GitData  # TODO?
        settings = read_collections_config(request)
        repo_parent, repo_remote, git_ssh, pkey, git_hub_remote, max_filesize = settings
        log.debug(" repo_parent={}".format(repo_parent))
        log.debug(" repo_remote={}".format(repo_remote))
        log.debug(" git_ssh={}".format(git_ssh))
        log.debug(" pkey={}".format(pkey))
        log.debug(" git_hub_remote={}".format(git_hub_remote))
        mirror_info = {
            'push': {
                'parent_dir': os.path.join(repo_parent, 'mirror'),
                'remote_map': {
                    'GitHubRemote': git_hub_remote,
                },
            },
        }
        conf = get_conf_object(request)
        import pprint
        log.debug(" conf:")
        log.debug(pprint.pformat(conf))
        # No keyword args are currently passed along from the config
        # (e.g. a 'new_study_prefix' equivalent); the dict stays empty.
        extra_kwargs = {}
        _TREE_COLLECTION_STORE = TreeCollectionStore(repos_par=repo_parent,
                                                     git_ssh=git_ssh,
                                                     pkey=pkey,
                                                     git_action_class=GitData,  # TODO?
                                                     mirror_info=mirror_info,
                                                     **extra_kwargs)
        log.debug('assumed_doc_version = {}'.format(_TREE_COLLECTION_STORE.assumed_doc_version))
    return _TREE_COLLECTION_STORE
def get_failed_push_filepath(request, doc_type=None):
    """Return the path of the push-failure marker file for a document type.

    The type is `doc_type` when truthy, otherwise the ``doc_type`` request
    variable, defaulting to 'nexson'. Known types are 'nexson',
    'collection' and 'favorites'; any other value raises KeyError.
    The file lives in the application's private directory.
    """
    requested_type = doc_type or request.vars.get('doc_type', 'nexson')
    marker_names = {
        'nexson': "PUSH_FAILURE_nexson.json",
        'collection': "PUSH_FAILURE_collection.json",
        'favorites': "PUSH_FAILURE_favorites.json",
    }
    marker_name = marker_names[requested_type]
    private_dir = get_private_dir(request)
    return os.path.join(private_dir, marker_name)
def get_conf_object(request):
    """Load and return the application's configuration as a SafeConfigParser.

    Reads the file named "localconfig" in the application's private
    directory if it exists, otherwise the file named "config" in the same
    directory. Options without a value are allowed (they read back as
    None). Raises IOError if the chosen file cannot be opened.
    """
    conf = SafeConfigParser(allow_no_value=True)
    private_dir = get_private_dir(request)
    config_filename = os.path.join(private_dir, "localconfig")
    if not os.path.isfile(config_filename):
        config_filename = os.path.join(private_dir, "config")
    # Close the file deterministically instead of leaking the handle.
    with open(config_filename) as config_file:
        conf.readfp(config_file)
    return conf
def read_phylesystem_config(request):
    """Load settings for managing the main Nexson docstore.

    ``[apis] repo_parent`` and ``[apis] repo_remote`` are required; a
    missing one raises a ConfigParser error. The remaining options are
    optional and fall back to defaults when their section or option is
    absent.

    Returns the tuple (repo_parent, repo_remote, git_ssh, pkey,
    git_hub_remote, max_filesize, max_num_trees) where max_num_trees is an
    int and max_filesize is left as read from the config (a string by
    default).

    Raises HTTP 400 if ``validation_max_num_trees`` cannot be converted
    to an integer.
    """
    conf = get_conf_object(request)

    def _optional(section, option, default):
        # Only a missing/unreadable option falls back to the default;
        # unrelated exceptions are no longer swallowed.
        try:
            return conf.get(section, option)
        except ConfigParserError:
            return default

    repo_parent = conf.get("apis", "repo_parent")
    repo_remote = conf.get("apis", "repo_remote")
    git_ssh = _optional("apis", "git_ssh", 'ssh')
    pkey = _optional("apis", "pkey", None)
    git_hub_remote = _optional("apis", "git_hub_remote", 'git@github.com:OpenTreeOfLife')
    max_filesize = _optional("filesize", "peyotl_max_file_size", '20000000')
    max_num_trees = _optional("filesize", "validation_max_num_trees", 65)
    try:
        max_num_trees = int(max_num_trees)
    except (TypeError, ValueError):
        # TypeError covers an option present without a value (read as None).
        raise HTTP(400, json.dumps({"error": 1, "description": 'max number of trees per study in config is not an integer'}))
    return repo_parent, repo_remote, git_ssh, pkey, git_hub_remote, max_filesize, max_num_trees
def read_collections_config(request):
    """Load settings for a minor repo with shared tree collections.

    ``[apis] collections_repo_parent`` and ``[apis] collections_repo_remote``
    are required; a missing one raises a ConfigParser error. The remaining
    options fall back to defaults when their section or option is absent.

    Returns the tuple (collections_repo_parent, collections_repo_remote,
    git_ssh, pkey, git_hub_remote, max_filesize).
    """
    conf = get_conf_object(request)

    def _optional(section, option, default):
        # Only a missing/unreadable option falls back to the default;
        # unrelated exceptions are no longer swallowed.
        try:
            return conf.get(section, option)
        except ConfigParserError:
            return default

    collections_repo_parent = conf.get("apis", "collections_repo_parent")
    collections_repo_remote = conf.get("apis", "collections_repo_remote")
    git_ssh = _optional("apis", "git_ssh", 'ssh')
    pkey = _optional("apis", "pkey", None)
    git_hub_remote = _optional("apis", "git_hub_remote", 'git@github.com:OpenTreeOfLife')
    max_filesize = _optional("filesize", "collections_max_file_size", '20000000')
    return collections_repo_parent, collections_repo_remote, git_ssh, pkey, git_hub_remote, max_filesize
def read_favorites_config(request):
    """Load settings for a minor repo with per-user 'favorites' information.

    ``[apis] favorites_repo_parent`` and ``[apis] favorites_repo_remote``
    are required; a missing one raises a ConfigParser error. The remaining
    options fall back to defaults when their section or option is absent.

    Returns the tuple (favorites_repo_parent, favorites_repo_remote,
    git_ssh, pkey, git_hub_remote).
    """
    conf = get_conf_object(request)

    def _optional(section, option, default):
        # Only a missing/unreadable option falls back to the default;
        # unrelated exceptions are no longer swallowed.
        try:
            return conf.get(section, option)
        except ConfigParserError:
            return default

    favorites_repo_parent = conf.get("apis", "favorites_repo_parent")
    favorites_repo_remote = conf.get("apis", "favorites_repo_remote")
    git_ssh = _optional("apis", "git_ssh", 'ssh')
    pkey = _optional("apis", "pkey", None)
    git_hub_remote = _optional("apis", "git_hub_remote", 'git@github.com:OpenTreeOfLife')
    return favorites_repo_parent, favorites_repo_remote, git_ssh, pkey, git_hub_remote
def read_logging_config(request):
    """Read the ``[logging]`` settings from the application's config.

    Returns the tuple (level, logging_format_name, logging_filepath).
    Each value falls back to its default ('WARNING', 'NONE' and None
    respectively) when the section or option is missing, when the option
    has no value, or when the value is blank. Non-blank values are
    returned exactly as read (not stripped).
    """
    conf = get_conf_object(request)

    def _setting(option, default):
        try:
            value = conf.get("logging", option)
        except ConfigParserError:
            return default
        # A valueless option reads back as None; treat it like a blank one.
        if value is None or not value.strip():
            return default
        return value

    level = _setting("level", 'WARNING')
    logging_format_name = _setting("formatter", 'NONE')
    logging_filepath = _setting("filepath", None)
    return level, logging_format_name, logging_filepath
def authenticate(**kwargs):
    """Verify that we received a valid GitHub authentication token.

    Recognized keyword arguments:
      auth_token    -- the GitHub API token of a logged-in curator (required)
      author_name   -- optional override for the name tied to the token
      author_email  -- optional override for the email tied to the token

    Returns a dict with the keys 'login', 'name' and 'email'.

    Raises HTTP 400 if auth_token is missing or empty, or if PyGithub
    raises BadCredentialsException when the user's login is read.
    """
    # this is the GitHub API auth-token for a logged-in curator
    token = kwargs.get('auth_token', '')
    if not token:
        raise HTTP(400, json.dumps({
            "error": 1,
            "description": "You must provide an auth_token to authenticate to the OpenTree API"
        }))
    gh_user = Github(token).get_user()
    try:
        login = gh_user.login
    except BadCredentialsException:
        raise HTTP(400, json.dumps({
            "error": 1,
            "description": "You have provided an invalid or expired authentication token"
        }))
    # Fall back to the GitHub profile only when no override was supplied;
    # using gh_user.name / gh_user.email as .get() defaults would generate
    # API calls regardless of author_name/author_email being specified.
    name = kwargs.get('author_name')
    if name is None:
        name = gh_user.name
    email = kwargs.get('author_email')
    if email is None:
        email = gh_user.email
    return {'login': login, 'name': name, 'email': email}
_LOGGING_LEVEL_ENVAR="OT_API_LOGGING_LEVEL"
_LOGGING_FORMAT_ENVAR="OT_API_LOGGING_FORMAT"
_LOGGING_FILE_PATH_ENVAR = 'OT_API_LOG_FILE_PATH'
def _get_logging_level(s=None):
if s is None:
return logging.NOTSET
supper = s.upper()
if supper == "NOTSET":
level = logging.NOTSET
elif supper == "DEBUG":
level = logging.DEBUG
elif supper == "INFO":
level = logging.INFO
elif supper == "WARNING":
level = logging.WARNING
elif supper == "ERROR":
level = logging.ERROR
elif supper == "CRITICAL":
level = logging.CRITICAL
else:
level = logging.NOTSET
return level
def _get_logging_formatter(s=None):
if s is None:
s == 'NONE'
else:
s = s.upper()
rich_formatter = logging.Formatter("[%(asctime)s] %(filename)s (%(lineno)d): %(levelname) 8s: %(message)s")
simple_formatter = logging.Formatter("%(levelname) 8s: %(message)s")
raw_formatter = logging.Formatter("%(message)s")
default_formatter = None
logging_formatter = default_formatter
if s == "RICH":
logging_formatter = rich_formatter
elif s == "SIMPLE":
logging_formatter = simple_formatter
else:
logging_formatter = None
if logging_formatter is not None:
logging_formatter.datefmt='%H:%M:%S'
return logging_formatter
def get_logger(request, name="ot_api"):
    """Return the logger called `name`, configuring it on first use.

    A logger that already has handlers is returned untouched. Otherwise
    the level, formatter and optional log file path are taken from the
    environment variables named by _LOGGING_LEVEL_ENVAR,
    _LOGGING_FORMAT_ENVAR and _LOGGING_FILE_PATH_ENVAR when `request` is
    None, or from read_logging_config(request) when it is not.

    With a log file path, a FileHandler is attached (creating the parent
    directory if it does not exist); without one, a StreamHandler is
    attached.
    """
    logger = logging.getLogger(name)
    if logger.handlers:
        # Already configured by an earlier call.
        return logger
    if request is None:
        environ = os.environ
        level = _get_logging_level(environ.get(_LOGGING_LEVEL_ENVAR))
        logging_formatter = _get_logging_formatter(environ.get(_LOGGING_FORMAT_ENVAR))
        logging_filepath = environ.get(_LOGGING_FILE_PATH_ENVAR)
    else:
        level_name, format_name, logging_filepath = read_logging_config(request)
        logging_formatter = _get_logging_formatter(format_name)
        level = _get_logging_level(level_name)
    logger.setLevel(level)
    if logging_filepath is None:
        handler = logging.StreamHandler()
    else:
        parent_dir = os.path.split(logging_filepath)[0]
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        handler = logging.FileHandler(logging_filepath)
    handler.setLevel(level)
    handler.setFormatter(logging_formatter)
    logger.addHandler(handler)
    return logger
def log_time_diff(log_obj, operation='', prev_time=None):
    '''Log how long an operation took and return the current time.

    When `prev_time` (a naive datetime) is given, the number of seconds
    between it and now is written to `log_obj` at debug level, labelled
    with the `operation` string. When `prev_time` is None nothing is
    logged.

    The current time is always returned so that calls can be chained:
        x = log_time_diff(_LOG, 'no op', x)
        foo()
        x = log_time_diff(_LOG, 'foo', x)
        bar()
        x = log_time_diff(_LOG, 'bar', x)
    '''
    now = datetime.now()
    if prev_time is None:
        return now
    elapsed_seconds = (now - prev_time).total_seconds()
    message = 'Timed operation "{o}" took {t:f} seconds'.format(o=operation, t=elapsed_seconds)
    log_obj.debug(message)
    return now
def get_oti_base_url(request):
    """Return the ``[apis] oti_base_url`` setting from the app's config.

    A scheme-relative value (one starting with '//') is returned with
    'https:' prepended; any other value is returned unchanged.
    """
    configured_url = get_conf_object(request).get("apis", "oti_base_url")
    if not configured_url.startswith('//'):
        return configured_url
    # Prepend scheme to a scheme-relative URL
    return "https:" + configured_url
def get_oti_domain(request):
    """Return the first three '/'-separated pieces of the OTI base URL.

    For a URL of the form 'scheme://host/path' this is 'scheme://host'.
    Fails an assertion if the base URL has fewer than three pieces.
    """
    pieces = get_oti_base_url(request).split('/')
    assert len(pieces) > 2
    leading_pieces = pieces[:3]
    return '/'.join(leading_pieces)
def get_collections_api_base_url(request):
    """Return the ``[apis] collections_api_base_url`` config setting.

    A scheme-relative value (one starting with '//') is returned with
    'https:' prepended; any other value is returned unchanged.
    """
    configured_url = get_conf_object(request).get("apis", "collections_api_base_url")
    if not configured_url.startswith('//'):
        return configured_url
    # Prepend scheme to a scheme-relative URL
    return "https:" + configured_url
def get_favorites_api_base_url(request):
    """Return the ``[apis] favorites_api_base_url`` config setting.

    A scheme-relative value (one starting with '//') is returned with
    'https:' prepended; any other value is returned unchanged.
    """
    configured_url = get_conf_object(request).get("apis", "favorites_api_base_url")
    if not configured_url.startswith('//'):
        return configured_url
    # Prepend scheme to a scheme-relative URL
    return "https:" + configured_url