#!/usr/bin/env python # -*- coding: utf-8 -*- import csv # import numpy as np import os import pandas as pd import pystache import re import urllib current_path = os.path.dirname(os.path.abspath(__file__)) def MirnaPathwayFinder( mappings_path=None, query_values=None, query_value_list_column_index=0, node_type='rna', output_dir='.', cache=True, debug=False): def print_debug(message): if debug: print message def generate_widget_uri(mapping, highlight_string): return ''.join([ 'http://www.wikipathways.org/wpi/PathwayWidget.php?id=', mapping['identifier'], '&', highlight_string, ]) def generate_pathways_table(html_template_input, query_value_list): # TODO handle the case where the query values are NOT display names highlight_values = map( lambda query_value: 'label[]=' + urllib.quote( query_value ), query_value_list ) highlight_string = str.join('&', highlight_values) + '&colors=red' f = open(current_path + '/table-template.html', 'r') table_template = f.read() widget_uri = generate_widget_uri( html_template_input[0], highlight_string) initial_html_string = pystache.render( table_template, html_template_input) html_string_with_widget_url = initial_html_string.replace( 'widget_uri', widget_uri ) update_widget_path = os.path.join(current_path, 'update-widget.js') with open(update_widget_path, 'r') as update_widget: update_widget_string = ''.join([ 'var highlightString = \'', highlight_string, '\';\n', update_widget.read() ]) html_string_with_update_widget = html_string_with_widget_url.replace( 'update_widget_string', update_widget_string ) f = open(os.path.join(output_dir, 'pathways.html'), 'w') f.write(html_string_with_update_widget) return html_string_with_update_widget def has_targeter(row): columns_to_check = ( 'stem_loop_name', 'mature_name', 'mirbase', 'mirbase.mature', 'ncbigene', 'hgnc', 'targeter_stem_loop_name', 'targeter_mature_name', 'targeter_mirbase', 'targeter_mirbase.mature', 'targeter_ncbigene', 'targeter_hgnc', ) possible_targeters = set(filter(lambda y: isinstance(y, str), map(lambda x: row[x], columns_to_check))) return len(possible_targeters.intersection(query_value_list)) > 0 def has_target(row): columns_to_check = ( 'targeter_stem_loop_name', 'targeter_mature_name', 'targeter_mirbase', 'targeter_mirbase.mature', 'targeter_ncbigene', 'targeter_hgnc', ) possible_targeters = set(filter(lambda y: isinstance(y, str), map(lambda x: row[x], columns_to_check))) return len(possible_targeters.intersection(query_value_list)) > 0 results_limit = 20 query_value_list = set() if os.path.isfile(query_values): with open(query_values, 'rb') as csvfile: query_value_list_reader = csv.reader( csvfile, delimiter='\t', quotechar='|') for row in query_value_list_reader: query_value_list.add(row[query_value_list_column_index]) else: if hasattr(query_values, '__iter__'): query_value_list = set(query_values) else: query_value_list.add(query_values) query_value_list = map(lambda x: re.sub( '^http:\/\/identifiers.org\/(hgnc|ncbigene|mirbase|mirbase\.mature)\/', '', x ), query_value_list) if mappings_path is None: # TODO remove the date part of the file name mappings_path = os.path.join( current_path, '..', 'wp-mir-table-hs-20160715.tsv') # TODO integrate this old code into the current code. Specifically, # handle if the mapping data is provided as a Python object. # # parse wp-mir-table-hs.csv (or other file, if specified) # # to get mappings between pathways and mirnas, # # including for each pathway: # # * genes: all gene products in the pathway, annotated as genes # # * mirna_hits_as_gene_specified: miRNAs actually existing in pathway, # # annotated as genes # # * mirna_hits_as_mirna_specified: miRNAs actually shown on pathway, # # annotated as miRNAs # # * mirna_hits_as_mirna_inferred: miRNAs NOT actually specified on the # # pathway but # # inferred to exist on the pathway because they target genes or # # proteins that DO actually exist on the pathway # pathway_to_mirna_mappings = mappings_path # pathway_to_mirna_mappings_list = [] # if os.path.isfile(pathway_to_mirna_mappings): # with open(pathway_to_mirna_mappings, 'rb') as csvfile: # pathway_to_mirna_mappings_reader = csv.DictReader(csvfile) # for row in pathway_to_mirna_mappings_reader: # genes = parse_hits_field(row['genes']) # mirna_hits_as_gene_specified = parse_hits_field( # row['ghits']) # mirna_hits_as_mirna_specified = parse_hits_field( # row['mhits']) # mirna_hits_as_mirna_inferred = parse_hits_field( # row['mthits']) # # wp_identifier = re.search('WP\d+', row['link']).group(0) # parsed_row = { # 'name': row['name'], # 'identifier': wp_identifier, # 'id': row['link'], # 'genes': genes, # 'mirna_hits_as_gene_specified': # mirna_hits_as_gene_specified, # 'mirna_hits_as_mirna_specified': # mirna_hits_as_mirna_specified, # 'mirna_hits_as_mirna_inferred': # mirna_hits_as_mirna_inferred, # } # pathway_to_mirna_mappings_list.append(parsed_row) # else: # if hasattr(pathway_to_mirna_mappings, '__iter__'): # pathway_to_mirna_mappings_list += pathway_to_mirna_mappings # else: # pathway_to_mirna_mappings_list.append(pathway_to_mirna_mappings) wp_mirna = pd.read_csv(mappings_path, sep='\t', dtype=str) # TODO remove this. It's just for dev. # wp_mirna = wp_mirna.head(1000) with_targeter = wp_mirna[wp_mirna.apply( lambda d: has_targeter(d), axis=1)] with_targeter_by_pwy = with_targeter.groupby(['wikipathways']) # get targeter count by pathway targeter_n_by_pwy = with_targeter_by_pwy[ 'shown_or_inferred_mature_name'].nunique() # get shown_targeter count by pathway shown_targeter_n_by_pwy = with_targeter_by_pwy[ 'mature_name'].nunique() # get target count by pathway with_target = with_targeter[with_targeter.apply( lambda d: has_target(d), axis=1)] with_target_by_pwy = with_target.groupby(['wikipathways']) target_n_by_pwy = with_target_by_pwy['ncbigene'].nunique() pathways = with_targeter['wikipathways'].unique() d = { 'wikipathways': pathways, 'targeter_count': targeter_n_by_pwy, 'shown_targeter_count': shown_targeter_n_by_pwy, 'target_count': target_n_by_pwy } wp_counts = pd.DataFrame( data=d ).fillna( value=0 ).nlargest( results_limit, ['shown_targeter_count', 'target_count', 'targeter_count']) results = with_targeter.join(wp_counts, on='wikipathways', how='left', lsuffix='', rsuffix='_r', sort=False) pathway_level_columns = [ 'shown_targeter_count', 'target_count', 'targeter_count', 'wikipathways', 'link', 'pathway_name' ] results_sorted = results.sort_values(by=pathway_level_columns, axis=0, ascending=False, inplace=False, kind='quicksort', na_position='last') results_by_pwy = results_sorted.groupby(pathway_level_columns, sort=False) html_template_input = [] for name_by_pwy, group_by_pwy in results_by_pwy: targets_by_targeters = [] shown_targeters = [] mature_names = filter(lambda x: isinstance(x, str), group_by_pwy['mature_name']) if len(mature_names) > 0: shown_targeters = shown_targeters + map( lambda x: {'name': x}, mature_names) result = { 'id': name_by_pwy[pathway_level_columns.index('link')], 'identifier': name_by_pwy[ pathway_level_columns.index('wikipathways')], 'name': name_by_pwy[pathway_level_columns.index('pathway_name')], 'targets_by_targeters': targets_by_targeters, 'shown_targeters': shown_targeters, 'shown_targeter_count': name_by_pwy[ pathway_level_columns.index('shown_targeter_count')], 'target_count': name_by_pwy[ pathway_level_columns.index('target_count')], 'targeter_count': name_by_pwy[ pathway_level_columns.index('targeter_count')] } by_targeter = group_by_pwy.groupby('targeter_mature_name') for name_by_targeter, group_by_targeter in by_targeter: targets_by_targeters.append({ 'targeter': name_by_targeter, 'targets': ', '.join(group_by_targeter[ 'ncbigene'].unique().tolist()) }) html_template_input.append(result) generate_pathways_table(html_template_input, query_value_list)