# -*- coding: utf-8 -*- # Author: Óscar Nájera # License: 3-clause BSD """ Link resolver objects ===================== """ from __future__ import print_function import codecs import gzip import os import posixpath import re import shelve import sys # Try Python 2 first, otherwise load from Python 3 try: import cPickle as pickle except ImportError: import pickle try: import urllib2 as urllib_request from urllib2 import HTTPError, URLError import urlparse as urllib_parse except ImportError: import urllib.request as urllib_request import urllib.parse as urllib_parse from urllib.error import HTTPError, URLError from io import StringIO from . import sphinx_compatibility logger = sphinx_compatibility.getLogger('sphinx-gallery') def _get_data(url): """Helper function to get data over http(s) or from a local file""" if urllib_parse.urlparse(url).scheme in ('http', 'https'): resp = urllib_request.urlopen(url) encoding = resp.headers.get('content-encoding', 'plain') data = resp.read() if encoding == 'plain': data = data.decode('utf-8') elif encoding == 'gzip': data = StringIO(data) data = gzip.GzipFile(fileobj=data).read().decode('utf-8') else: raise RuntimeError('unknown encoding') else: with codecs.open(url, mode='r', encoding='utf-8') as fid: data = fid.read() return data def get_data(url, gallery_dir): """Persistent dictionary usage to retrieve the search indexes""" # shelve keys need to be str in python 2 if sys.version_info[0] == 2 and isinstance(url, unicode): url = url.encode('utf-8') cached_file = os.path.join(gallery_dir, 'searchindex') search_index = shelve.open(cached_file) if url in search_index: data = search_index[url] else: data = _get_data(url) search_index[url] = data search_index.close() return data def _select_block(str_in, start_tag, end_tag): """Select first block delimited by start_tag and end_tag""" start_pos = str_in.find(start_tag) if start_pos < 0: raise ValueError('start_tag not found') depth = 0 for pos in range(start_pos, len(str_in)): if str_in[pos] == start_tag: depth += 1 elif str_in[pos] == end_tag: depth -= 1 if depth == 0: break sel = str_in[start_pos + 1:pos] return sel def _parse_dict_recursive(dict_str): """Parse a dictionary from the search index""" dict_out = dict() pos_last = 0 pos = dict_str.find(':') while pos >= 0: key = dict_str[pos_last:pos] if dict_str[pos + 1] == '[': # value is a list pos_tmp = dict_str.find(']', pos + 1) if pos_tmp < 0: raise RuntimeError('error when parsing dict') value = dict_str[pos + 2: pos_tmp].split(',') # try to convert elements to int for i in range(len(value)): try: value[i] = int(value[i]) except ValueError: pass elif dict_str[pos + 1] == '{': # value is another dictionary subdict_str = _select_block(dict_str[pos:], '{', '}') value = _parse_dict_recursive(subdict_str) pos_tmp = pos + len(subdict_str) else: raise ValueError('error when parsing dict: unknown elem') key = key.strip('"') if len(key) > 0: dict_out[key] = value pos_last = dict_str.find(',', pos_tmp) if pos_last < 0: break pos_last += 1 pos = dict_str.find(':', pos_last) return dict_out def parse_sphinx_searchindex(searchindex): """Parse a Sphinx search index Parameters ---------- searchindex : str The Sphinx search index (contents of searchindex.js) Returns ------- filenames : list of str The file names parsed from the search index. objects : dict The objects parsed from the search index. """ # Make sure searchindex uses UTF-8 encoding if hasattr(searchindex, 'decode'): searchindex = searchindex.decode('UTF-8') # parse objects query = 'objects:' pos = searchindex.find(query) if pos < 0: raise ValueError('"objects:" not found in search index') sel = _select_block(searchindex[pos:], '{', '}') objects = _parse_dict_recursive(sel) # parse filenames query = 'filenames:' pos = searchindex.find(query) if pos < 0: raise ValueError('"filenames:" not found in search index') filenames = searchindex[pos + len(query) + 1:] filenames = filenames[:filenames.find(']')] filenames = [f.strip('"') for f in filenames.split(',')] return filenames, objects class SphinxDocLinkResolver(object): """ Resolve documentation links using searchindex.js generated by Sphinx Parameters ---------- doc_url : str The base URL of the project website. searchindex : str Filename of searchindex, relative to doc_url. extra_modules_test : list of str List of extra module names to test. relative : bool Return relative links (only useful for links to documentation of this package). """ def __init__(self, doc_url, gallery_dir, searchindex='searchindex.js', extra_modules_test=None, relative=False): self.doc_url = doc_url self.gallery_dir = gallery_dir self.relative = relative self._link_cache = {} self.extra_modules_test = extra_modules_test self._page_cache = {} if doc_url.startswith(('http://', 'https://')): if relative: raise ValueError('Relative links are only supported for local ' 'URLs (doc_url cannot be absolute)') searchindex_url = doc_url + '/' + searchindex else: searchindex_url = os.path.join(doc_url, searchindex) # detect if we are using relative links on a Windows system if (os.name.lower() == 'nt' and not doc_url.startswith(('http://', 'https://'))): if not relative: raise ValueError('You have to use relative=True for the local' ' package on a Windows system.') self._is_windows = True else: self._is_windows = False # download and initialize the search index sindex = get_data(searchindex_url, gallery_dir) filenames, objects = parse_sphinx_searchindex(sindex) self._searchindex = dict(filenames=filenames, objects=objects) def _get_link(self, cobj): """Get a valid link, False if not found""" fname_idx = None full_name = cobj['module_short'] + '.' + cobj['name'] if full_name in self._searchindex['objects']: value = self._searchindex['objects'][full_name] if isinstance(value, dict): value = value[next(iter(value.keys()))] fname_idx = value[0] elif cobj['module_short'] in self._searchindex['objects']: value = self._searchindex['objects'][cobj['module_short']] if cobj['name'] in value.keys(): fname_idx = value[cobj['name']][0] if fname_idx is not None: fname = self._searchindex['filenames'][fname_idx] # In 1.5+ Sphinx seems to have changed from .rst.html to only # .html extension in converted files. But URLs could be # built with < 1.5 or >= 1.5 regardless of what we're currently # building with, so let's just check both :( fnames = [fname + '.html', os.path.splitext(fname)[0] + '.html'] for i, fname in enumerate(fnames): try: if self._is_windows: fname = fname.replace('/', '\\') link = os.path.join(self.doc_url, fname) else: link = posixpath.join(self.doc_url, fname) if link in self._page_cache: html = self._page_cache[link] else: html = get_data(link, self.gallery_dir) self._page_cache[link] = html except (HTTPError, URLError, IOError): if i == len(fnames) - 1: raise else: break # test if cobj appears in page comb_names = [cobj['module_short'] + '.' + cobj['name']] if self.extra_modules_test is not None: for mod in self.extra_modules_test: comb_names.append(mod + '.' + cobj['name']) url = False for comb_name in comb_names: if comb_name in html: url = link + u'#' + comb_name link = url else: link = False return link def resolve(self, cobj, this_url): """Resolve the link to the documentation, returns None if not found Parameters ---------- cobj : dict Dict with information about the "code object" for which we are resolving a link. cobj['name'] : function or class name (str) cobj['module_short'] : shortened module name (str) cobj['module'] : module name (str) this_url: str URL of the current page. Needed to construct relative URLs (only used if relative=True in constructor). Returns ------- link : str | None The link (URL) to the documentation. """ full_name = cobj['module_short'] + '.' + cobj['name'] link = self._link_cache.get(full_name, None) if link is None: # we don't have it cached link = self._get_link(cobj) # cache it for the future self._link_cache[full_name] = link if link is False or link is None: # failed to resolve return None if self.relative: link = os.path.relpath(link, start=this_url) if self._is_windows: # replace '\' with '/' so it on the web link = link.replace('\\', '/') # for some reason, the relative link goes one directory too high up link = link[3:] return link def _embed_code_links(app, gallery_conf, gallery_dir): # Add resolvers for the packages for which we want to show links doc_resolvers = {} src_gallery_dir = os.path.join(app.builder.srcdir, gallery_dir) for this_module, url in gallery_conf['reference_url'].items(): try: if url is None: doc_resolvers[this_module] = SphinxDocLinkResolver( app.builder.outdir, src_gallery_dir, relative=True) else: doc_resolvers[this_module] = SphinxDocLinkResolver(url, src_gallery_dir) except HTTPError as e: logger.warning("The following HTTP Error has occurred: %d", e.code) except URLError as e: logger.warning( "Embedding the documentation hyperlinks requires Internet " "access.\nPlease check your network connection.\nUnable to " "continue embedding `%s` links due to a URL Error:\n%s", this_module, str(e.args)) html_gallery_dir = os.path.abspath(os.path.join(app.builder.outdir, gallery_dir)) # patterns for replacement link_pattern = ('%s') orig_pattern = '%s' period = '.' # This could be turned into a generator if necessary, but should be okay flat = [[dirpath, filename] for dirpath, _, filenames in os.walk(html_gallery_dir) for filename in filenames] iterator = sphinx_compatibility.status_iterator( flat, gallery_dir, color='fuchsia', length=len(flat), stringify_func=lambda x: os.path.basename(x[1])) for dirpath, fname in iterator: full_fname = os.path.join(html_gallery_dir, dirpath, fname) subpath = dirpath[len(html_gallery_dir) + 1:] pickle_fname = os.path.join(src_gallery_dir, subpath, fname[:-5] + '_codeobj.pickle') if os.path.exists(pickle_fname): # we have a pickle file with the objects to embed links for with open(pickle_fname, 'rb') as fid: example_code_obj = pickle.load(fid) fid.close() str_repl = {} # generate replacement strings with the links for name, cobj in example_code_obj.items(): this_module = cobj['module'].split('.')[0] if this_module not in doc_resolvers: continue try: link = doc_resolvers[this_module].resolve(cobj, full_fname) except (HTTPError, URLError) as e: if isinstance(e, HTTPError): extra = e.code else: extra = e.reason logger.warning("Error resolving %s.%s: %r (%s)", cobj['module'], cobj['name'], e, extra) continue if link is not None: parts = name.split('.') name_html = period.join(orig_pattern % part for part in parts) full_function_name = '%s.%s' % ( cobj['module'], cobj['name']) str_repl[name_html] = link_pattern % ( link, full_function_name, name_html) # do the replacement in the html file # ensure greediness names = sorted(str_repl, key=len, reverse=True) regex_str = '|'.join(re.escape(name) for name in names) regex = re.compile(regex_str) def substitute_link(match): return str_repl[match.group()] if len(str_repl) > 0: with open(full_fname, 'rb') as fid: lines_in = fid.readlines() with open(full_fname, 'wb') as fid: for line in lines_in: line = line.decode('utf-8') line = regex.sub(substitute_link, line) fid.write(line.encode('utf-8')) def embed_code_links(app, exception): """Embed hyperlinks to documentation into example code""" if exception is not None: return # No need to waste time embedding hyperlinks when not running the examples # XXX: also at the time of writing this fixes make html-noplot # for some reason I don't fully understand if not app.builder.config.plot_gallery: return # XXX: Whitelist of builders for which it makes sense to embed # hyperlinks inside the example html. Note that the link embedding # require searchindex.js to exist for the links to the local doc # and there does not seem to be a good way of knowing which # builders creates a searchindex.js. if app.builder.name not in ['html', 'readthedocs']: return logger.info('Embedding documentation hyperlinks in examples ...', color='white') gallery_conf = app.config.sphinx_gallery_conf gallery_dirs = gallery_conf['gallery_dirs'] if not isinstance(gallery_dirs, list): gallery_dirs = [gallery_dirs] for gallery_dir in gallery_dirs: _embed_code_links(app, gallery_conf, gallery_dir)