swh:1:snp:4e3e7077647a709f15b8c1b32ce7100175d0580b
Tip revision: 7146ca6de4e9aafb344bfa9a035f5f0b640aabca authored by Jean Kossaifi on 27 February 2017, 14:39:12 UTC
DOC: Minor update
DOC: Minor update
Tip revision: 7146ca6
docscrape.py
"""Extract reference documentation from the NumPy source tree.
"""
from __future__ import division, absolute_import, print_function
import inspect
import textwrap
import re
import pydoc
from warnings import warn
import collections
import copy
import sys
class Reader(object):
"""A line-based string reader.
"""
def __init__(self, data):
"""
Parameters
----------
data : str
String with lines separated by '\n'.
"""
if isinstance(data, list):
self._str = data
else:
self._str = data.split('\n') # store string as list of lines
self.reset()
def __getitem__(self, n):
return self._str[n]
def reset(self):
self._l = 0 # current line nr
def read(self):
if not self.eof():
out = self[self._l]
self._l += 1
return out
else:
return ''
def seek_next_non_empty_line(self):
for l in self[self._l:]:
if l.strip():
break
else:
self._l += 1
def eof(self):
return self._l >= len(self._str)
def read_to_condition(self, condition_func):
start = self._l
for line in self[start:]:
if condition_func(line):
return self[start:self._l]
self._l += 1
if self.eof():
return self[start:self._l+1]
return []
def read_to_next_empty_line(self):
self.seek_next_non_empty_line()
def is_empty(line):
return not line.strip()
return self.read_to_condition(is_empty)
def read_to_next_unindented_line(self):
def is_unindented(line):
return (line.strip() and (len(line.lstrip()) == len(line)))
return self.read_to_condition(is_unindented)
def peek(self, n=0):
if self._l + n < len(self._str):
return self[self._l + n]
else:
return ''
def is_empty(self):
return not ''.join(self._str).strip()
class ParseError(Exception):
def __str__(self):
message = self.message
if hasattr(self, 'docstring'):
message = "%s in %r" % (message, self.docstring)
return message
class NumpyDocString(collections.Mapping):
sections = {
'Signature': '',
'Summary': [''],
'Extended Summary': [],
'Parameters': [],
'Returns': [],
'Yields': [],
'Raises': [],
'Warns': [],
'Other Parameters': [],
'Attributes': [],
'Methods': [],
'See Also': [],
'Notes': [],
'Warnings': [],
'References': '',
'Examples': '',
'index': {}
}
def __init__(self, docstring, config={}):
orig_docstring = docstring
docstring = textwrap.dedent(docstring).split('\n')
self._doc = Reader(docstring)
self._parsed_data = copy.deepcopy(self.sections)
try:
self._parse()
except ParseError as e:
e.docstring = orig_docstring
raise
def __getitem__(self, key):
return self._parsed_data[key]
def __setitem__(self, key, val):
if key not in self._parsed_data:
warn("Unknown section %s" % key)
else:
self._parsed_data[key] = val
def __iter__(self):
return iter(self._parsed_data)
def __len__(self):
return len(self._parsed_data)
def _is_at_section(self):
self._doc.seek_next_non_empty_line()
if self._doc.eof():
return False
l1 = self._doc.peek().strip() # e.g. Parameters
if l1.startswith('.. index::'):
return True
l2 = self._doc.peek(1).strip() # ---------- or ==========
return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
def _strip(self, doc):
i = 0
j = 0
for i, line in enumerate(doc):
if line.strip():
break
for j, line in enumerate(doc[::-1]):
if line.strip():
break
return doc[i:len(doc)-j]
def _read_to_next_section(self):
section = self._doc.read_to_next_empty_line()
while not self._is_at_section() and not self._doc.eof():
if not self._doc.peek(-1).strip(): # previous line was empty
section += ['']
section += self._doc.read_to_next_empty_line()
return section
def _read_sections(self):
while not self._doc.eof():
data = self._read_to_next_section()
name = data[0].strip()
if name.startswith('..'): # index section
yield name, data[1:]
elif len(data) < 2:
yield StopIteration
else:
yield name, self._strip(data[2:])
def _parse_param_list(self, content):
r = Reader(content)
params = []
while not r.eof():
header = r.read().strip()
if ' : ' in header:
arg_name, arg_type = header.split(' : ')[:2]
else:
arg_name, arg_type = header, ''
desc = r.read_to_next_unindented_line()
desc = dedent_lines(desc)
params.append((arg_name, arg_type, desc))
return params
_name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|"
r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X)
def _parse_see_also(self, content):
"""
func_name : Descriptive text
continued text
another_func_name : Descriptive text
func_name1, func_name2, :meth:`func_name`, func_name3
"""
items = []
def parse_item_name(text):
"""Match ':role:`name`' or 'name'"""
m = self._name_rgx.match(text)
if m:
g = m.groups()
if g[1] is None:
return g[3], None
else:
return g[2], g[1]
raise ParseError("%s is not a item name" % text)
def push_item(name, rest):
if not name:
return
name, role = parse_item_name(name)
items.append((name, list(rest), role))
del rest[:]
current_func = None
rest = []
for line in content:
if not line.strip():
continue
m = self._name_rgx.match(line)
if m and line[m.end():].strip().startswith(':'):
push_item(current_func, rest)
current_func, line = line[:m.end()], line[m.end():]
rest = [line.split(':', 1)[1].strip()]
if not rest[0]:
rest = []
elif not line.startswith(' '):
push_item(current_func, rest)
current_func = None
if ',' in line:
for func in line.split(','):
if func.strip():
push_item(func, [])
elif line.strip():
current_func = line
elif current_func is not None:
rest.append(line.strip())
push_item(current_func, rest)
return items
def _parse_index(self, section, content):
"""
.. index: default
:refguide: something, else, and more
"""
def strip_each_in(lst):
return [s.strip() for s in lst]
out = {}
section = section.split('::')
if len(section) > 1:
out['default'] = strip_each_in(section[1].split(','))[0]
for line in content:
line = line.split(':')
if len(line) > 2:
out[line[1]] = strip_each_in(line[2].split(','))
return out
def _parse_summary(self):
"""Grab signature (if given) and summary"""
if self._is_at_section():
return
# If several signatures present, take the last one
while True:
summary = self._doc.read_to_next_empty_line()
summary_str = " ".join([s.strip() for s in summary]).strip()
if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str):
self['Signature'] = summary_str
if not self._is_at_section():
continue
break
if summary is not None:
self['Summary'] = summary
if not self._is_at_section():
self['Extended Summary'] = self._read_to_next_section()
def _parse(self):
self._doc.reset()
self._parse_summary()
sections = list(self._read_sections())
section_names = set([section for section, content in sections])
has_returns = 'Returns' in section_names
has_yields = 'Yields' in section_names
# We could do more tests, but we are not. Arbitrarily.
if has_returns and has_yields:
msg = 'Docstring contains both a Returns and Yields section.'
raise ValueError(msg)
for (section, content) in sections:
if not section.startswith('..'):
section = (s.capitalize() for s in section.split(' '))
section = ' '.join(section)
if self.get(section):
msg = ("The section %s appears twice in the docstring." %
section)
raise ValueError(msg)
if section in ('Parameters', 'Returns', 'Yields', 'Raises',
'Warns', 'Other Parameters', 'Attributes',
'Methods'):
self[section] = self._parse_param_list(content)
elif section.startswith('.. index::'):
self['index'] = self._parse_index(section, content)
elif section == 'See Also':
self['See Also'] = self._parse_see_also(content)
else:
self[section] = content
# string conversion routines
def _str_header(self, name, symbol='-'):
return [name, len(name)*symbol]
def _str_indent(self, doc, indent=4):
out = []
for line in doc:
out += [' '*indent + line]
return out
def _str_signature(self):
if self['Signature']:
return [self['Signature'].replace('*', '\*')] + ['']
else:
return ['']
def _str_summary(self):
if self['Summary']:
return self['Summary'] + ['']
else:
return []
def _str_extended_summary(self):
if self['Extended Summary']:
return self['Extended Summary'] + ['']
else:
return []
def _str_param_list(self, name):
out = []
if self[name]:
out += self._str_header(name)
for param, param_type, desc in self[name]:
if param_type:
out += ['%s : %s' % (param, param_type)]
else:
out += [param]
out += self._str_indent(desc)
out += ['']
return out
def _str_section(self, name):
out = []
if self[name]:
out += self._str_header(name)
out += self[name]
out += ['']
return out
def _str_see_also(self, func_role):
if not self['See Also']:
return []
out = []
out += self._str_header("See Also")
last_had_desc = True
for func, desc, role in self['See Also']:
if role:
link = ':%s:`%s`' % (role, func)
elif func_role:
link = ':%s:`%s`' % (func_role, func)
else:
link = "`%s`_" % func
if desc or last_had_desc:
out += ['']
out += [link]
else:
out[-1] += ", %s" % link
if desc:
out += self._str_indent([' '.join(desc)])
last_had_desc = True
else:
last_had_desc = False
out += ['']
return out
def _str_index(self):
idx = self['index']
out = []
out += ['.. index:: %s' % idx.get('default', '')]
for section, references in idx.items():
if section == 'default':
continue
out += [' :%s: %s' % (section, ', '.join(references))]
return out
def __str__(self, func_role=''):
out = []
out += self._str_signature()
out += self._str_summary()
out += self._str_extended_summary()
for param_list in ('Parameters', 'Returns', 'Yields',
'Other Parameters', 'Raises', 'Warns'):
out += self._str_param_list(param_list)
out += self._str_section('Warnings')
out += self._str_see_also(func_role)
for s in ('Notes', 'References', 'Examples'):
out += self._str_section(s)
for param_list in ('Attributes', 'Methods'):
out += self._str_param_list(param_list)
out += self._str_index()
return '\n'.join(out)
def indent(str, indent=4):
indent_str = ' '*indent
if str is None:
return indent_str
lines = str.split('\n')
return '\n'.join(indent_str + l for l in lines)
def dedent_lines(lines):
"""Deindent a list of lines maximally"""
return textwrap.dedent("\n".join(lines)).split("\n")
def header(text, style='-'):
return text + '\n' + style*len(text) + '\n'
class FunctionDoc(NumpyDocString):
def __init__(self, func, role='func', doc=None, config={}):
self._f = func
self._role = role # e.g. "func" or "meth"
if doc is None:
if func is None:
raise ValueError("No function or docstring given")
doc = inspect.getdoc(func) or ''
NumpyDocString.__init__(self, doc)
if not self['Signature'] and func is not None:
func, func_name = self.get_func()
try:
try:
signature = str(inspect.signature(func))
except (AttributeError, ValueError):
# try to read signature, backward compat for older Python
if sys.version_info[0] >= 3:
argspec = inspect.getfullargspec(func)
else:
argspec = inspect.getargspec(func)
signature = inspect.formatargspec(*argspec)
signature = '%s%s' % (func_name, signature.replace('*', '\*'))
except TypeError:
signature = '%s()' % func_name
self['Signature'] = signature
def get_func(self):
func_name = getattr(self._f, '__name__', self.__class__.__name__)
if inspect.isclass(self._f):
func = getattr(self._f, '__call__', self._f.__init__)
else:
func = self._f
return func, func_name
def __str__(self):
out = ''
func, func_name = self.get_func()
signature = self['Signature'].replace('*', '\*')
roles = {'func': 'function',
'meth': 'method'}
if self._role:
if self._role not in roles:
print("Warning: invalid role %s" % self._role)
out += '.. %s:: %s\n \n\n' % (roles.get(self._role, ''),
func_name)
out += super(FunctionDoc, self).__str__(func_role=self._role)
return out
class ClassDoc(NumpyDocString):
extra_public_methods = ['__call__']
def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc,
config={}):
if not inspect.isclass(cls) and cls is not None:
raise ValueError("Expected a class or None, but got %r" % cls)
self._cls = cls
self.show_inherited_members = config.get(
'show_inherited_class_members', True)
if modulename and not modulename.endswith('.'):
modulename += '.'
self._mod = modulename
if doc is None:
if cls is None:
raise ValueError("No class or documentation string given")
doc = pydoc.getdoc(cls)
NumpyDocString.__init__(self, doc)
if config.get('show_class_members', True):
def splitlines_x(s):
if not s:
return []
else:
return s.splitlines()
for field, items in [('Methods', self.methods),
('Attributes', self.properties)]:
if not self[field]:
doc_list = []
for name in sorted(items):
try:
doc_item = pydoc.getdoc(getattr(self._cls, name))
doc_list.append((name, '', splitlines_x(doc_item)))
except AttributeError:
pass # method doesn't exist
self[field] = doc_list
@property
def methods(self):
if self._cls is None:
return []
return [name for name, func in inspect.getmembers(self._cls)
if ((not name.startswith('_')
or name in self.extra_public_methods)
and isinstance(func, collections.Callable)
and self._is_show_member(name))]
@property
def properties(self):
if self._cls is None:
return []
return [name for name, func in inspect.getmembers(self._cls)
if (not name.startswith('_') and
(func is None or isinstance(func, property) or
inspect.isgetsetdescriptor(func))
and self._is_show_member(name))]
def _is_show_member(self, name):
if self.show_inherited_members:
return True # show all class members
if name not in self._cls.__dict__:
return False # class member is inherited, we do not show it
return True