Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

swh:1:snp:4e3e7077647a709f15b8c1b32ce7100175d0580b
  • Code
  • Branches (2)
  • Releases (15)
    • Branches
    • Releases
    • HEAD
    • refs/heads/main
    • refs/tags/0.6.0
    • 0.5.1
    • 0.5.0
    • 0.4.5
    • 0.4.4
    • 0.4.3
    • 0.4.2
    • 0.4.1
    • 0.4.0
    • 0.3.0
    • 0.2.0
    • 0.1.6
    • 0.1.5
    • 0.1.4
    • 0.1.3
    • 0.1.2
  • e823429
  • /
  • doc
  • /
  • sphinx_ext
  • /
  • numpydoc
  • /
  • comment_eater.py
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
  • directory
  • revision
  • snapshot
  • release
content badge
swh:1:cnt:8cddd3305f0bc17480b1269801f1d43af274c924
directory badge
swh:1:dir:bf831d13dc65d5435f22e1f4bd970e859ce3f216
revision badge
swh:1:rev:23c6d135ae81f12205d4d57c22652628ecc2adf1
snapshot badge
swh:1:snp:4e3e7077647a709f15b8c1b32ce7100175d0580b
release badge
swh:1:rel:275523de3ea9a3d1640ec8f4c4e1eb78650318f8

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
  • directory
  • revision
  • snapshot
  • release
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Tip revision: 23c6d135ae81f12205d4d57c22652628ecc2adf1 authored by Jean Kossaifi on 06 January 2017, 13:34:08 UTC
Better github ribbon (especially for mobiles)
Tip revision: 23c6d13
comment_eater.py
from __future__ import division, absolute_import, print_function

import sys
if sys.version_info[0] >= 3:
    from io import StringIO
else:
    from io import StringIO

import compiler
import inspect
import textwrap
import tokenize

from .compiler_unparse import unparse


class Comment(object):
    """ A comment block.
    """
    is_comment = True
    def __init__(self, start_lineno, end_lineno, text):
        # int : The first line number in the block. 1-indexed.
        self.start_lineno = start_lineno
        # int : The last line number. Inclusive!
        self.end_lineno = end_lineno
        # str : The text block including '#' character but not any leading spaces.
        self.text = text

    def add(self, string, start, end, line):
        """ Add a new comment line.
        """
        self.start_lineno = min(self.start_lineno, start[0])
        self.end_lineno = max(self.end_lineno, end[0])
        self.text += string

    def __repr__(self):
        return '%s(%r, %r, %r)' % (self.__class__.__name__, self.start_lineno,
            self.end_lineno, self.text)


class NonComment(object):
    """ A non-comment block of code.
    """
    is_comment = False
    def __init__(self, start_lineno, end_lineno):
        self.start_lineno = start_lineno
        self.end_lineno = end_lineno

    def add(self, string, start, end, line):
        """ Add lines to the block.
        """
        if string.strip():
            # Only add if not entirely whitespace.
            self.start_lineno = min(self.start_lineno, start[0])
            self.end_lineno = max(self.end_lineno, end[0])

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.start_lineno,
            self.end_lineno)


class CommentBlocker(object):
    """ Pull out contiguous comment blocks.
    """
    def __init__(self):
        # Start with a dummy.
        self.current_block = NonComment(0, 0)

        # All of the blocks seen so far.
        self.blocks = []

        # The index mapping lines of code to their associated comment blocks.
        self.index = {}

    def process_file(self, file):
        """ Process a file object.
        """
        if sys.version_info[0] >= 3:
            nxt = file.__next__
        else:
            nxt = file.next
        for token in tokenize.generate_tokens(nxt):
            self.process_token(*token)
        self.make_index()

    def process_token(self, kind, string, start, end, line):
        """ Process a single token.
        """
        if self.current_block.is_comment:
            if kind == tokenize.COMMENT:
                self.current_block.add(string, start, end, line)
            else:
                self.new_noncomment(start[0], end[0])
        else:
            if kind == tokenize.COMMENT:
                self.new_comment(string, start, end, line)
            else:
                self.current_block.add(string, start, end, line)

    def new_noncomment(self, start_lineno, end_lineno):
        """ We are transitioning from a noncomment to a comment.
        """
        block = NonComment(start_lineno, end_lineno)
        self.blocks.append(block)
        self.current_block = block

    def new_comment(self, string, start, end, line):
        """ Possibly add a new comment.

        Only adds a new comment if this comment is the only thing on the line.
        Otherwise, it extends the noncomment block.
        """
        prefix = line[:start[1]]
        if prefix.strip():
            # Oops! Trailing comment, not a comment block.
            self.current_block.add(string, start, end, line)
        else:
            # A comment block.
            block = Comment(start[0], end[0], string)
            self.blocks.append(block)
            self.current_block = block

    def make_index(self):
        """ Make the index mapping lines of actual code to their associated
        prefix comments.
        """
        for prev, block in zip(self.blocks[:-1], self.blocks[1:]):
            if not block.is_comment:
                self.index[block.start_lineno] = prev

    def search_for_comment(self, lineno, default=None):
        """ Find the comment block just before the given line number.

        Returns None (or the specified default) if there is no such block.
        """
        if not self.index:
            self.make_index()
        block = self.index.get(lineno, None)
        text = getattr(block, 'text', default)
        return text


def strip_comment_marker(text):
    """ Strip # markers at the front of a block of comment text.
    """
    lines = []
    for line in text.splitlines():
        lines.append(line.lstrip('#'))
    text = textwrap.dedent('\n'.join(lines))
    return text


def get_class_traits(klass):
    """ Yield all of the documentation for trait definitions on a class object.
    """
    # FIXME: gracefully handle errors here or in the caller?
    source = inspect.getsource(klass)
    cb = CommentBlocker()
    cb.process_file(StringIO(source))
    mod_ast = compiler.parse(source)
    class_ast = mod_ast.node.nodes[0]
    for node in class_ast.code.nodes:
        # FIXME: handle other kinds of assignments?
        if isinstance(node, compiler.ast.Assign):
            name = node.nodes[0].name
            rhs = unparse(node.expr).strip()
            doc = strip_comment_marker(cb.search_for_comment(node.lineno, default=''))
            yield name, rhs, doc

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API