Revision b4df039c6fe478297e532720e76d1213022410d5 authored by Jesper Nielsen on 26 October 2022, 08:27:38 UTC, committed by GitHub on 26 October 2022, 08:27:38 UTC
Fix mypy error. (#2009)
doc/build_docs.py
# Copyright 2022 The GPflow Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Code for building GPflow's documentation for a specified branch.
"""
import argparse
import json
import shutil
import subprocess
from datetime import datetime
from itertools import chain
from pathlib import Path
from time import perf_counter
from typing import Collection, Optional, Union

from generate_module_rst import generate_module_rst
from tabulate import tabulate
from versions import Branch

import gpflow

_SRC = Path(__file__).parent
_SPHINX_SRC = _SRC / "sphinx"
_NOTEBOOKS_SRC = _SPHINX_SRC / "notebooks"

_TMP = Path("/tmp/gpflow_build_docs")
_BUILD_TMP = _TMP / "build"
_NOTEBOOKS_TMP = _BUILD_TMP / "notebooks"
_DOCTREE_TMP = _TMP / "doctree"


def _post_process_ipynb(ipynb_relative_path: Path) -> None:
    """
    Post-processes notebooks, mostly to remove stuff the end-user shouldn't see.

    I've kind of got a hunch there should be a prettier way to do this, but this is simple enough
    and it works...
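
    For example (illustrative; the function names are made up), a code cell whose
    source is::

        # hide: begin
        internal_setup()
        # hide: end
        _ = make_plot()

    is rewritten to contain only ``make_plot()``, and a cell tagged with a
    ``# remove-cell`` comment (or whose source ends up empty) is dropped entirely.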
    """
    ipynb_path = _NOTEBOOKS_TMP / ipynb_relative_path

    with open(ipynb_path, "rt") as f_read:
        notebook = json.load(f_read)

    new_cells = []
    execution_count = 0
    hide_input_count = 0
    remove_cell_count = 0

    for cell in notebook["cells"]:
        remove_cell = False

        if cell["cell_type"] == "code":
            print("--------------------------------------------------")
            print("".join(cell["source"]))
            execution_times = cell["metadata"]["execution"]
            # The [:-1] drops the trailing "Z", which datetime.fromisoformat cannot
            # parse before Python 3.11.
            start = datetime.fromisoformat(execution_times["iopub.execute_input"][:-1])
            end = datetime.fromisoformat(execution_times["iopub.status.idle"][:-1])
            print("Execution time:", end - start)

            # ``hiding`` is True while inside a "# hide:" section; it briefly becomes
            # an int counting lines left to skip, so the end marker itself is hidden.
            hiding: Union[bool, int] = False
            source = cell["source"]
            new_source = []
            for line in source:
                if line.startswith("#"):
                    tokens = line[1:].split()
                    # Guard against bare "#" lines, whose token list is empty.
                    if tokens and tokens[0] == "hide:":
                        if tokens[1] == "begin":
                            assert not hiding, "Missing: # hide: end"
                            hide_input_count += 1
                            hiding = True
                        else:
                            assert hiding, "Missing: # hide: begin"
                            assert tokens[1] == "end"
                            hiding = 1  # Hide this "# hide: end" line too, then resume.
                    if tokens[0] == "remove-cell":
                        remove_cell = True
                elif line.startswith("_ = "):
                    line = line[4:]

                if not hiding:
                    new_source.append(line)
                elif not isinstance(hiding, bool):
                    hiding -= 1

            assert not hiding, "Missing: # hide: end"

            while new_source and new_source[0].strip() == "":
                new_source.pop(0)
            while new_source and new_source[-1].strip() == "":
                new_source.pop()
            while new_source and new_source[-1][-1] == "\n":
                new_source[-1] = new_source[-1][:-1]

            cell["source"] = new_source

            if not new_source:
                remove_cell = True

            if not remove_cell:
                # Fix execution counts that may have been distorted by 'remove-cell':
                execution_count += 1
                cell["execution_count"] = execution_count

        if remove_cell:
            print("Removing cell")
            remove_cell_count += 1
        else:
            new_cells.append(cell)

    if hide_input_count > 0:
        print(f"Removed {hide_input_count} sections tagged with `# hide`.")
    if remove_cell_count > 0:
        print(f"Removed {remove_cell_count} cells tagged with `# remove-cell`.")

    notebook["cells"] = new_cells
    with open(ipynb_path, "wt") as f_write:
        json.dump(notebook, f_write, indent=1)


class ShardingStrategy:
    """
    Strategy for how to shard (split) the work.
    """

    def __init__(self, spec: str) -> None:
        """
        Valid ``spec`` values are:

        - ``no``: No sharding will happen, and a single run of this script does all necessary work.
        - ``<i>/<n>``, where 0 <= i < n: Build a subset of notebooks, corresponding to job ``i`` out
          of ``n``.
        - ``collect``: Collect data generated by previous shards, and finish the work.
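
        For example (illustrative), ``ShardingStrategy("2/8")`` builds only every
        8th notebook (offset 2) and nothing else, while ``ShardingStrategy("collect")``
        assumes all notebooks were already built and does the remaining work.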
        """
        self.spec = spec
        if spec == "no":
            self.setup_tmp = True
            self.build_notebooks = True
            self.build_other = True
            self.shard_i = 0
            self.shard_n = 1
        elif spec == "collect":
            self.setup_tmp = False
            self.build_notebooks = False
            self.build_other = True
            self.shard_i = 0
            self.shard_n = 1
        else:
            i_str, n_str = spec.split("/")
            self.setup_tmp = False
            self.build_notebooks = True
            self.build_other = False
            self.shard_i = int(i_str)
            self.shard_n = int(n_str)
        assert 0 <= self.shard_i < self.shard_n, (self.shard_i, self.shard_n)

    def __repr__(self) -> str:
        return self.spec


def _create_fake_notebook(
    destination_relative_path: Path, limit_notebooks: Collection[str]
) -> None:
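    # For example (illustrative; the names are made up), running with
    # ``--limit_notebooks intro`` replaces a skipped notebook regression.ipynb
    # with a small reStructuredText stub:
    #
    #   Fake regression.ipynb
    #   #####################
    #
    #   Fake regression.ipynb due to::
    #
    #      --limit_notebooks intro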
    limiting_command = f"--limit_notebooks {' '.join(limit_notebooks)}"
    print(f'Generating fake, due to: "{limiting_command}"')

    destination = _NOTEBOOKS_TMP / destination_relative_path
    title = f"Fake {destination.name}"
    title_line = "#" * len(title)

    destination.write_text(
        f"""{title}
{title_line}

Fake {destination.name} due to::

   {limiting_command}
"""
    )


def _build_notebooks(
    limit_notebooks: Optional[Collection[str]], sharding: ShardingStrategy
) -> None:
    # Building the notebooks is really slow. Let's time it so we know which notebooks we can /
    # should optimise.
    timings = []
    all_notebooks = sorted(
        chain(_NOTEBOOKS_TMP.glob("**/*.pct.py"), _NOTEBOOKS_TMP.glob("**/*.md"))
    )
    for i, source_path in enumerate(all_notebooks):
        before = perf_counter()
        print()
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("Building:", source_path)
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")

        source_relative_path = source_path.relative_to(_NOTEBOOKS_TMP)
        destination_relative_path = source_relative_path
        while destination_relative_path.suffix:  # .pct.py has several suffixes. Remove all of them.
            destination_relative_path = destination_relative_path.with_suffix("")
        destination_relative_path = destination_relative_path.with_suffix(".ipynb")

        if i % sharding.shard_n != sharding.shard_i:
            print("Skipping due to sharding...")
        elif limit_notebooks is None or destination_relative_path.stem in limit_notebooks:
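            # Equivalent shell command (illustrative):
            #   jupytext --execute --to notebook -o <name>.ipynb <name>.pct.py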
            subprocess.run(
                [
                    "jupytext",
                    "--execute",
                    "--to",
                    "notebook",
                    "-o",
                    str(destination_relative_path),
                    str(source_relative_path),
                ],
                cwd=_NOTEBOOKS_TMP,
            ).check_returncode()
            _post_process_ipynb(destination_relative_path)
        else:
            _create_fake_notebook(destination_relative_path, limit_notebooks)

        after = perf_counter()
        timings.append((after - before, source_relative_path))

    timings.sort(reverse=True)
    print()
    print("Notebooks by build-time:")
    print(tabulate(timings, headers=["Time", "Notebook"]))
    print()


def main() -> None:
    parser = argparse.ArgumentParser(description="Build the GPflow documentation.")
    parser.add_argument(
        "branch",
        nargs="?",
        default=None,
        type=str,
        choices=[b.value for b in Branch],
        help="Git branch that is currently being built.",
    )
    parser.add_argument(
        "destination",
        nargs="?",
        default=None,
        type=Path,
        help="Directory to write docs to.",
    )
    parser.add_argument(
        "--limit_notebooks",
        "--limit-notebooks",
        type=str,
        nargs="*",
        help="Only process the notebooks with this base/stem name. Useful when debugging.",
    )
    parser.add_argument(
        "--fail_on_warning",
        "--fail-on-warning",
        default=False,
        action="store_true",
        help="If set, crash if there were any warnings while generating documentation.",
    )
    parser.add_argument(
        "--shard",
        default=ShardingStrategy("no"),
        type=ShardingStrategy,
        help=(
            "Sharding strategy:"
            " If set to 'no' this script performs all necessary work."
            " If set to the format <i>/<n>, where 0 <= i < n then this script only computes"
            f" notebooks for shard <i> out of <n> shards. This requires that {_TMP} has manually"
            " been created, and is empty."
            " If set to 'collect' then this script assumes all notebooks already have been"
            " computed, using the <i>/<n> commands, and finishes the work."
        ),
    )
    args = parser.parse_args()
    sharding = args.shard

    if sharding.setup_tmp:
        shutil.rmtree(_TMP, ignore_errors=True)
        _TMP.mkdir(parents=True)
    else:
        assert _TMP.is_dir()

    # Type-ignore below is because the `dirs_exist_ok` parameter was added in Python 3.8, and we
# still support Python 3.7. However, we only build our documentation using Python 3.10+, so
    # actually this is ok.
    # pylint: disable=unexpected-keyword-arg
    shutil.copytree(_SPHINX_SRC, _BUILD_TMP, dirs_exist_ok=True)  # type: ignore[call-arg]
    # pylint: enable=unexpected-keyword-arg

    if sharding.build_notebooks:
        _build_notebooks(args.limit_notebooks, sharding)

    if sharding.build_other:
        assert args.branch, "'branch' command line argument missing."
        branch = Branch(args.branch)
        dest = args.destination
        assert dest, "'destination' command line argument missing."
        version_dest = dest / branch.version
        shutil.rmtree(version_dest, ignore_errors=True)

        (_BUILD_TMP / "build_version.txt").write_text(branch.version)
        generate_module_rst(gpflow, _BUILD_TMP / "api")

        sphinx_commands = [
            "sphinx-build",
            "-b",
            "html",
            "-d",
            str(_DOCTREE_TMP),
            str(_BUILD_TMP),
            str(version_dest),
        ]
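        # Equivalent shell command (illustrative):
        #   sphinx-build -b html -d <doctree_tmp> <build_tmp> <dest>/<version>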
        if args.fail_on_warning:
            sphinx_commands.extend(
                [
                    "-W",
                    "--keep-going",
                ]
            )

        subprocess.run(sphinx_commands).check_returncode()


if __name__ == "__main__":
    main()