# build_docs.py
# Copyright 2022 The GPflow Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Code for building GPflow's documentation for a specified branch.
"""
import argparse
import shutil
import subprocess
from itertools import chain
from pathlib import Path
from time import perf_counter
from typing import Collection, Optional
from generate_module_rst import generate_module_rst
from tabulate import tabulate
from versions import Branch
import gpflow
# Source locations: this script's directory, and the Sphinx sources next to it.
_SRC = Path(__file__).parent
_SPHINX_SRC = _SRC / "sphinx"
_NOTEBOOKS_SRC = _SPHINX_SRC / "notebooks"
# Scratch space for building. Presumably shared between shard jobs — the `--shard`
# help text requires it to have been created manually before sharded runs.
_TMP = Path("/tmp/gpflow_build_docs")
_BUILD_TMP = _TMP / "build"  # Sphinx sources are copied here and built in place.
_NOTEBOOKS_TMP = _BUILD_TMP / "notebooks"  # notebooks are executed here.
_DOCTREE_TMP = _TMP / "doctree"  # Sphinx doctree cache (sphinx-build -d).
class ShardingStrategy:
    """
    Strategy for how to shard (split) the work between several parallel jobs.
    """

    def __init__(self, spec: str) -> None:
        """
        Valid ``spec``\\s are:

        - ``no``: No sharding will happen, and a single run of this script does all necessary work.
        - ``<i>/<n>``, where 0 <= i < n: Build a subset of notebooks, corresponding to job ``i`` out
          of ``n``.
        - ``collect``: Collect data generated by previous shards, and finish the work.

        :param spec: Sharding specification, in one of the formats above.
        :raises ValueError: If ``spec`` is malformed. We raise ``ValueError`` (instead of using
            ``assert``, which is stripped under ``python -O``) so that ``argparse`` reports a
            clean usage error when this class is used as an argument ``type=``.
        """
        self.spec = spec
        if spec == "no":
            # A single job does everything.
            self.setup_tmp = True
            self.build_notebooks = True
            self.build_other = True
            self.shard_i = 0
            self.shard_n = 1
        elif spec == "collect":
            # Final job: assumes all notebooks were already built by earlier shards.
            self.setup_tmp = False
            self.build_notebooks = False
            self.build_other = True
            self.shard_i = 0
            self.shard_n = 1
        else:
            # "<i>/<n>": build only every <n>th notebook, offset by <i>.
            try:
                i_str, n_str = spec.split("/")
                self.shard_i = int(i_str)
                self.shard_n = int(n_str)
            except ValueError as e:
                raise ValueError(f"Invalid shard specification: {spec!r}") from e
            self.setup_tmp = False
            self.build_notebooks = True
            self.build_other = False
            if not 0 <= self.shard_i < self.shard_n:
                raise ValueError(f"Invalid shard specification: {spec!r}. Requires 0 <= i < n.")

    def __repr__(self) -> str:
        return self.spec
def _create_fake_notebook(
    destination_relative_path: Path, limit_notebooks: Collection[str]
) -> None:
    """
    Write a placeholder page for a notebook that was skipped because of ``--limit_notebooks``.

    :param destination_relative_path: Target ``.ipynb`` path, relative to ``_NOTEBOOKS_TMP``.
    :param limit_notebooks: Names of the notebooks that *are* being built; echoed into the
        placeholder so readers can see why this page is fake.
    """
    limiting_command = f"--limit_notebooks {' '.join(limit_notebooks)}"
    print(f"Generating fake, due to {limiting_command}")

    target = _NOTEBOOKS_TMP / destination_relative_path
    heading = f"Fake {target.name}"
    underline = "#" * len(heading)
    body = f"""{heading}
{underline}

Fake {target.name} due to::

    {limiting_command}
"""
    target.write_text(body)
def _build_notebooks(
    limit_notebooks: Optional[Collection[str]], sharding: ShardingStrategy
) -> None:
    """
    Execute every notebook source under ``_NOTEBOOKS_TMP`` (``.pct.py`` and ``.md`` files),
    converting each to a ``.ipynb`` next to its source.

    Building the notebooks is really slow, so each one is timed and a summary table is printed,
    so we know which notebooks we can / should optimise.

    :param limit_notebooks: If not ``None``, only notebooks whose stem appears here are executed;
        all others get fake placeholder pages.
    :param sharding: Selects which subset of the notebooks this process builds.
    """
    build_times = []
    sources = sorted(chain(_NOTEBOOKS_TMP.glob("**/*.pct.py"), _NOTEBOOKS_TMP.glob("**/*.md")))
    for index, source in enumerate(sources):
        started = perf_counter()
        print()
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
        print("Building:", source)
        print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")

        relative_source = source.relative_to(_NOTEBOOKS_TMP)
        relative_destination = relative_source
        # .pct.py has several suffixes. Remove all of them.
        while relative_destination.suffix:
            relative_destination = relative_destination.with_suffix("")
        relative_destination = relative_destination.with_suffix(".ipynb")

        if index % sharding.shard_n != sharding.shard_i:
            print("Skipping due to sharding...")
        elif limit_notebooks is None or relative_destination.stem in limit_notebooks:
            command = [
                "jupytext",
                "--execute",
                "--to",
                "notebook",
                "-o",
                str(relative_destination),
                str(relative_source),
            ]
            subprocess.run(command, cwd=_NOTEBOOKS_TMP).check_returncode()
        else:
            _create_fake_notebook(relative_destination, limit_notebooks)

        build_times.append((perf_counter() - started, relative_source))

    # Slowest notebooks first, so the optimisation targets are at the top.
    build_times.sort(reverse=True)
    print()
    print("Notebooks by build-time:")
    print(tabulate(build_times, headers=["Time", "Notebook"]))
    print()
def main() -> None:
    """
    Parse command line arguments and build the GPflow documentation.

    Depending on ``--shard`` this either performs all the work, builds one shard of the
    notebooks, or collects the output of earlier shards and runs Sphinx.
    """
    parser = argparse.ArgumentParser(description="Build the GPflow documentation.")
    parser.add_argument(
        "branch",
        nargs="?",
        default=None,
        type=str,
        choices=[b.value for b in Branch],
        help="Git branch that is currently being built.",
    )
    parser.add_argument(
        "destination",
        nargs="?",
        default=None,
        type=Path,
        help="Directory to write docs to.",
    )
    parser.add_argument(
        "--limit_notebooks",
        "--limit-notebooks",
        type=str,
        nargs="*",
        help="Only process the notebooks with this base/stem name. Useful when debugging.",
    )
    parser.add_argument(
        "--fail_on_warning",
        "--fail-on-warning",
        default=False,
        action="store_true",
        help="If set, crash if there were any warnings while generating documentation.",
    )
    parser.add_argument(
        "--shard",
        default=ShardingStrategy("no"),
        type=ShardingStrategy,
        help=(
            "Sharding strategy:"
            " If set to 'no' this script performs all necessary work."
            " If set to the format <i>/<n>, where 0 <= i < n then this script only computes"
            f" notebooks for shard <i> out of <n> shards. This requires that {_TMP} has manually"
            " been created, and is empty."
            " If set to 'collect' then this script assumes all notebooks already have been"
            " computed, using the <i>/<n> commands, and finishes the work."
        ),
    )
    args = parser.parse_args()
    sharding = args.shard

    if sharding.setup_tmp:
        # Fresh scratch directory for a single-process build.
        shutil.rmtree(_TMP, ignore_errors=True)
        _TMP.mkdir(parents=True)
    else:
        # Sharded runs require the scratch directory to exist already.
        assert _TMP.is_dir()

    # Type-ignore below is because the `dirs_exist_ok` parameter was added in Python 3.8, and we
    # still support Python 3.7. However, we only build our documentation using Python 3.10+, so
    # actually this is ok.
    shutil.copytree(_SPHINX_SRC, _BUILD_TMP, dirs_exist_ok=True)  # type: ignore[call-arg]

    if sharding.build_notebooks:
        _build_notebooks(args.limit_notebooks, sharding)

    if sharding.build_other:
        # BUG FIX: validate `args.branch` *before* converting it. Previously the conversion
        # `Branch(args.branch)` ran first, so a missing argument raised an opaque
        # `ValueError: None is not a valid Branch` and the assertion message was unreachable.
        assert args.branch, "'branch' command line argument missing."
        branch = Branch(args.branch)
        assert args.destination, "'destination' command line argument missing."
        dest = args.destination

        # Rebuild this version's output directory from scratch.
        version_dest = dest / branch.version
        shutil.rmtree(version_dest, ignore_errors=True)

        (_BUILD_TMP / "build_version.txt").write_text(branch.version)
        generate_module_rst(gpflow, _BUILD_TMP / "api")

        sphinx_commands = [
            "sphinx-build",
            "-b",
            "html",
            "-d",
            str(_DOCTREE_TMP),
            str(_BUILD_TMP),
            str(version_dest),
        ]
        if args.fail_on_warning:
            # -W turns warnings into errors; --keep-going still reports them all before failing.
            sphinx_commands.extend(
                [
                    "-W",
                    "--keep-going",
                ]
            )
        subprocess.run(sphinx_commands).check_returncode()
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()