#!/usr/bin/env python3
"""
PURPOSE: Automate building, installing, and publishing our modules.
This is like a little clone of "lerna" for our purposes.

NOTES:
 - We cannot run "npm ci" in parallel across modules: since we're using workspaces,
   running several npm ci at once completely breaks npm, and it also makes the
   error messages much harder to understand.
 - The same goes for "npm run build" in parallel -- it subtly breaks.

TEST:
 - This should always work:  "mypy workspaces.py"
"""

import argparse, json, os, platform, shutil, subprocess, sys

from typing import Any, Optional, Callable, List

MAX_PACKAGE_LOCK_SIZE_MB = 5


def newest_file(path: str) -> str:
    if platform.system() != 'Darwin':
        # See https://gist.github.com/brwyatt/c21a888d79927cb476a4 for this Linux
        # version:
        cmd = 'find . -type f -printf "%C@ %p\n" | sort -rn | head -n 1 | cut -d" " -f2'
    else:
        # but find's -printf is not available on macOS, so we use the
        # stat-based rewrite suggested at
        # https://unix.stackexchange.com/questions/272491/bash-error-find-printf-unknown-primary-or-operator
        cmd = 'find . -type f -print0 | xargs -0r stat -f "%Fc %N" | sort -rn | head -n 1 | cut -d" " -f2'
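    # Both pipelines print "<change time> <path>" for every file, sort newest
    # first, and keep just the path of the single most recent file, relative
    # to `path` (e.g. "./.successful-build").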
    return os.popen(f'cd "{path}" && {cmd}').read().strip()


SUCCESSFUL_BUILD = ".successful-build"


def needs_build(package: str) -> bool:
    # A build is needed unless the newest file in the package tree is the
    # SUCCESSFUL_BUILD marker, which build() touches after each successful
    # build.  (Just comparing timestamps against dist/ was hopelessly naive,
    # e.g., a failed build would never get retried.)
    path = os.path.join(os.path.dirname(__file__), package)
    newest = newest_file(path)
    return not newest.startswith('./' + SUCCESSFUL_BUILD)
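
# For example, right after a successful build the newest file in the package
# tree is the freshly touched ./.successful-build marker, so needs_build()
# returns False; editing any source file afterwards makes it return True again.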


def handle_path(s: str,
                path: Optional[str] = None,
                verbose: bool = True) -> None:
    desc = s
    if path is not None:
        os.chdir(path)
        desc += " # in '%s'" % path
    if verbose:
        print(desc)


def cmd(s: str, path: Optional[str] = None, verbose: bool = True) -> None:
    home: str = os.path.abspath(os.curdir)
    try:
        handle_path(s, path, verbose)
        if os.system(s):
            raise RuntimeError("Error executing '%s'" % s)
    finally:
        os.chdir(home)


def run(s: str, path: Optional[str] = None, verbose: bool = True) -> str:
    home = os.path.abspath(os.curdir)
    try:
        handle_path(s, path, verbose)
        a = subprocess.run(s, shell=True, stdout=subprocess.PIPE)
        out = a.stdout.decode('utf8')
        if a.returncode:
            raise RuntimeError("Error executing '%s'" % s)
        return out
    finally:
        os.chdir(home)


def thread_map(callable: Callable,
               inputs: List[Any],
               nb_threads: int = 10) -> List:
    if len(inputs) == 0:
        return []
    if nb_threads == 1:
        return [callable(x) for x in inputs]
    from multiprocessing.pool import ThreadPool
    tp = ThreadPool(nb_threads)
    return tp.map(callable, inputs)
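
# Example usage (illustrative only, assuming both package paths exist):
#
#   thread_map(lambda path: cmd("npm test", path),
#              ['packages/util', 'packages/sync'],
#              nb_threads=2)
#
# runs "npm test" in both package directories, two at a time.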


def all_packages() -> List[str]:
    # Compute all the packages.  Explicit order in some cases *does* matter as noted in comments.
    v = [
        'packages/cdn',  # packages/hub assumes this is built
        'packages/util',
        'packages/sync',
        'packages/backend',
        'packages/hub',
        'packages/frontend',
        'packages/project',
        'packages/assets',
        'packages/server',  # packages/next assumes this is built
        'packages/database',  # packages/next also assumes this is built
    ]
    for x in os.listdir('packages'):
        path = os.path.join("packages", x)
        if path not in v and os.path.isdir(path) and os.path.exists(
                os.path.join(path, 'package.json')):
            v.append(path)
    return v


def packages(args) -> List[str]:
    v = all_packages()
    # Filter to only the ones in packages (if given)
    if args.packages:
        packages = set(args.packages.split(','))
        v = [x for x in v if x.split('/')[-1] in packages]

    # Only take things not in exclude
    if args.exclude:
        exclude = set(args.exclude.split(','))
        v = [x for x in v if x.split('/')[-1] not in exclude]

    print("Packages: ", ', '.join(v))
    return v
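
# For example, with --packages=frontend,util and --exclude=util the selection
# keeps only packages/frontend: both filters match on the last path component
# ("frontend", "util"), and --exclude is applied after --packages.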


def package_json(package: str) -> dict:
    with open(f'{package}/package.json') as f:
        return json.loads(f.read())


def write_package_json(package: str, x: dict) -> None:
    with open(f'{package}/package.json', 'w') as f:
        f.write(json.dumps(x, indent=2))


def dependent_packages(package: str) -> List[str]:
    # Get a list of the workspace packages that this package
    # depends on, by reading its package.json.
    x = package_json(package)
    if "workspaces" not in x:
        # no workspaces
        return []
    v: List[str] = []
    for path in x["workspaces"]:
        # path is a relative path
        npath = os.path.normpath(os.path.join(package, path))
        if npath != package:
            v.append(npath)
    return v
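
# For example (hypothetical workspaces entry): if packages/next/package.json
# contained  "workspaces": ["../util", "../frontend"],  this would return
# ['packages/util', 'packages/frontend'].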


def get_package_version(package: str) -> str:
    return package_json(package)["version"]


def get_package_npm_name(package: str) -> str:
    return package_json(package)["name"]


def update_dependent_versions(package: str) -> None:
    """
    Update the versions of all of the workspaces that this
    package depends on.  The versions are set to whatever the
    current version is in the dependent packages package.json.

    There is a problem here if you are publishing two
    packages A and B with versions vA and vB.  If you first publish
    A, you set it as depending on B@vB.  However, when you then
    publish B you bump its version to vB+1, so A was published
    depending on the wrong version.  It's thus important to first
    update all the versions of the packages that will be published
    in a single phase, then update the dependent version numbers, and
    finally actually publish the packages to npm.  There will unavoidably
    be an interval of time when some of the packages are impossible to
    install (e.g., because A got published and depends on B@vB+1, but B
    isn't yet published).
    """
    x = package_json(package)
    changed = False
    for dependent in dependent_packages(package):
        print(f"Considering '{dependent}'")
        try:
            package_version = '^' + get_package_version(dependent)
        except Exception:
            print(f"Skipping '{dependent}' since package not available")
            continue
        npm_name = get_package_npm_name(dependent)
        dev = npm_name in x.get("devDependencies", {})
        if dev:
            current_version = x.get("devDependencies", {}).get(npm_name, '')
        else:
            current_version = x.get("dependencies", {}).get(npm_name, '')
        # print(dependent, npm_name, current_version, package_version)
        if current_version != package_version:
            print(
                f"{package}: {dependent} changed from '{current_version}' to '{package_version}'"
            )
            x['devDependencies' if dev else 'dependencies'][
                npm_name] = package_version
            changed = True
    if changed:
        write_package_json(package, x)
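
# Illustration of the ordering described in update_dependent_versions above
# (hypothetical version numbers):
#   1. update-version bumps A 1.0.0 -> 1.0.1 and B 2.0.0 -> 2.0.1;
#   2. this function rewrites A's dependency on B to "^2.0.1" (and vice versa);
#   3. only then does publish_package below actually push A and B to npm.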


def update_all_dependent_versions() -> None:
    for package in all_packages():
        update_dependent_versions(package)


def banner(s: str) -> None:
    print("\n" + "=" * 70)
    print("|| " + s)
    print("=" * 70 + "\n")


def ci(args) -> None:
    v = packages(args)
    # Run "npm ci" in each package serially; parallel "npm ci" doesn't work with workspaces:
    for path in v:
        cmd("npm ci", path)


# Build all the packages that need to be built.
def build(args) -> None:
    v = [package for package in packages(args) if needs_build(package)]
    CUR = os.path.abspath('.')

    def f(path: str) -> None:
        if not args.parallel and path != 'packages/static':
            # NOTE: in parallel mode we don't delete dist/, since otherwise
            # there is no hope of this working.
            dist = os.path.join(CUR, path, 'dist')
            if os.path.exists(dist):
                # clear dist/ dir
                shutil.rmtree(dist)
        package_path = os.path.join(CUR, path)
        cmd("npm run build", package_path)
        # The build succeeded, so touch the marker file to record that;
        # we won't rebuild until something in the tree is newer than it.
        cmd("touch " + SUCCESSFUL_BUILD, package_path)

    if args.parallel:
        thread_map(f, v)
    else:
        thread_map(f, v, 1)


def clean(args) -> None:
    v = packages(args)

    if args.dist_only:
        folders = ['dist']
    elif args.node_modules_only:
        folders = ['node_modules']
    else:
        folders = ['node_modules', 'dist']

    paths = []
    for path in v:
        for x in folders:
            y = os.path.abspath(os.path.join(path, x))
            if os.path.exists(y):
                paths.append(y)

    def f(path):
        print("rm -rf '%s'" % path)
        shutil.rmtree(path)

    if (len(paths) == 0):
        banner("No node_modules or dist directories")
    else:
        banner("Deleting " + ', '.join(paths))
        thread_map(f, paths, nb_threads=10)

    banner("Running 'npm run clean' if it exists...")

    def g(path):
        cmd("npm run clean --if-present", path)

    thread_map(g, [os.path.abspath(path) for path in v],
               nb_threads=3 if args.parallel else 1)


def delete_package_lock(args) -> None:

    def f(path: str) -> None:
        p = os.path.join(path, 'package-lock.json')
        if os.path.exists(p):
            os.unlink(p)

    thread_map(f, [os.path.abspath(path) for path in packages(args)],
               nb_threads=10)


def npm(args) -> None:
    v = packages(args)
    inputs: List[List[str]] = []
    for path in v:
        s = 'npm ' + ' '.join(['%s' % x for x in args.args])
        inputs.append([s, os.path.abspath(path)])

    def f(args) -> None:
        cmd(*args)

    if args.parallel:
        thread_map(f, inputs, 3)
    else:
        thread_map(f, inputs, 1)


def version_check(args) -> None:
    ensure_package_lock_isnt_huge()
    cmd("scripts/check_npm_packages.py")


NEVER = '0000000000'


def last_commit_when_version_changed(package: str) -> str:
    return run('git blame package.json |grep \'  "version":\'', package,
               False).split()[0]
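
# git blame prints the (possibly abbreviated) commit hash as the first
# whitespace-separated token of each line, so the split()[0] above yields the
# last commit that touched the "version": line -- or a run of zeros
# ("Not Committed Yet") if that line has changed since the last commit
# (see NEVER above).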


def package_status(args, package: str) -> None:
    commit = last_commit_when_version_changed(package)
    print("\nPackage:", package)
    sys.stdout.flush()
    if commit == NEVER:
        print("never committed")
        return
    cmd("git diff  --name-status %s ." % commit, package, False)


def package_diff(args, package: str) -> None:
    commit = last_commit_when_version_changed(package)
    print("\nPackage:", package)
    sys.stdout.flush()
    if commit == NEVER:
        print("never committed")
        return
    cmd("git diff  %s ." % commit, package, False)


# Returns true if the package version in package.json is different
# from what was last committed to git.  More precisely, true if the
# 'version:' line has changed since the last git commit.
def package_version_is_modified_from_last_git_commit(package: str) -> bool:
    # If the "version": line in package.json has changed since the
    # last commit (or was never committed), then git blame attributes
    # it to the all-zeros pseudo-commit '0000000000', and this function
    # returns True.  bump_package_version_if_necessary below bumps the
    # version precisely when that line is attributed to a real commit,
    # i.e., when this returns False.
    return run("git blame package.json|grep '\"version\":'",
               package).startswith(NEVER)


# Increase the package version, unless it has already
# changed from what it was when the package.json file
# was last committed to git.  NOTE: we're comparing to
# the last git commit, NOT what is published on npmjs since:
#   - it is slow to get that info from npmjs
#   - it's difficult to deal with tags there
def bump_package_version_if_necessary(package: str, newversion: str) -> None:
    print(f"Check if we need to bump version of {package}")
    if package_version_is_modified_from_last_git_commit(package):
        print(f"No, version of {package} already changed")
        return
    print(f"Yes, bumping version of {package} via {newversion}")
    cmd(f"npm --no-git-tag-version version {newversion}", package)


# Once, probably due to circular dependencies (not sure), a package-lock.json
# file jumped from 1.5MB to 50MB-75MB in size!  Sadly nobody noticed for a while,
# and the huge package-lock got committed forever to our repository :-(.  Thus
# we regularly check that no package-lock files have blown up.
def ensure_package_lock_isnt_huge(package: str = '') -> None:
    if not package:
        for pkg in all_packages():
            ensure_package_lock_isnt_huge(pkg)
        return

    lock = f'{package}/package-lock.json'
    if os.path.getsize(lock) > 1000000 * MAX_PACKAGE_LOCK_SIZE_MB:
        raise RuntimeError(
            f"{lock} is HUGE! Refusing to do anything further.  Please investigate."
        )


def publish_package(args, package: str) -> None:
    print("\nPackage:", package)
    sys.stdout.flush()
    ensure_package_lock_isnt_huge(package)

    if not package_version_is_modified_from_last_git_commit(package):
        print(
            f"WARNING: You *might* need to first run update-version for '{package}', or somehow update the version in {package}/package.json."
        )
    # Do the build
    #  First ensure BASE_PATH is not set; we only want to publish to
    #  npm with no custom base path.
    if 'BASE_PATH' in os.environ:
        del os.environ['BASE_PATH']
    cmd("npm run build", package)
    # And now publish it:
    if args.tag:
        cmd(f"npm publish --tag {args.tag}", package)
    else:
        cmd("npm publish", package)
    try:
        cmd(
            f"git commit -v . -m 'Publish new version of package {package} to npmjs package repo.'",
            package)
    except Exception:
        print(f"Didn't commit {package}; this may be fine.")
    cmd("git pull && git push")


def status(args) -> None:
    for package in packages(args):
        package_status(args, package)


def diff(args) -> None:
    for package in packages(args):
        package_diff(args, package)


def update_version(args) -> None:
    if not args.newversion:
        raise RuntimeError(
            "newversion must be specified (e.g. 'patch', 'minor', 'major')")

    # First we bump the package versions, if necessary
    print("Updating versions if necessary...")
    for package in packages(args):
        bump_package_version_if_necessary(package, args.newversion)


def publish(args) -> None:
    # We first update all the explicit workspace version dependencies.
    # I.e., we make it so all the @cocalc/packagename:"^x.y.z" lines
    # in package.json are correct and reflect the versions of our packages here.
    print("Updating dependent versions...")
    for package in packages(args):
        update_dependent_versions(package)

    # Finally, we build and publish all of our packages to npm.
    for package in packages(args):
        publish_package(args, package)


def node_version_check() -> None:
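    # 'node --version' prints something like 'v14.17.0'; strip the leading 'v'
    # and keep only the major version number.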
    version = int(os.popen('node --version').read().split('.')[0][1:])
    if version < 14:
        err = f"CoCalc requires node.js v14, but you're using node v{version}."
        if os.environ.get("COCALC_USERNAME",
                          '') == 'user' and 'COCALC_PROJECT_ID' in os.environ:
            err += '\nIf you are using https://cocalc.com, put ". /cocalc/nvm/nvm.sh" in ~/.bashrc\nto get an appropriate version of node.'
        raise RuntimeError(err)


def main() -> None:
    node_version_check()

    def packages_arg(parser):
        parser.add_argument(
            '--packages',
            type=str,
            default='',
            help=
            '(default: ""=everything) "foo,bar" means only the packages named foo and bar'
        )
        parser.add_argument(
            '--exclude',
            type=str,
            default='',
            help=
            '(default: ""=exclude nothing) "foo,bar" means exclude foo and bar'
        )
        parser.add_argument(
            '--parallel',
            action="store_const",
            const=True,
            help=
            'if given, do all in parallel; this will not work in some cases and may be ignored in others'
        )

    parser = argparse.ArgumentParser(prog='workspaces')
    subparsers = parser.add_subparsers(help='sub-command help')

    subparser = subparsers.add_parser('ci',
                                      help='install deps for all modules')
    packages_arg(subparser)
    subparser.set_defaults(func=ci)

    subparser = subparsers.add_parser('build',
                                      help='build all modules (use ci first)')
    packages_arg(subparser)
    subparser.set_defaults(func=build)

    subparser = subparsers.add_parser(
        'clean', help='delete dist and node_modules folders')
    packages_arg(subparser)
    subparser.add_argument('--dist-only',
                           action="store_const",
                           const=True,
                           help="only delete dist directory")
    subparser.add_argument('--node-modules-only',
                           action="store_const",
                           const=True,
                           help="only delete node_modules directory")
    subparser.set_defaults(func=clean)

    subparser = subparsers.add_parser(
        'delete-package-lock',
        help='delete package lock files so they can be recreated')
    packages_arg(subparser)
    subparser.set_defaults(func=delete_package_lock)

    subparser = subparsers.add_parser(
        'npm', help='do "npm ..." in each package; e.g., use for "npm ci"')
    packages_arg(subparser)
    subparser.add_argument('args',
                           type=str,
                           nargs='*',
                           default='',
                           help='arguments to npm')
    subparser.set_defaults(func=npm)

    subparser = subparsers.add_parser(
        'version-check', help='version consistency checks across packages')
    subparser.set_defaults(func=version_check)

    subparser = subparsers.add_parser(
        'status', help='files changed in package since last version change')
    packages_arg(subparser)
    subparser.set_defaults(func=status)

    subparser = subparsers.add_parser(
        'diff', help='diff in package since last version change')
    packages_arg(subparser)
    subparser.set_defaults(func=diff)

    subparser = subparsers.add_parser('update-version',
                                      help='update version of packages')
    packages_arg(subparser)
    subparser.add_argument(
        "--newversion",
        type=str,
        help=
        "major | minor | patch | premajor | preminor | prepatch | prerelease")
    subparser.set_defaults(func=update_version)

    subparser = subparsers.add_parser(
        'publish',
        help=
        'publish to npm and commit any changes to git in the directory containing the package.  You must call update-version for the package(s) you wish to publish first, or manually edit package.json.'
    )
    packages_arg(subparser)
    subparser.add_argument(
        "--tag",
        type=str,
        help=
        "Registers the published package with the given tag, such that npm install <name>@<tag> will install this version."
    )
    subparser.set_defaults(func=publish)

    args = parser.parse_args()
    if hasattr(args, 'func'):
        args.func(args)


if __name__ == '__main__':
    main()