Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision 736f37ee72597e8653b2d26e1cd09f0af60fd2aa authored by Jan-Michael Rye on 27 November 2023, 19:19:43 UTC, committed by Jan-Michael Rye on 27 November 2023, 19:19:43 UTC
Restrict CI to main branch
1 parent 9920265
  • Files
  • Changes
  • 611f8bc
  • /
  • src
  • /
  • mlflow_extra
  • /
  • metadata.py
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:736f37ee72597e8653b2d26e1cd09f0af60fd2aa
directory badge
swh:1:dir:ebeb7281ef8cd79cb40c65d44d154d07fe369cb8
content badge
swh:1:cnt:c5bd881e3ab2458639717483bfb1c052736bcf7d

This interface can generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
metadata.py
#!/usr/bin/env python3
'''
Find and manipulate MLflow metadata files.
'''

import logging
import os
import pathlib
import shutil
import tempfile

import yaml

from mlflow_extra.uri import URITransformer

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)


class Metadata():
    '''
    Metadata wrapper.

    Holds the content of an MLflow metadata file as a dict in self.data and
    exposes selected fields as properties that validate values on assignment.
    '''
    FILENAME = 'meta.yaml'

    FIELD_ARTIFACT_LOCATION = 'artifact_location'  # experiment metadata
    FIELD_ARTIFACT_URI = 'artifact_uri'  # run metadata
    FIELD_CREATION_TIME = 'creation_time'
    FIELD_EXPERIMENT_ID = 'experiment_id'
    FIELD_LAST_UPDATE_TIME = 'last_update_time'
    FIELD_LIFECYCLE_STAGE = 'lifecycle_stage'
    FIELD_NAME = 'name'

    def __init__(self, path=None, data=None):
        '''
        Args:
            path:
                The path to a metadata file, or a directory that contains one.
                If not None, then the data will be loaded from the path unless
                data is also passed via the data parameter.

            data:
                The data. If not None, then the path will not be loaded.
        '''
        if path is not None:
            path = pathlib.Path(path).resolve()
            # A directory is taken to mean the metadata file inside it.
            if path.is_dir():
                path /= self.FILENAME
        self.path = path

        self.data = {} if data is None else data

        if self.path and data is None:
            self.load()

    def load(self, path=None):
        '''
        Load data from a YAML file.

        If no path was previously set on this object, the loaded path becomes
        the current path.

        Args:
            path:
                The filepath. If None, the current path will be used.

        Raises:
            ValueError:
                No path was given and no current path is set.
        '''
        path = self.path if path is None else pathlib.Path(path).resolve()
        if path is None:
            raise ValueError('no path set')
        if self.path is None:
            self.path = path
        with path.open('rb') as handle:
            data = yaml.safe_load(handle)
        # An empty YAML document loads as None. Fall back to an empty dict, as
        # in __init__, so that the field accessors keep working.
        self.data = {} if data is None else data

    def save(self, path=None):
        '''
        Save data to a YAML file.

        Missing parent directories of the target path are created.

        Args:
            path:
                The filepath. If None, the current path will be used.

        Raises:
            ValueError:
                No path was given and no current path is set.
        '''
        # Resolve the target first so that a missing path is reported with the
        # same error as in load() before any work is done.
        path = self.path if path is None else pathlib.Path(path).resolve()
        if path is None:
            raise ValueError('no path set')

        # Attempt to write to a temporary file first to ensure that the metadata
        # file is not destroyed if the data cannot be converted to YAML.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = pathlib.Path(tmp_dir) / self.FILENAME
            with tmp_path.open('w', encoding='utf-8') as handle:
                yaml.dump(self.data, handle)
            path.parent.mkdir(parents=True, exist_ok=True)
            LOGGER.debug('Saving data to %s', path)
            shutil.move(tmp_path, path)

    @staticmethod
    def non_negative_int(value):
        '''
        Check that the given value is a non-negative integer.

        Args:
            value:
                The value to check.

        Returns:
            The value as an int.

        Raises:
            ValueError:
                The value could not be converted to a non-negative int.
        '''
        value = int(value)
        if value < 0:
            raise ValueError('negative value')
        return value

    def _get_experiment_id(self):
        '''
        Get the experiment ID, or None if the field is not set.
        '''
        return self.data.get(self.FIELD_EXPERIMENT_ID)

    def _set_experiment_id(self, value):
        '''
        Set the experiment ID after converting it to a non-negative int.
        '''
        self.data[self.FIELD_EXPERIMENT_ID] = self.non_negative_int(value)

    experiment_id = property(
        fget=_get_experiment_id,
        fset=_set_experiment_id,
        doc='The experiment ID'
    )

    def _get_name(self):
        '''
        Get the name, or None if the field is not set.
        '''
        return self.data.get(self.FIELD_NAME)

    def _set_name(self, value):
        '''
        Set the name after converting it to a string. Raises ValueError if the
        resulting string is empty.
        '''
        value = str(value)
        if not value:
            raise ValueError('empty name')
        self.data[self.FIELD_NAME] = value

    name = property(
        fget=_get_name,
        fset=_set_name,
        doc='The name'
    )

    def _get_creation_time(self):
        '''
        Get the creation time, or None if the field is not set.
        '''
        return self.data.get(self.FIELD_CREATION_TIME)

    def _set_creation_time(self, value):
        '''
        Set the creation time after converting it to a non-negative int.
        '''
        self.data[self.FIELD_CREATION_TIME] = self.non_negative_int(value)

    creation_time = property(
        fget=_get_creation_time,
        fset=_set_creation_time,
        doc='The creation time'
    )

    def _get_last_update_time(self):
        '''
        Get the last update time, or None if the field is not set.
        '''
        return self.data.get(self.FIELD_LAST_UPDATE_TIME)

    def _set_last_update_time(self, value):
        '''
        Set the last update time after converting it to a non-negative int.
        '''
        self.data[self.FIELD_LAST_UPDATE_TIME] = self.non_negative_int(value)

    last_update_time = property(
        fget=_get_last_update_time,
        fset=_set_last_update_time,
        doc='The last update time'
    )

    def _get_lifecycle_stage(self):
        '''
        Get the lifecycle stage, or None if the field is not set.
        '''
        return self.data.get(self.FIELD_LIFECYCLE_STAGE)

    def _set_lifecycle_stage(self, value):
        '''
        Set the lifecycle stage after converting it to a string. Raises
        ValueError if the resulting string is empty.
        '''
        value = str(value)
        if not value:
            raise ValueError('empty lifecycle_stage')
        self.data[self.FIELD_LIFECYCLE_STAGE] = value

    lifecycle_stage = property(
        fget=_get_lifecycle_stage,
        fset=_set_lifecycle_stage,
        doc='The lifecycle stage'
    )

    @property
    def is_experiment(self):
        '''
        True if the current metadata appears to be for an experiment and not a
        run.
        '''
        return self.FIELD_ARTIFACT_LOCATION in self.data

    @property
    def _artifact_uri_field(self):
        '''
        Get the artifact URI field or equivalent depending on the type of file.
        '''
        if self.is_experiment:
            return self.FIELD_ARTIFACT_LOCATION
        return self.FIELD_ARTIFACT_URI

    def _get_artifact_uri(self):
        '''
        Get the artifact URI or equivalent wrapped in a URITransformer.
        '''
        # NOTE(review): if the field is missing, None is passed to
        # URITransformer; confirm that it accepts None.
        return URITransformer(self.data.get(self._artifact_uri_field))

    def _set_artifact_uri(self, value):
        '''
        Set the artifact URI or equivalent. The value is stored as a string.
        Raises ValueError if the artifact path is empty.
        '''
        if not isinstance(value, URITransformer):
            value = URITransformer(value)
        # parts[2] is presumably the path component of the URI; confirm
        # against URITransformer.
        if not value.parts[2]:
            raise ValueError('empty artifact path')
        self.data[self._artifact_uri_field] = str(value)

    artifact_uri = property(
        fget=_get_artifact_uri,
        fset=_set_artifact_uri,
        doc='The artifact URI or equivalent'
    )

    def find_meta(self, path):
        '''
        Find all MLflow metadata files in the given directory.

        The directory is walked recursively, including the directory itself.

        Args:
            path:
                The directory path.

        Returns:
            A generator over instances of this class, one for each directory
            that contains a metadata file. Each instance is constructed from
            the directory path and therefore loads the file.
        '''
        path = pathlib.Path(path).resolve()
        for root, _dirs, files in os.walk(path):
            if self.FILENAME in files:
                yield self.__class__(path=root)
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API