Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision 3233d06e85a9aa04fc362201f8154552d6b5b308 authored by Ramy-Badr-Ahmed on 24 July 2024, 16:59:37 UTC, committed by GitHub on 24 July 2024, 16:59:37 UTC
Merge pull request #1 from physikerwelt/patch-1
Fix typo in SWH_TOKEN_PROD
2 parents 8d1239c + 4531537
  • Files
  • Changes
  • 1c4ca38
  • /
  • Module
  • /
  • Archival
  • /
  • Archive.php
Raw File Download
Permalinks

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:3233d06e85a9aa04fc362201f8154552d6b5b308
directory badge Iframe embedding
swh:1:dir:77b913a739ce71ab20cd69fd7f5a82b14eda145e
content badge Iframe embedding
swh:1:cnt:df5b2c6d932185885320f4ade2153c7f26d79c77
Citations

This interface makes it possible to generate software citations, provided that the root directory of the browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTeX format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTeX format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTeX format (requires biblatex-software package)
Generating citation ...
Archive.php
<?php

/**
 * @Author: Ramy-Badr-Ahmed
 * @Desc: SWH API Client
 * @Repo: https://github.com/Ramy-Badr-Ahmed/swh-client
 */

namespace Module\Archival;


use Error;
use Module\Repositories\Repository;
use Exception;
use Illuminate\Http\Client\RequestException;
use Illuminate\Support\Collection;
use Module\DAGModel\GraphTraversal;
use Module\DataType\SwhCoreID;
use Module\Globals\Formatting;
use Module\Globals\Helper;
use Module\HTTPConnector\SyncHTTP;
use Module\OriginVisits\SwhOrigins;
use Module\OriginVisits\SwhVisits;
use stdClass;
use Throwable;
use TypeError;
use UnhandledMatchError;

/**
 * Entry point for submitting a repository URL to the archive's "save" endpoint and for
 * inspecting the resulting archival request (status, snapshot, derived SWHIDs).
 *
 * Methods in this class report failures by RETURNING a Throwable (after recording it through
 * addErrors()) rather than letting it propagate, so callers must check results with
 * `instanceof Throwable`.
 */
class Archive extends SyncHTTP implements SwhArchive
{
    /** Option names accepted as variadic flags (presumably enforced by Helper::validateOptions — confirm). */
    public const SUPPORTED_OPTIONS = ['withHeaders', 'distinct', 'withTracking'];

    /** Value of a save request's 'visit_status' that getSnpFromSaveRequest() treats as complete. */
    private const FULL_VISIT = 'full';

    /** Value of a visit's 'status' that getLatestArchivalAttempt() treats as a failed attempt. */
    private const NOT_FOUND_VISIT = 'not_found';

    /** Value of 'save_task_status' marking a finished, successful archival. */
    private const ARCHIVAL_SUCCEEDED = 'succeeded';

    /** Value of 'save_task_status' marking a failed archival. */
    private const ARCHIVAL_FAILED = 'failed';

    /** Passed to Repository::analysis() in the constructor (presumably filled by reference — confirm). */
    public array $decomposedURL = [];

    /** Passed to Repository::analysis() and later to GraphTraversal; its "path" key is read when building contexts. */
    public array $nodeHits = [];

    /** Merged origin + traversal identifiers; only assigned once an archival/visit has been resolved. */
    protected stdClass $swhIDs;

    protected SwhVisits $visitObject;
    protected SwhOrigins $originObject;
    protected Archivable $archivable;


    /**
     * Analyses the URL and builds the collaborators (Archivable, SwhVisits, SwhOrigins) for it.
     *
     * @param string $url
     * @param string|null $visitType
     * @param ...$options forwarded to setOptions()
     * @throws Exception|UnhandledMatchError
     */
    public function __construct(public string $url, public ?string $visitType = null, ...$options)
    {
        Repository::analysis($this->url, $this->visitType, $this->nodeHits, $this->decomposedURL);

        // Built after the analysis so it receives the url/visitType as they stand once analysis has run.
        $this->archivable = new Archivable($this->url, $this->visitType);

        $this->visitObject = new SwhVisits($this->url);
        $this->originObject = new SwhOrigins($this->url);

        parent::__construct();

        self::setOptions(...$options);
    }

    /**
     * One-shot helper: creates an Archive for the URL and submits a save request.
     *
     * With a truthy `withTracking` flag the request is then followed via trackArchivalStatus()
     * and the final result is re-cast to the response type that was configured before the call.
     * Without it, the initial save response is returned in array form.
     *
     * The static response type is switched to array for the duration of the call (the save
     * response is indexed by 'id') and is always restored afterwards, including on error returns.
     *
     * @param string $url
     * @param string|null $visitType
     * @param ...$flags
     * @return iterable|Collection|stdClass|Throwable
     * @throws Exception|UnhandledMatchError
     */
    public static function repository(string $url, ?string $visitType = null, ...$flags): iterable|Collection|stdClass|Throwable // Archive::repository($url)
    {
        $newArchival = new self($url, $visitType, ...$flags);

        $currentResponseType = self::$responseType;
        self::$responseType = self::RESPONSE_TYPE_ARRAY;

        try {
            $archivalInitialResponse = $newArchival->save2Swh();

            if ($archivalInitialResponse instanceof Throwable) {
                return $archivalInitialResponse;
            }

            if (!($flags['withTracking'] ?? false)) {
                return $archivalInitialResponse;
            }

            $trackedArchival = $newArchival->trackArchivalStatus($archivalInitialResponse['id']);

            // A failure is handed back untouched instead of being pushed through the re-cast.
            return $trackedArchival instanceof Throwable
                ? $trackedArchival
                : Formatting::reCastTo($trackedArchival, $currentResponseType);
        } finally {
            // Do not leak the temporary array response type into the caller's configuration.
            self::$responseType = $currentResponseType;
        }
    }

    /**
     * Builds an Archivable for the URL, using the url/visitType held by a freshly constructed Archive.
     *
     * @param string $url
     * @return Archivable
     * @throws Exception
     */
    public static function of(string $url): Archivable
    {
        $archive = new self($url);

        return new Archivable($archive->url, $archive->visitType);
    }

    /**
     * Sends a POST to the 'save' endpoint for this URL and visit type.
     *
     * @param ...$flags validated with Helper::validateOptions(); a truthy `withHeaders` adds the response headers
     * @return iterable|Collection|stdClass|Throwable
     *         - the Throwable returned by invokeEndpoint(), if any;
     *         - with `withHeaders`: a Collection with keys "response" and "headers";
     *         - otherwise the response converted by the method named in self::$responseType;
     *         - the caught exception when the request or the option validation fails (also recorded via addErrors()).
     */
    public function save2Swh(...$flags): iterable|Collection|stdClass|Throwable
    {
        // Name of the conversion method invoked dynamically on the response object below.
        $responseType = self::$responseType;

        try {
            Helper::validateOptions($flags);

            $responseSave = $this->invokeEndpoint("POST", 'save', collect([$this->url, $this->visitType]), ...$flags);

            if ($responseSave instanceof Throwable) {
                return $responseSave;
            }

            if ($flags['withHeaders'] ?? false) {
                return collect(["response" => $responseSave->$responseType(), "headers" => $responseSave->headers()]);
            }

            return $responseSave->$responseType();

        } catch (RequestException $e) {
            // Translate the HTTP status into a readable message; unknown statuses fall back to the API's own reason/body.
            $this->addErrors($e->getCode().": " . match ($e->response->status()){
                400 => "An invalid Visit Type or URL: ". $this->visitType ." <--> ". $this->url,
                403 => "Origin URL: ". $this->url ." is black listed in SWH",
                default => $e->response->json()['reason'] ?? $e->response->body()
            });
            return $e;
        } catch (Exception $e) {
            $this->addErrors($e->getCode().": ".$e->getMessage());
            return $e;
        }
    }


    /**
     * Fetches a save request and, once its task has succeeded, enriches it with SWHIDs.
     *
     * On success the result gains 'swh_id_list' (origin ID merged with the snapshot traversal
     * result) and 'contextual_swh_ids'. A failed task is turned into an Exception (code 55),
     * which — like any other error raised here — is recorded via addErrors() and returned.
     *
     * @param string|int $saveRequestDateOrID
     * @param ...$flags forwarded to Archivable::getFullArchivalRequest()
     * @return iterable|Collection|stdClass|Throwable
     */
    public function getArchivalStatus(string|int $saveRequestDateOrID, ...$flags): iterable|Collection|stdClass|Throwable
    {
        try {
            $archivalRequest = $this->archivable->getFullArchivalRequest($saveRequestDateOrID, ...$flags);

            if ($archivalRequest instanceof Throwable) {
                return $archivalRequest;
            }

            // Work on an array internally; the configured response type is applied on the way out.
            $archivalRequest = Formatting::reCastTo($archivalRequest, self::RESPONSE_TYPE_ARRAY);

            if ($archivalRequest['save_task_status'] === self::ARCHIVAL_SUCCEEDED) {

                $traverseToDirectory = GraphTraversal::traverseFromSnp(new SwhCoreID($archivalRequest['snapshot_swhid']), $this->nodeHits);

                if ($traverseToDirectory instanceof Throwable) {
                    return $traverseToDirectory;
                }

                $originID = $this->originObject->getOriFromURL();

                // Anything other than a SwhCoreID from the origin lookup is replaced by an empty object for the merge.
                $this->swhIDs = Helper::object_merge($originID instanceof SwhCoreID ? $originID : new stdClass(), $traverseToDirectory);

                $archivalRequest['swh_id_list'] = $this->swhIDs;

                $archivalRequest['contextual_swh_ids'] = Formatting::getContexts($this->swhIDs, $this->url, $this->nodeHits["path"] ?? null);
            }

            if ($archivalRequest['save_task_status'] === self::ARCHIVAL_FAILED) {
                throw new Exception("Archival has failed with id: {$archivalRequest['id']} and save_request_date: {$archivalRequest['save_request_date']}", 55);
            }

            return Formatting::reCastTo($archivalRequest, self::$responseType);

        } catch (TypeError|Exception|Error $e) {
            $this->addErrors($e->getCode().": ".$e->getMessage());
            return $e;
        }
    }

    /**
     * Polls getArchivalStatus() until the save task reports success, logging each observed status.
     *
     * A TypeError result is treated as "try again"; any other Throwable (including the failed-task
     * exception produced by getArchivalStatus()) ends the tracking and is returned.
     *
     * NOTE(review): there is no delay between polls and no upper bound on retries in this method;
     * confirm that the HTTP layer throttles requests, otherwise consider a poll interval / retry cap.
     *
     * @param string|int $saveRequestDateOrID
     * @param ...$flags forwarded to getArchivalStatus()
     * @return iterable|Collection|stdClass|Throwable
     */
    public function trackArchivalStatus(string|int $saveRequestDateOrID, ...$flags): iterable|Collection|stdClass|Throwable
    {
        do {
            $done = false;

            $archivalRequest = $this->getArchivalStatus($saveRequestDateOrID, ...$flags);

            if ($archivalRequest instanceof TypeError) {
                // `continue` in a do-while re-evaluates the loop condition, so this polls again.
                continue;
            }

            if ($archivalRequest instanceof Throwable) {
                return $archivalRequest;
            }

            $archivalRequest = Formatting::reCastTo($archivalRequest, self::RESPONSE_TYPE_ARRAY);
            self::addLogs("\tRequest Status --> ". $archivalRequest['save_request_status']);
            self::addLogs("\tTask Status --> ". $archivalRequest['save_task_status']);
            self::addLogs("\tVisit Status --> ". $archivalRequest['visit_status']);

            $done = $archivalRequest['save_task_status'] === self::ARCHIVAL_SUCCEEDED;
            self::addLogs("Done --> ".var_export($done, true)."\n");
        } while (!$done);

        return Formatting::reCastTo($archivalRequest, self::$responseType);
    }

    /**
     * Resolves the snapshot identifier belonging to a save request.
     *
     * When the task has succeeded with a full visit, the ID is built from the request's
     * 'snapshot_swhid'; otherwise the lookup is delegated to Archivable::getSnpFromSaveRequestID().
     *
     * @param string|int $saveRequestDateOrID
     * @return SwhCoreID|Throwable|null
     */
    public function getSnpFromSaveRequest(string|int $saveRequestDateOrID): SwhCoreID|null|Throwable
    {
        try {
            $archivalRequest = $this->archivable->getFullArchivalRequest($saveRequestDateOrID);

            if ($archivalRequest instanceof Throwable) {
                return $archivalRequest;
            }

            $archivalRequest = Formatting::reCastTo($archivalRequest, self::RESPONSE_TYPE_ARRAY);

            $hasFullSnapshot = $archivalRequest['save_task_status'] === self::ARCHIVAL_SUCCEEDED
                && $archivalRequest['visit_status'] === self::FULL_VISIT;

            return $hasFullSnapshot
                ? new SwhCoreID($archivalRequest['snapshot_swhid'])
                : $this->archivable->getSnpFromSaveRequestID($archivalRequest['id']);

        } catch (TypeError|Exception $e) {
            $this->addErrors($e->getMessage());
            return $e;
        }
    }

    /**
     * Describes the most recent archival attempt of this URL, based on its latest visit.
     *
     * The archival entry matching the latest visit's date is selected from all archives of the
     * URL and extended with 'swh_id_list' and 'contextual_swh_ids' derived from the visit snapshot.
     * A latest visit with status "not_found" is reported as an Exception. Errors are recorded via
     * addErrors() and returned.
     *
     * @param ...$flags forwarded to SwhVisits::getVisit()
     * @return iterable|Collection|stdClass|Throwable
     */
    public function getLatestArchivalAttempt(...$flags) : iterable|Collection|stdClass|Throwable
    {
        try {
            $latestVisit = $this->visitObject->getVisit("latest", ...$flags);

            if ($latestVisit instanceof Throwable) {
                return $latestVisit;
            }
            $latestVisit = Formatting::reCastTo($latestVisit, self::RESPONSE_TYPE_ARRAY);

            if ($latestVisit['status'] === self::NOT_FOUND_VISIT) {
                throw new Exception("Failed archival attempt for URL: $this->url");
            }
            $visitSnapshot = new SwhCoreID(Formatting::formatSwhIDs(Formatting::SWH_SNAPSHOT, $latestVisit['snapshot']));
            $visitDate = $latestVisit["date"];

            $allArchives = $this->archivable->getAllArchives();

            if ($allArchives instanceof Throwable) {
                return $allArchives;
            }
            $matchingArchival = Helper::grabMatching($allArchives, $visitDate);

            $traverseToDirectory = GraphTraversal::traverseFromSnp($visitSnapshot, $this->nodeHits);

            // Same guard as in getArchivalStatus(): a failed traversal must not be merged into the ID list.
            if ($traverseToDirectory instanceof Throwable) {
                return $traverseToDirectory;
            }

            $originID = $this->originObject->getOriFromURL();

            $this->swhIDs = Helper::object_merge($originID instanceof SwhCoreID ? $originID : new stdClass(), $traverseToDirectory);

            $matchingArchival['swh_id_list'] = $this->swhIDs;
            $matchingArchival['contextual_swh_ids'] = Formatting::getContexts($this->swhIDs, $this->url, $this->nodeHits["path"] ?? null);

            return Formatting::reCastTo($matchingArchival, self::$responseType);

        } catch (TypeError|Exception $e) {
            $this->addErrors($e->getCode().": ".$e->getMessage());
            return $e;
        }
    }
}
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Contact— JavaScript license information— Web API