Revision 8e21d46af73cb0cef0e42f20d6bf1e2736f89c13 authored by Marge Bot on 13 May 2022, 09:40:30 UTC, committed by Marge Bot on 13 May 2022, 09:40:30 UTC
CI: generate opam-ci.yml statically

See merge request tezos/tezos!5251
2 parent s 849a044 + 0d8e73a
Raw File
dep_graph.ml
(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2018 Dynamic Ledger Solutions, Inc. <contact@tezos.com>     *)
(* Copyright (c) 2018 Nomadic Labs. <contact@nomadic-labs.com>               *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

(* Models associated to benchmarks have free variables. Some of these
   variables are to be inferred from the empirical data, but some others
   must be provided by other models and correspond to _dependencies_
   of a model upon the result of another one.

   The goal of this module is to take as input a set of models seen as
   sets of free variables and infer back a partial dependency ordering
   among them. In particular, a topological sort of this partial ordering
   yields a scheduling for the inference process that respects cross-model
   dependencies.

   Such a problem does not always have a solution, or can have several
   solutions (ie it is in general ill-posed). When there's more than
   one possible solution, we use a simple heuristic to pick one.
*)

module Fv_map = Free_variable.Map
module Fv_set = Free_variable.Set
module Fv_set_set = Set.Make (Free_variable.Set)

module Directed_graph = Graph.Imperative.Digraph.Concrete (struct
  type t = string

  let hash = Hashtbl.hash

  let equal = String.equal

  let compare = String.compare
end)

module Directed_graph_with_attributes = struct
  include Directed_graph

  let edge_attributes _ = []

  let default_edge_attributes _ = []

  let vertex_attributes s = [`Label (String.escaped s)]

  let default_vertex_attributes _ = []

  let graph_attributes _ = []

  let get_subgraph _ = None

  let vertex_name s = String.escaped s
end

module G = Directed_graph_with_attributes

(* Topological sort *)
module T = Graph.Topological.Make (G)

(* Graphviz output *)
module D = Graph.Graphviz.Dot (G)

module Solver = struct
  (* We proceed iteratively on a set of _nodes_.

     Nodes correspond to sets of free variables.

     A node is _solved_ when its variables can be partitioned in two subsets:
     - the set of _dependencies_
     - the set of _provided_ variables

     A node is _unsolved_ when this partition cannot be decided yet:
     - some free variables are kwown to be _dependencies_
     - some other are still undecided.

     A node is _redundant_ when it is solved and its set of _provided_ variables
     is empty. *)

  type 'a meta = {data : 'a; uid : int}

  type 'a unsolved = {
    dependencies : Fv_set.t;
    undecided_variables : Fv_set.t;
    meta : 'a meta;
  }

  type 'a solved = {
    dependencies : Fv_set.t;
    provides : Fv_set.t;
    meta : 'a meta;
  }

  type 'a node =
    | Solved of 'a solved
    | Redundant of 'a solved
    | Unsolved of 'a unsolved

  type 'meta state = {
    solved : 'meta solved list;
    unsolved : 'meta unsolved list;
  }

  let empty_state = {solved = []; unsolved = []}

  let force_solved {dependencies; undecided_variables; meta} =
    {dependencies; provides = undecided_variables; meta}

  let pp_list =
    Format.pp_print_list
      ~pp_sep:(fun fmtr () -> Format.fprintf fmtr ",")
      Free_variable.pp

  let pp_node fmtr (from, to_) =
    Format.fprintf
      fmtr
      "%a -> %a"
      pp_list
      (Fv_set.elements from)
      pp_list
      (Fv_set.elements to_)

  (* Sets free variable [v] to be 'solved' in node [n] *)
  let set_variable_as_solved (n : 'a unsolved) (v : Free_variable.t) =
    if not (Fv_set.mem v n.undecided_variables) then Unsolved n
    else
      let undecided = Fv_set.remove v n.undecided_variables in
      let deps = Fv_set.add v n.dependencies in
      let card = Fv_set.cardinal undecided in
      if card = 0 then
        Redundant {dependencies = deps; provides = Fv_set.empty; meta = n.meta}
      else if card = 1 then
        let () = Format.eprintf "Solved: %a@." pp_node (deps, undecided) in
        (* If there's only one variable left in [undecided], it must
           in fact be constrained by the model and becomes [provided]. *)
        Solved {dependencies = deps; provides = undecided; meta = n.meta}
      else
        Unsolved
          {dependencies = deps; undecided_variables = undecided; meta = n.meta}

  let rec propagate_solved state (n : 'a solved) solved_but_not_propagated =
    let (solved_but_not_propagated, unsolved) =
      List.fold_left
        (fun (solved_acc, unsolved_acc) unsolved ->
          Fv_set.fold
            (fun provided_var (solved_acc, unsolved_acc) ->
              let node = set_variable_as_solved unsolved provided_var in
              match node with
              | Redundant node | Solved node ->
                  (node :: solved_acc, unsolved_acc)
              | Unsolved node -> (solved_acc, node :: unsolved_acc))
            n.provides
            (solved_acc, unsolved_acc))
        (solved_but_not_propagated, [])
        state.unsolved
    in
    let state = {solved = n :: state.solved; unsolved} in
    propagate_solved_loop state solved_but_not_propagated

  and propagate_solved_loop state solved_but_not_propagated =
    match solved_but_not_propagated with
    | [] -> state
    | solved :: solved_list -> propagate_solved state solved solved_list

  let solve {solved; unsolved} =
    assert (solved = []) ;
    let (roots, others) =
      List.partition
        (fun (node : 'a unsolved) ->
          Fv_set.is_empty node.dependencies
          && Fv_set.cardinal node.undecided_variables = 1)
        unsolved
    in
    (* Set the roots as solved. *)
    let roots =
      List.map
        (fun root ->
          {
            dependencies = Fv_set.empty;
            provides = root.undecided_variables;
            meta = root.meta;
          })
        roots
    in
    List.iter
      (fun {provides; _} ->
        Format.eprintf
          "Root: %a@."
          Free_variable.pp
          (WithExceptions.Option.get ~loc:__LOC__ (Fv_set.choose provides)))
      roots ;
    (* Propagate iteratively. *)
    let state = {solved = []; unsolved = others} in
    propagate_solved_loop state roots

  let solve ~force state =
    let least_constrained = solve state in
    match state.unsolved with
    | [] -> least_constrained
    | _ ->
        if force then (
          Format.eprintf
            "Dep_graph.Solver.solve: forcing remaining unconstrained variables \
             as solved.@." ;
          List.iter
            (fun {dependencies; undecided_variables; _} ->
              Format.eprintf
                "Forced: %a@."
                pp_node
                (dependencies, undecided_variables))
            least_constrained.unsolved ;
          let set_solved = List.map force_solved least_constrained.unsolved in
          {solved = least_constrained.solved @ set_solved; unsolved = []})
        else
          Stdlib.failwith
            "Dep_graph.Solver.solve: state is not completely solved, \
             aborting.@."

  let unsolved_of_fvs =
    let c = ref 0 in
    fun fvs data ->
      let uid = !c in
      incr c ;
      {
        dependencies = Fv_set.empty;
        undecided_variables = fvs;
        meta = {data; uid};
      }

  let add_node state fvs data =
    let node = unsolved_of_fvs fvs data in
    {state with unsolved = node :: state.unsolved}
end

module Hashtbl = Stdlib.Hashtbl

let pp_print_set fmtr (set : Free_variable.Set.t) =
  let elts = Free_variable.Set.elements set in
  Format.fprintf fmtr "{ " ;
  Format.pp_print_list
    ~pp_sep:(fun fmtr () -> Format.fprintf fmtr ";")
    Free_variable.pp
    fmtr
    elts ;
  Format.fprintf fmtr " }"

let pp_print_set_set fmtr (set_set : Fv_set_set.t) =
  let elts = Fv_set_set.elements set_set in
  Format.fprintf fmtr "{ " ;
  Format.pp_print_list
    ~pp_sep:(fun fmtr () -> Format.fprintf fmtr ";")
    pp_print_set
    fmtr
    elts ;
  Format.fprintf fmtr " }"

let get_free_variables (type workload) (model : workload Model.t)
    (workload : workload) : Free_variable.Set.t =
  let applied = Model.apply model workload in
  let module M = (val applied) in
  let module T0 = Costlang.Fold_constants (Costlang.Free_variables) in
  let module T1 = Costlang.Beta_normalize (T0) in
  let module R = M (T1) in
  T0.prj @@ T1.prj R.applied

let add_names (state : string Solver.state) (filename : string)
    (names : Free_variable.Set.t) : string Solver.state =
  Format.eprintf "for %s, adding names %a@." filename pp_print_set names ;
  Solver.add_node state names filename

exception
  Variable_solved_by_several_datasets of {
    free_var : Free_variable.t;
    filename : string;
    other_file : string;
  }

exception Missing_file_for_free_variable of {free_var : Free_variable.t}

let () =
  Printexc.register_printer (function
      | Variable_solved_by_several_datasets {free_var; filename; other_file} ->
          let error =
            Format.asprintf
              "Variable %a has conflicting constraints from datasets %s and %s.\n\
               Try to remove one?\n"
              Free_variable.pp
              free_var
              filename
              other_file
          in
          Some error
      | Missing_file_for_free_variable {free_var} ->
          let error =
            Format.asprintf
              "Bug found: variable %a is not associated to any dataset. Please \
               report.\n"
              Free_variable.pp
              free_var
          in
          Some error
      | _ -> None)

let to_graph (solved : string Solver.solved list) =
  let len = List.length solved in
  let g = G.create ~size:len () in
  let solved_to_file =
    List.fold_left
      (fun map {Solver.provides; meta; dependencies} ->
        Fv_set.fold
          (fun free_var map ->
            match Fv_map.find free_var map with
            | None ->
                Format.eprintf
                  "%s is set as data source to solve %a@."
                  meta.data
                  Free_variable.pp
                  free_var ;
                Fv_map.add free_var (meta.data, dependencies) map
            | Some (other_file, other_deps) ->
                Format.eprintf
                  "%s is a potential alternative dataset to %s for %a@."
                  meta.data
                  other_file
                  pp_print_set
                  provides ;
                let this_card = Fv_set.cardinal dependencies in
                let other_card = Fv_set.cardinal other_deps in
                if this_card < other_card then (
                  Format.eprintf
                    "Picking new dataset as it induces lower-dimensional \
                     problem@." ;
                  Fv_map.add free_var (meta.data, dependencies) map)
                else (
                  Format.eprintf
                    "Keeping former dataset as it induces lower-dimensional \
                     problem@." ;
                  map))
          provides
          map)
      Fv_map.empty
      solved
  in
  List.iter
    (fun {Solver.dependencies; meta; _} ->
      if Fv_set.is_empty dependencies then G.add_vertex g meta.data
      else
        Fv_set.iter
          (fun dep ->
            match Fv_map.find dep solved_to_file with
            | None -> raise (Missing_file_for_free_variable {free_var = dep})
            | Some (dep_file, _) -> G.add_edge g dep_file meta.data)
          dependencies)
    solved ;
  g

let find_model_or_generic model_name model_list =
  match List.assoc_opt ~equal:String.equal model_name model_list with
  | None -> List.assoc_opt ~equal:String.equal "*" model_list
  | res -> res

let load_files (model_name : string) (files : string list) =
  (* Use a table to store loaded measurements *)
  let table = Hashtbl.create 51 in
  let prune filename =
    (* We assume filenames are of the form <dir>/<name>.workload, where <dir>
       is common amongst all files. This function extracts only the <name> component,
       and raises an exception if the suffix does not match.
    *)
    Filename.basename filename
    |> Filename.chop_suffix_opt ~suffix:".workload"
    |> WithExceptions.Option.get ~loc:__LOC__
  in
  let state =
    List.fold_left
      (fun graph filename ->
        let filename_short = prune filename in
        let measurement = Measure.load ~filename in
        match measurement with
        | Tezos_benchmark.Measure.Measurement ((module Bench), m) -> (
            match find_model_or_generic model_name Bench.models with
            | None -> graph
            | Some model ->
                let () =
                  Format.eprintf "Loading %s in dependency graph@." filename
                in
                Hashtbl.add table filename_short measurement ;
                let names =
                  List.fold_left
                    (fun acc {Measure.workload; _} ->
                      let names = get_free_variables model workload in
                      Free_variable.Set.union names acc)
                    Free_variable.Set.empty
                    m.Measure.workload_data
                in
                add_names graph filename_short names))
      Solver.empty_state
      files
  in
  let state = Solver.solve ~force:true state in
  (to_graph state.solved, table)
back to top