Content - dd4a6c2ad8afd6dbffd2c762828ce7aaeb15982f - 791af92/src/lib_shell_benchmarks/io_benchmarks.ml

visit type:
Tip revision: 676e8dd5c02ce4156f6e9d3d00a34423f86b5bee authored by Gauthier SEBILLE on 03 April 2024, 09:41:51 UTC
Tezt: update expected of views.ml
Tip revision: 676e8dd
io_benchmarks.ml
(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2020-2021 Nomadic Labs. <contact@nomadic-labs.com>          *)
(* Copyright (c) 2023 Marigold <contact@marigold.dev>                        *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

open Benchmarks_shell
module Context = Tezos_protocol_environment.Context
module Shell_monad = Tezos_error_monad.Error_monad
module Key_map = Io_helpers.Key_map

let purpose =
  Benchmark.Other_purpose "Measuring the time to access context file system"

let ns = Namespace.make Shell_namespace.ns "io"

let ns2 s1 s2 = Namespace.(cons (cons (Shell_namespace.ns "io") s1) s2)

let fv s1 s2 = Free_variable.of_namespace (ns2 s1 s2)

let read_model ~name =
  Model.bilinear_affine
    ~name:(ns2 name "read_model")
    ~intercept:(fv name "read_latency")
    ~coeff1:(fv name "depth")
    ~coeff2:(fv name "storage_bytes_read")

let write_model ~name =
  Model.bilinear_affine
    ~name:(ns2 name "write_model")
    ~intercept:(fv name "write_latency")
    ~coeff1:(fv name "keys_written")
    ~coeff2:(fv name "storage_bytes_write")

module Helpers = struct
  (* Samples keys in an alphabet of [card] elements. *)
  let sample_key ~card =
    assert (card > 0) ;
    let i = string_of_int (Random.int card) in
    "key" ^ i

  let random_key rng_state ~card ~depth =
    let depth = Base_samplers.sample_in_interval rng_state ~range:depth in
    Stdlib.List.init depth (fun _ -> sample_key ~card)

  (* Initializes a context by setting random bytes for each key in the
     given [key_set]. *)
  let random_contents rng_state base_dir index context key_set commit_batch_size
      =
    let open Lwt_syntax in
    let* index, context, _ =
      Key_map.fold_lwt
        (fun path size (index, context, current_commit_batch_size) ->
          let* context =
            Io_helpers.initialize_key rng_state context path size
          in
          if current_commit_batch_size < commit_batch_size then
            Lwt.return (index, context, current_commit_batch_size + 1)
          else
            (* save and proceed with fresh diff *)
            let* context, index =
              Io_helpers.commit_and_reload base_dir index context
            in
            Lwt.return (index, context, 0))
        key_set
        (index, context, 0)
    in
    Io_helpers.commit_and_reload base_dir index context

  let random_key_set rng_state ~depth ~key_card ~insertions =
    let rec loop remaining acc =
      if remaining = 0 then acc
      else
        let key = random_key rng_state ~card:key_card ~depth in
        match Key_map.does_not_collide key acc with
        | `Key_exists | `Key_has_prefix | `Key_has_suffix -> loop remaining acc
        | `Key_does_not_collide ->
            let size = 1000 in
            let acc = Key_map.insert key size acc in
            loop (remaining - 1) acc
    in
    let initial =
      let key = random_key rng_state ~card:key_card ~depth in
      let size = 1000 in
      Key_map.insert key size Key_map.empty
    in
    loop insertions initial

  let prepare_random_context rng_state base_dir commit_batch_size keys =
    let context_hash =
      Io_helpers.assert_ok ~msg:"Io_helpers.prepare_empty_context"
      @@ Lwt_main.run (Io_helpers.prepare_empty_context base_dir)
    in
    let context, index =
      Io_helpers.load_context_from_disk base_dir context_hash
    in
    Lwt_main.run
      (let open Lwt_syntax in
      let* context, index =
        random_contents rng_state base_dir index context keys commit_batch_size
      in
      Io_helpers.commit_and_reload base_dir index context)
end

module Context_size_dependent_shared = struct
  (* ----------------------------------------------------------------------- *)
  (* Config *)

  open Base_samplers

  type config = {
    depth : range;
    storage_chunk_bytes : int;
    storage_chunks : range;
    insertions : range;
    key_card : int;
    commit_batch_size : int;
    temp_dir : string option;
  }

  (* This config creates:
     - 1 target file of at most 1MB
     - At most 65536 files of 1KB

     - Files are scattered in directories of depth 10 to 1000
     - Commit for each 10_000 file additions

     In total, 66.5MB of max file contents. Produces a context file of 2GB.
  *)
  let default_config =
    {
      depth = {min = 10; max = 1000};
      storage_chunk_bytes = 1000;
      storage_chunks = {min = 10; max = 1000};
      insertions = {min = 100; max = 65536};
      key_card = 16;
      commit_batch_size = 10_000;
      temp_dir = None;
    }

  let config_encoding =
    let open Data_encoding in
    let int = int31 in
    conv
      (fun {
             depth;
             storage_chunk_bytes;
             storage_chunks;
             insertions;
             key_card;
             commit_batch_size;
             temp_dir;
           } ->
        ( depth,
          storage_chunk_bytes,
          storage_chunks,
          insertions,
          key_card,
          commit_batch_size,
          temp_dir ))
      (fun ( depth,
             storage_chunk_bytes,
             storage_chunks,
             insertions,
             key_card,
             commit_batch_size,
             temp_dir ) ->
        {
          depth;
          storage_chunk_bytes;
          storage_chunks;
          insertions;
          key_card;
          commit_batch_size;
          temp_dir;
        })
      (obj7
         (req "depth" range_encoding)
         (req "storage_chunk_bytes" int)
         (req "storage_chunks" range_encoding)
         (req "insertions" range_encoding)
         (req "key_card" int)
         (req "commit_batch_size" int)
         (opt "temp_dir" string))

  let rec sample_accessed_key rng_state cfg keys =
    let key =
      Helpers.random_key rng_state ~card:cfg.key_card ~depth:cfg.depth
    in
    match Key_map.does_not_collide key keys with
    | `Key_exists | `Key_has_prefix | `Key_has_suffix ->
        sample_accessed_key rng_state cfg keys
    | `Key_does_not_collide ->
        let size =
          Base_samplers.sample_in_interval rng_state ~range:cfg.storage_chunks
          * cfg.storage_chunk_bytes
        in
        (key, size)

  type workload =
    | Random_context_random_access of {
        depth : int;
        storage_bytes : int;
        context_size : int;
      }

  let workload_encoding =
    let open Data_encoding in
    conv
      (fun (Random_context_random_access {depth; storage_bytes; context_size}) ->
        (depth, storage_bytes, context_size))
      (fun (depth, storage_bytes, context_size) ->
        Random_context_random_access {depth; storage_bytes; context_size})
      (tup3 int31 int31 int31)

  let workload_to_vector = function
    | Random_context_random_access {depth; storage_bytes; context_size} ->
        let keys =
          [
            ("depth", float_of_int depth);
            ("storage_bytes", float_of_int storage_bytes);
            ("context_size", float_of_int context_size);
          ]
        in
        Sparse_vec.String.of_list keys
end

module Context_size_dependent_read_bench = struct
  (* ----------------------------------------------------------------------- *)
  (* Benchmark def *)

  let name = ns "CONTEXT_SIZE_DEPENDENT_READ"

  let info =
    "Benchmarking the read accesses with contexts of various sizes (with fixed \
     storage size except for the accessed key)"

  let tags = ["io"]

  let module_filename = __FILE__

  let purpose = purpose

  include Context_size_dependent_shared

  let create_benchmark ~rng_state cfg =
    let insertions =
      Base_samplers.sample_in_interval rng_state ~range:cfg.insertions
    in
    let keys =
      Helpers.random_key_set
        rng_state
        ~depth:cfg.depth
        ~key_card:cfg.key_card
        ~insertions
    in
    let random_key, value_size = sample_accessed_key rng_state cfg keys in
    let keys = Key_map.insert random_key value_size keys in
    Format.eprintf "preparing bench: insertions = %d@." insertions ;
    let closure context =
      match
        Lwt_main.run
          (Tezos_protocol_environment.Context.find context random_key)
      with
      | Some _ -> ()
      | None ->
          let s = String.concat "/" random_key in
          Format.eprintf "key %s not found@." s ;
          exit 1
    in
    let workload =
      Random_context_random_access
        {
          depth = List.length random_key;
          storage_bytes = value_size;
          (* context_size !=  insertions, but there should
             be a linear relationship. *)
          context_size = insertions;
        }
    in
    let with_context f =
      let base_dir =
        Filename.temp_file ?temp_dir:cfg.temp_dir (Namespace.basename name) ""
      in
      Io_helpers.prepare_base_dir base_dir ;
      let context, index =
        Helpers.prepare_random_context
          rng_state
          base_dir
          cfg.commit_batch_size
          keys
      in
      let finalizer () =
        Gc.compact () ;
        Lwt_main.run
          (let open Lwt_syntax in
          let* () = Tezos_context.Context.close index in
          Tezos_stdlib_unix.Lwt_utils_unix.remove_dir base_dir)
      in
      let result =
        try f context
        with _ ->
          finalizer () ;
          exit 1
      in
      finalizer () ;
      result
    in
    Generator.With_context {workload; closure; with_context}

  let group = Benchmark.Group "io"

  let model =
    Model.make
      ~conv:(function
        | Random_context_random_access {depth; storage_bytes; _} ->
            (depth, (storage_bytes, ())))
      (read_model ~name:"context_dependent")
end

let () = Registration.register_simple (module Context_size_dependent_read_bench)

module Context_size_dependent_write_bench = struct
  include Context_size_dependent_shared

  (* ----------------------------------------------------------------------- *)
  (* Benchmark def *)

  let name = ns "CONTEXT_SIZE_DEPENDENT_WRITE"

  let info =
    "Benchmarking the write accesses with contexts of various sizes (with \
     fixed storage size except for the written key)"

  let module_filename = __FILE__

  let purpose = purpose

  let tags = ["io"]

  let write_storage context key bytes =
    Lwt_main.run (Tezos_protocol_environment.Context.add context key bytes)

  let group = Benchmark.Group "io"

  let model =
    Model.make
      ~conv:(function
        | Random_context_random_access {depth; storage_bytes; _} ->
            (depth, (storage_bytes, ())))
      (write_model ~name:"context_dependent")

  let create_benchmark ~rng_state cfg =
    let insertions =
      Base_samplers.sample_in_interval rng_state ~range:cfg.insertions
    in
    let keys =
      Helpers.random_key_set
        rng_state
        ~depth:cfg.depth
        ~key_card:cfg.key_card
        ~insertions
    in
    let random_key, value_size = sample_accessed_key rng_state cfg keys in
    Format.eprintf "preparing bench: insertions = %d@." insertions ;
    let closure context =
      Lwt_main.run
        (let open Lwt_syntax in
        let* _ = Io_helpers.commit context in
        Lwt.return_unit)
    in
    let workload =
      Random_context_random_access
        {
          depth = List.length random_key;
          storage_bytes = value_size;
          (* context_size !=  insertions, but there should
             be a linear relationship. *)
          context_size = insertions;
        }
    in
    let with_context f =
      let base_dir =
        Filename.temp_file ?temp_dir:cfg.temp_dir (Namespace.basename name) ""
      in
      Io_helpers.prepare_base_dir base_dir ;
      let context, index =
        Helpers.prepare_random_context
          rng_state
          base_dir
          cfg.commit_batch_size
          keys
      in
      let bytes = Base_samplers.uniform_bytes rng_state ~nbytes:value_size in
      let context = write_storage context random_key bytes in
      let finalizer () =
        Gc.compact () ;
        Lwt_main.run
          (let open Lwt_syntax in
          let* () = Tezos_context.Context.close index in
          Tezos_stdlib_unix.Lwt_utils_unix.remove_dir base_dir)
      in
      let result =
        try f context
        with _ ->
          finalizer () ;
          exit 1
      in
      finalizer () ;
      result
    in
    Generator.With_context {workload; closure; with_context}
end

let () =
  Registration.register_simple (module Context_size_dependent_write_bench)

module Irmin_pack_shared = struct
  open Base_samplers

  type config = {
    depth : range;
    insertions : range;
    key_card : int;
    irmin_pack_max_width : int;
    storage_chunk_bytes : int;
    storage_chunks : range;
    default_storage_bytes : int;
    commit_batch_size : int;
    temp_dir : string option;
  }

  let config_encoding =
    let open Data_encoding in
    let int = int31 in
    conv
      (fun {
             depth;
             insertions;
             key_card;
             irmin_pack_max_width;
             storage_chunk_bytes;
             storage_chunks;
             default_storage_bytes;
             commit_batch_size;
             temp_dir;
           } ->
        ( depth,
          insertions,
          key_card,
          irmin_pack_max_width,
          storage_chunk_bytes,
          storage_chunks,
          default_storage_bytes,
          commit_batch_size,
          temp_dir ))
      (fun ( depth,
             insertions,
             key_card,
             irmin_pack_max_width,
             storage_chunk_bytes,
             storage_chunks,
             default_storage_bytes,
             commit_batch_size,
             temp_dir ) ->
        {
          depth;
          insertions;
          key_card;
          irmin_pack_max_width;
          storage_chunk_bytes;
          storage_chunks;
          default_storage_bytes;
          commit_batch_size;
          temp_dir;
        })
      (obj9
         (req "depth" range_encoding)
         (req "insertions" range_encoding)
         (req "key_card" int)
         (req "irmin_pack_max_width" int)
         (req "storage_chunk_bytes" int)
         (req "storage_chunks" range_encoding)
         (req "default_storage_bytes" int)
         (req "commit_batch_size" int)
         (opt "temp_dir" string))

  (* This config creates:
     - 1 big directory with [256, 8192] items
       - 1 of the item of the big directory is the target file of at most 50KB
       - The other files in the big directory have 1KB each.
     - and at most 65536 files of 1KB each

     - Files and the big directory are scattered in directories of depth 3 to 30.
     - Commit for each 10_000 file additions

     In total, out 73.85MB of file contents. Produces a context file around 2.2GB.
  *)
  let default_config =
    {
      depth = {min = 3; max = 30};
      insertions = {min = 100; max = 65536};
      key_card = 64;
      irmin_pack_max_width = 8192;
      storage_chunk_bytes = 1000;
      storage_chunks = {min = 1; max = 50};
      default_storage_bytes = 1000;
      commit_batch_size = 10_000;
      temp_dir = None;
    }

  let rec sample_irmin_directory_key rng_state (cfg : config) keys =
    let key =
      Helpers.random_key rng_state ~card:cfg.key_card ~depth:cfg.depth
    in
    match Key_map.does_not_collide key keys with
    | `Key_exists | `Key_has_prefix | `Key_has_suffix ->
        sample_irmin_directory_key rng_state cfg keys
    | `Key_does_not_collide -> key

  let irmin_pack_key i = "pack_" ^ string_of_int i

  let sample_irmin_directory rng_state ~cfg ~key_set =
    if cfg.irmin_pack_max_width < 256 then
      Stdlib.failwith
        "Irmin_pack_read_bench: irmin_pack_max_width < 256, invalid \
         configuration"
    else
      let prefix = sample_irmin_directory_key rng_state cfg key_set in
      let dir_width =
        Base_samplers.sample_in_interval
          rng_state
          ~range:{min = 256; max = cfg.irmin_pack_max_width}
      in
      let files_under_big_directory =
        Array.init dir_width (fun i -> prefix @ [irmin_pack_key i])
      in
      (prefix, files_under_big_directory)
end

module Irmin_pack_read_bench = struct
  include Irmin_pack_shared

  let prepare_irmin_directory rng_state ~cfg ~key_set =
    if cfg.irmin_pack_max_width < 256 then
      Stdlib.failwith
        "Irmin_pack_read_bench: irmin_pack_max_width < 256, invalid \
         configuration"
    else
      let _prefix, files_under_big_directory =
        sample_irmin_directory rng_state ~cfg ~key_set
      in
      let dir_width = Array.length files_under_big_directory in
      let target_index = Random.int dir_width in
      let target_key = files_under_big_directory.(target_index) in
      let value_size =
        Base_samplers.sample_in_interval rng_state ~range:cfg.storage_chunks
        * cfg.storage_chunk_bytes
      in
      let key_set =
        let acc = ref key_set in
        Array.iteri
          (fun index key ->
            if index = target_index then
              acc := Key_map.insert key value_size !acc
            else acc := Key_map.insert key cfg.default_storage_bytes !acc)
          files_under_big_directory ;
        !acc
      in
      (target_key, value_size, key_set, files_under_big_directory)

  let name = ns "IRMIN_PACK_READ"

  let info = "Benchmarking read accesses in irmin-pack directories"

  let module_filename = __FILE__

  let purpose = purpose

  let tags = ["io"]

  type workload =
    | Irmin_pack_read of {
        depth : int;
        irmin_width : int;
        storage_bytes : int;
        context_size : int;
      }

  let workload_to_vector = function
    | Irmin_pack_read {depth; irmin_width; storage_bytes; context_size} ->
        let keys =
          [
            ("depth", float_of_int depth);
            ("irmin_width", float_of_int irmin_width);
            ("storage_bytes", float_of_int storage_bytes);
            ("context_size", float_of_int context_size);
          ]
        in
        Sparse_vec.String.of_list keys

  let model =
    Model.make
      ~conv:(function
        | Irmin_pack_read {depth; storage_bytes; _} ->
            (depth, (storage_bytes, ())))
      (read_model ~name:"irmin")

  let group = Benchmark.Group "io"

  let workload_encoding =
    let open Data_encoding in
    conv
      (fun (Irmin_pack_read {depth; irmin_width; storage_bytes; context_size}) ->
        (depth, irmin_width, storage_bytes, context_size))
      (fun (depth, irmin_width, storage_bytes, context_size) ->
        Irmin_pack_read {depth; irmin_width; storage_bytes; context_size})
      (tup4 int31 int31 int31 int31)

  let create_benchmark ~rng_state (cfg : config) =
    let insertions =
      Base_samplers.sample_in_interval rng_state ~range:cfg.insertions
    in
    let keys =
      Helpers.random_key_set
        rng_state
        ~depth:cfg.depth
        ~key_card:cfg.key_card
        ~insertions
    in
    let target_key, value_size, keys, irmin_pack_paths =
      prepare_irmin_directory rng_state ~cfg ~key_set:keys
    in
    let irmin_width = Array.length irmin_pack_paths in
    let stats = Io_stats.tree_statistics keys in
    Format.eprintf
      "preparing bench: insertions = %d, stats = %a@."
      (insertions + irmin_width)
      Io_stats.pp
      stats ;
    let closure context =
      match Lwt_main.run (Context.find context target_key) with
      | Some _ -> ()
      | None ->
          let s = String.concat "/" target_key in
          Format.eprintf "key %s not found@." s ;
          exit 1
    in
    let workload =
      Irmin_pack_read
        {
          depth = List.length target_key;
          irmin_width;
          storage_bytes = value_size;
          context_size = stats.total;
        }
    in
    let with_context f =
      let base_dir =
        Filename.temp_file ?temp_dir:cfg.temp_dir (Namespace.basename name) ""
      in
      Io_helpers.prepare_base_dir base_dir ;
      let context, index =
        Helpers.prepare_random_context
          rng_state
          base_dir
          cfg.commit_batch_size
          keys
      in
      let finalizer () =
        Gc.compact () ;
        Lwt_main.run
          (let open Lwt_syntax in
          let* () = Tezos_context.Context.close index in
          Tezos_stdlib_unix.Lwt_utils_unix.remove_dir base_dir)
      in
      let result =
        try f context
        with _ ->
          finalizer () ;
          exit 1
      in
      finalizer () ;
      result
    in
    Generator.With_context {workload; closure; with_context}
end

let () = Registration.register_simple (module Irmin_pack_read_bench)

module Irmin_pack_write_bench = struct
  include Irmin_pack_shared

  let prepare_irmin_directory rng_state ~cfg ~key_set ~bench_init =
    if cfg.irmin_pack_max_width < 256 then
      Stdlib.failwith
        "Irmin_pack_read_bench: irmin_pack_max_width < 256, invalid \
         configuration"
    else
      let _prefix, directories =
        sample_irmin_directory rng_state ~cfg ~key_set
      in
      let total_keys_in_pack = Array.length directories in
      let number_of_keys_written = Random.int total_keys_in_pack in
      let keys_written_to, keys_not_written_to =
        Io_helpers.sample_without_replacement
          number_of_keys_written
          (Array.to_list directories)
      in
      let key_set =
        (* Initialize keys not written to with random bytes of fixed size *)
        List.fold_left
          (fun key_set key ->
            Key_map.insert key cfg.default_storage_bytes key_set)
          key_set
          keys_not_written_to
      in
      let key_set =
        if bench_init then
          (* If we wish to benchmark writing to fresh keys, we should not
             add the keys written to in the initial context *)
          key_set
        else
          (* Else, if we wish to benchmark overwriting existing keys,
             we initialize them to bytes of fixed size. *)
          List.fold_left
            (fun key_set key ->
              Key_map.insert key cfg.default_storage_bytes key_set)
            key_set
            keys_written_to
      in
      ( number_of_keys_written,
        keys_written_to,
        keys_not_written_to,
        key_set,
        total_keys_in_pack )

  let name = ns "IRMIN_PACK_WRITE"

  let info = "Benchmarking write accesses in irmin-pack directories"

  let module_filename = __FILE__

  let purpose = purpose

  let tags = ["io"]

  type workload =
    | Irmin_pack_write of {
        keys_written : int;
        irmin_width : int;
        storage_bytes : int;
        context_size : int;
      }

  let workload_encoding =
    let open Data_encoding in
    conv
      (fun (Irmin_pack_write
             {keys_written; irmin_width; storage_bytes; context_size}) ->
        (keys_written, irmin_width, storage_bytes, context_size))
      (fun (keys_written, irmin_width, storage_bytes, context_size) ->
        Irmin_pack_write
          {keys_written; irmin_width; storage_bytes; context_size})
      (tup4 int31 int31 int31 int31)

  let workload_to_vector = function
    | Irmin_pack_write {keys_written; irmin_width; storage_bytes; context_size}
      ->
        let keys =
          [
            ("keys_written", float_of_int keys_written);
            ("irmin_width", float_of_int irmin_width);
            ("storage_bytes", float_of_int storage_bytes);
            ("context_size", float_of_int context_size);
          ]
        in
        Sparse_vec.String.of_list keys

  let model =
    Model.make
      ~conv:(function
        | Irmin_pack_write {keys_written; storage_bytes; _} ->
            (keys_written, (storage_bytes, ())))
      (write_model ~name:"irmin")

  let group = Benchmark.Group "io"

  let write_storage context key bytes =
    Lwt_main.run (Context.add context key bytes)

  let create_benchmark ~rng_state (cfg : config) =
    let insertions =
      Base_samplers.sample_in_interval rng_state ~range:cfg.insertions
    in
    let keys =
      Helpers.random_key_set
        rng_state
        ~depth:cfg.depth
        ~key_card:cfg.key_card
        ~insertions
    in
    let ( number_of_keys_written,
          keys_written_to,
          _keys_not_written_to,
          key_set,
          total_keys_in_pack ) =
      prepare_irmin_directory rng_state ~cfg ~key_set:keys ~bench_init:true
    in
    let stats = Io_stats.tree_statistics keys in
    Format.eprintf
      "preparing bench: insertions = %d, stats = %a@."
      (insertions + total_keys_in_pack)
      Io_stats.pp
      stats ;
    let base_dir =
      Filename.temp_file ?temp_dir:cfg.temp_dir (Namespace.basename name) ""
    in
    let value_size =
      Base_samplers.sample_in_interval rng_state ~range:cfg.storage_chunks
      * cfg.storage_chunk_bytes
    in
    let with_context f =
      Io_helpers.prepare_base_dir base_dir ;
      let context, index =
        Helpers.prepare_random_context
          rng_state
          base_dir
          cfg.commit_batch_size
          key_set
      in
      let context =
        List.fold_left
          (fun context key ->
            let bytes =
              Base_samplers.uniform_bytes rng_state ~nbytes:value_size
            in
            write_storage context key bytes)
          context
          keys_written_to
      in
      let finalizer () =
        Gc.compact () ;
        Lwt_main.run
          (let open Lwt_syntax in
          let* () = Tezos_context.Context.close index in
          Tezos_stdlib_unix.Lwt_utils_unix.remove_dir base_dir)
      in
      let result =
        try f context
        with _ ->
          finalizer () ;
          exit 1
      in
      finalizer () ;
      result
    in
    let closure context =
      Lwt_main.run
        (let open Lwt_syntax in
        let* _ = Io_helpers.commit context in
        Lwt.return_unit)
    in
    let workload =
      Irmin_pack_write
        {
          keys_written = number_of_keys_written;
          irmin_width = total_keys_in_pack;
          storage_bytes = value_size;
          context_size = stats.total;
        }
    in
    Generator.With_context {workload; closure; with_context}
end

let () = Registration.register_simple (module Irmin_pack_write_bench)

module Read_random_key_bench = struct
  type config = {
    existing_context : string * Context_hash.t;
    subdirectory : string;
  }

  let default_config =
    {
      existing_context = ("/no/such/directory", Context_hash.zero);
      subdirectory = "/no/such/key";
    }

  let config_encoding =
    let open Data_encoding in
    conv
      (fun {existing_context; subdirectory} -> (existing_context, subdirectory))
      (fun (existing_context, subdirectory) -> {existing_context; subdirectory})
      (obj2
         (req "existing_context" (tup2 string Context_hash.encoding))
         (req "subdirectory" string))

  let name = ns "READ_RANDOM_KEY"

  let info = "Benchmarking random read accesses in a subdirectory"

  let module_filename = __FILE__

  let purpose = purpose

  let tags = ["io"]

  type workload = Read_random_key of {depth : int; storage_bytes : int}

  let workload_encoding =
    let open Data_encoding in
    conv
      (function
        | Read_random_key {depth; storage_bytes} -> (depth, storage_bytes))
      (fun (depth, storage_bytes) -> Read_random_key {depth; storage_bytes})
      (tup2 int31 int31)

  let workload_to_vector = function
    | Read_random_key {depth; storage_bytes} ->
        let keys =
          [
            ("depth", float_of_int depth);
            ("storage_bytes", float_of_int storage_bytes);
          ]
        in
        Sparse_vec.String.of_list keys

  let group = Benchmark.Group "io"

  let model =
    Model.make
      ~conv:(function
        | Read_random_key {depth; storage_bytes} -> (depth, (storage_bytes, ())))
      (read_model ~name:"random")

  let make_bench rng_state config keys () =
    let card = Array.length keys in
    assert (card > 0) ;
    let key, value_size = keys.(Random.State.int rng_state card) in
    let with_context f =
      let context, index =
        let base_dir, context_hash = config.existing_context in
        Io_helpers.load_context_from_disk base_dir context_hash
      in
      let finalizer () =
        Gc.compact () ;
        Lwt_main.run (Tezos_context.Context.close index)
      in
      let result =
        try f context
        with _ ->
          finalizer () ;
          exit 1
      in
      finalizer () ;
      result
    in
    let closure context =
      match Lwt_main.run (Context.find context key) with
      | Some _ -> ()
      | None ->
          let s = String.concat "/" key in
          Format.eprintf "key %s not found@." s ;
          exit 1
    in
    let workload =
      Read_random_key {depth = List.length key; storage_bytes = value_size}
    in
    Generator.With_context {workload; closure; with_context}

  let create_benchmarks ~rng_state ~bench_num config =
    let base_dir, context_hash = config.existing_context in
    (* files under [config.subdirectory] *)
    let tree =
      Io_helpers.with_context ~base_dir ~context_hash (fun context ->
          Io_stats.load_tree context
          @@ Option.value_f ~default:(fun () ->
                 Stdlib.failwith
                   (Format.asprintf
                      "%a: invalid config subdirectory"
                      Namespace.pp
                      name))
          @@ Io_helpers.split_absolute_path config.subdirectory)
    in
    let keys = Array.of_seq @@ Io_helpers.Key_map.to_seq tree in
    List.repeat bench_num (make_bench rng_state config keys)
end

let () = Registration.register_simple_with_num (module Read_random_key_bench)

module Write_random_keys_bench = struct
  open Base_samplers

  type config = {
    existing_context : string * Context_hash.t;
    storage_chunk_bytes : int;
    storage_chunks : range;
    max_written_keys : int;
    temp_dir : string option;
    subdirectory : string;
  }

  let default_config =
    {
      existing_context = ("/no/such/directory", Context_hash.zero);
      storage_chunk_bytes = 1000;
      storage_chunks = {min = 1; max = 1000};
      max_written_keys = 10_000;
      temp_dir = None;
      subdirectory = "/no/such/key";
    }

  let config_encoding =
    let open Data_encoding in
    let int = int31 in
    conv
      (fun {
             existing_context;
             storage_chunk_bytes;
             storage_chunks;
             max_written_keys;
             temp_dir;
             subdirectory;
           } ->
        ( existing_context,
          storage_chunk_bytes,
          storage_chunks,
          max_written_keys,
          temp_dir,
          subdirectory ))
      (fun ( existing_context,
             storage_chunk_bytes,
             storage_chunks,
             max_written_keys,
             temp_dir,
             subdirectory ) ->
        {
          existing_context;
          storage_chunk_bytes;
          storage_chunks;
          max_written_keys;
          temp_dir;
          subdirectory;
        })
      (obj6
         (req "existing_context" (tup2 string Context_hash.encoding))
         (req "storage_chunk_bytes" int)
         (req "storage_chunks" range_encoding)
         (req "max_written_keys" int)
         (req "temp_dir" (option string))
         (req "subdirectory" string))

  let name = ns "WRITE_RANDOM_KEYS"

  let info = "Benchmarking random read accesses in a subdirectory"

  let module_filename = __FILE__

  let purpose = purpose

  let tags = ["io"]

  type workload =
    | Write_random_keys of {keys_written : int; storage_bytes : int}

  let workload_encoding =
    let open Data_encoding in
    conv
      (function
        | Write_random_keys {keys_written; storage_bytes} ->
            (keys_written, storage_bytes))
      (fun (keys_written, storage_bytes) ->
        Write_random_keys {keys_written; storage_bytes})
      (tup2 int31 int31)

  let workload_to_vector = function
    | Write_random_keys {keys_written; storage_bytes} ->
        let keys =
          [
            ("keys_written", float_of_int keys_written);
            ("storage_bytes", float_of_int storage_bytes);
          ]
        in
        Sparse_vec.String.of_list keys

  let group = Benchmark.Group "io"

  let model =
    Model.make
      ~conv:(function
        | Write_random_keys {keys_written; storage_bytes; _} ->
            (keys_written, (storage_bytes, ())))
      (write_model ~name:"random")

  let write_storage context key bytes =
    Lwt_main.run (Context.add context key bytes)

  let make_bench rng_state (cfg : config) (keys : (string list * int) list) () =
    let total_keys_under_directory = List.length keys in
    let number_of_keys_written =
      min
        total_keys_under_directory
        (Random.State.int rng_state cfg.max_written_keys)
    in
    let keys_written_to, _keys_not_written_to =
      Io_helpers.sample_without_replacement number_of_keys_written keys
    in
    let source_base_dir, context_hash = cfg.existing_context in
    let value_size =
      Base_samplers.sample_in_interval rng_state ~range:cfg.storage_chunks
      * cfg.storage_chunk_bytes
    in
    let with_context f =
      let target_base_dir =
        let temp_dir = Option.value cfg.temp_dir ~default:"/tmp" in
        Format.asprintf
          "%s/%s_%d"
          temp_dir
          (Namespace.basename name)
          (Random.int 65536)
      in
      (* copying the original context for EACH test *)
      Io_helpers.copy_rec source_base_dir target_base_dir ;
      Format.eprintf "Finished copying original context to %s@." target_base_dir ;
      let context, index =
        Io_helpers.load_context_from_disk target_base_dir context_hash
      in
      (* overwrite [keys_written_to].  The times of the writes are not measured. *)
      let context =
        List.fold_left
          (fun context (key, _) ->
            let bytes =
              Base_samplers.uniform_bytes rng_state ~nbytes:value_size
            in
            write_storage context key bytes)
          context
          keys_written_to
      in
      let finalizer () =
        Gc.compact () ;
        Lwt_main.run
          (let open Lwt_syntax in
          let* () = Tezos_context.Context.close index in
          Tezos_stdlib_unix.Lwt_utils_unix.remove_dir target_base_dir)
      in
      let result =
        try f context
        with _ ->
          finalizer () ;
          exit 1
      in
      finalizer () ;
      result
    in
    (* This only measure the time to commit *)
    let closure context =
      Lwt_main.run
        (let open Lwt_syntax in
        let* _context_hash = Io_helpers.commit context in
        Lwt.return_unit)
    in
    let workload =
      Write_random_keys
        {keys_written = number_of_keys_written; storage_bytes = value_size}
    in
    Generator.With_context {workload; closure; with_context}

  let create_benchmarks ~rng_state ~bench_num config =
    let base_dir, context_hash = config.existing_context in
    (* files under [config.subdirectory] *)
    let tree =
      Io_helpers.with_context ~base_dir ~context_hash (fun context ->
          Io_stats.load_tree context
          @@ Option.value_f ~default:(fun () ->
                 Stdlib.failwith
                   (Format.asprintf
                      "%a: invalid config subdirectory"
                      Namespace.pp
                      name))
          @@ Io_helpers.split_absolute_path config.subdirectory)
    in
    let keys = List.of_seq @@ Io_helpers.Key_map.to_seq tree in
    List.repeat bench_num (make_bench rng_state config keys)
end

let () = Registration.register_simple_with_num (module Write_random_keys_bench)
Browse the archive

https://gitlab.com/tezos/tezos