Raw File
binary_description.ml
(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2018 Dynamic Ledger Solutions, Inc. <contact@tezos.com>     *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

type recursives = string list

type references = {
  descriptions: (string * Binary_schema.toplevel_encoding) list;
}
[@@unwrapped]

(* Simple Union find implementation, there are several optimizations
   that give UF it's usual time complexity that could be added.
   If this is a bottleneck, they're easy to add. *)
module UF : sig
  type t

  val add : t -> Binary_schema.description -> unit

  val find : t -> string -> Binary_schema.description

  val union :
    t -> new_canonical:Binary_schema.description -> existing:string -> unit

  val empty : unit -> t
end = struct
  open Binary_schema

  type ele = Ref of string | Root of description

  type t = (string, ele) Hashtbl.t

  let add t x = Hashtbl.replace t x.title (Root x)

  let rec find tbl key =
    match Hashtbl.find tbl key with Ref s -> find tbl s | Root desc -> desc

  let union tbl ~new_canonical ~existing =
    add tbl new_canonical;
    let root = find tbl existing in
    if root.title = new_canonical.title then ()
    else Hashtbl.replace tbl root.title (Ref new_canonical.title)

  let empty () = Hashtbl.create 128
end

let fixup_references uf =
  let open Binary_schema in
  let rec fixup_layout = function
    | Ref s -> Ref (UF.find uf s).title
    | Enum (i, name) -> Enum (i, (UF.find uf name).title)
    | Seq (layout, len) -> Seq (fixup_layout layout, len)
    | ( Zero_width | Int _ | Bool
      | RangedInt (_, _)
      | RangedFloat (_, _)
      | Float | Bytes | String | Padding ) as enc ->
        enc
  in
  let field = function
    | Named_field (name, kind, layout) ->
        Named_field (name, kind, fixup_layout layout)
    | Anonymous_field (kind, layout) ->
        Anonymous_field (kind, fixup_layout layout)
    | (Dynamic_size_field _ | Optional_field _) as field -> field
  in
  function
  | Obj {fields} -> Obj {fields = List.map field fields}
  | Cases ({cases; _} as x) ->
      Cases
        {
          x with
          cases =
            List.map
              (fun (i, name, fields) -> (i, name, List.map field fields))
              cases;
        }
  | Int_enum _ as ie -> ie

let z_reference_name = "Z.t"

let z_reference_description =
  "A variable-length sequence of bytes encoding a Zarith integer. Each byte \
   has a running unary size bit: the most significant bit of each byte \
   indicates whether this is the last byte in the sequence (0) or whether the \
   sequence continues (1). The second most significant bit of the first byte \
   is reserved for the sign (0 for positive, 1 for negative). Size and sign \
   bits ignored, the data is the binary representation of the absolute value \
   of the number in little-endian order."

let z_encoding =
  Binary_schema.Obj {fields = [Named_field ("Z.t", `Dynamic, Bytes)]}

let add_z_reference uf {descriptions} =
  UF.add
    uf
    {title = z_reference_name; description = Some z_reference_description};
  {descriptions = (z_reference_name, z_encoding) :: descriptions}

let n_reference_name = "N.t"

let n_reference_description =
  "A variable-length sequence of bytes encoding a Zarith natural number. Each \
   byte has a running unary size bit: the most significant bit of each byte \
   indicates whether this is the last byte in the sequence (0) or whether the \
   sequence continues (1). Size bits ignored, the data is the binary \
   representation of the number in little-endian order."

let n_encoding =
  Binary_schema.Obj {fields = [Named_field ("N.t", `Dynamic, Bytes)]}

let add_n_reference uf {descriptions} =
  UF.add
    uf
    {title = n_reference_name; description = Some n_reference_description};
  {descriptions = (n_reference_name, n_encoding) :: descriptions}

let dedup_canonicalize uf =
  let tbl :
      (Binary_schema.toplevel_encoding, Binary_schema.description) Hashtbl.t =
    Hashtbl.create 100
  in
  let rec help prev_len acc = function
    | [] ->
        let fixedup =
          List.map
            (fun (desc, layout) -> (desc, fixup_references uf layout))
            acc
        in
        if List.length fixedup = prev_len then
          List.map (fun (name, layout) -> (UF.find uf name, layout)) fixedup
        else (
          Hashtbl.clear tbl;
          help (List.length fixedup) [] fixedup )
    | (name, layout) :: tl -> (
        match Hashtbl.find_opt tbl layout with
        | None ->
            let desc = UF.find uf name in
            Hashtbl.add tbl layout desc;
            help prev_len ((desc.title, layout) :: acc) tl
        | Some original_desc ->
            UF.union uf ~new_canonical:original_desc ~existing:name;
            help prev_len acc tl )
  in
  help 0 []

type pdesc = P : 'x Encoding.desc -> pdesc

let describe (type x) (encoding : x Encoding.t) =
  let open Encoding in
  let uf = UF.empty () in
  let uf_add_name title = UF.add uf {title; description = None} in
  let add_reference name description {descriptions} =
    {descriptions = (name, description) :: descriptions}
  in
  let new_reference =
    let x = ref ~-1 in
    fun () ->
      x := !x + 1;
      let name = "X_" ^ string_of_int !x in
      uf_add_name name;
      name
  in
  let may_new_reference = function
    | None -> new_reference ()
    | Some name ->
        uf_add_name name;
        name
  in
  let rec extract_dynamic :
      type x.
      string option ->
      x Encoding.desc ->
      Binary_size.unsigned_integer option * string option * pdesc =
   fun ref_name -> function
    | Conv {encoding; _} -> extract_dynamic ref_name encoding.encoding
    | Describe {id = ref_name; encoding; _} ->
        extract_dynamic (Some ref_name) encoding.encoding
    | Splitted {encoding; _} -> extract_dynamic ref_name encoding.encoding
    | Delayed f -> extract_dynamic ref_name (f ()).encoding
    | Dynamic_size {kind; encoding} -> (Some kind, ref_name, P encoding.encoding)
    | enc -> (None, ref_name, P enc)
  in
  let rec field_descr :
      type a.
      recursives ->
      references ->
      a Encoding.field ->
      Binary_schema.field_descr list * references =
   fun recursives references -> function
    | Req {name; encoding = {encoding; _}; _}
    | Dft {name; encoding = {encoding; _}; _} -> (
        let (dynamics, ref_name, P field) = extract_dynamic None encoding in
        let (layout, references) =
          layout ref_name recursives references field
        in
        if layout = Zero_width then ([], references)
        else
          let field_descr =
            Binary_schema.Named_field (name, classify_desc field, layout)
          in
          match dynamics with
          | Some kind ->
              ([Dynamic_size_field (ref_name, 1, kind); field_descr], references)
          | None -> ([field_descr], references) )
    | Opt {kind = `Variable; name; encoding = {encoding; _}; _} ->
        let (layout, references) = layout None recursives references encoding in
        ([Named_field (name, `Variable, layout)], references)
    | Opt {kind = `Dynamic; name; encoding = {encoding; _}; _} ->
        let (layout, references) = layout None recursives references encoding in
        ( [
            Binary_schema.Optional_field name;
            Named_field (name, classify_desc encoding, layout);
          ],
          references )
  and obj fields = Binary_schema.Obj {fields}
  and union :
      type a.
      string option ->
      recursives ->
      references ->
      Kind.t ->
      Binary_size.tag_size ->
      a case list ->
      string * references =
   fun ref_name recursives references kind size cases ->
    let cases =
      List.sort (fun (t1, _) (t2, _) -> (compare : int -> int -> int) t1 t2)
      @@ List.fold_left
           (fun acc (Case {tag; _} as case) ->
             if Uint_option.is_some tag then (Uint_option.get tag, case) :: acc
             else acc)
           []
           cases
    in
    let tag_field =
      Binary_schema.Named_field
        ( "Tag",
          `Fixed (Binary_size.tag_size size),
          Int (size :> Binary_schema.integer_extended) )
    in
    let (cases, references) =
      List.fold_right
        (fun (tag, Case case) (cases, references) ->
          let (fields, references) =
            fields None recursives references case.encoding.encoding
          in
          ((tag, Some case.title, tag_field :: fields) :: cases, references))
        cases
        ([], references)
    in
    let name = may_new_reference ref_name in
    let references =
      add_reference name (Cases {kind; tag_size = size; cases}) references
    in
    (name, references)
  and describe :
      type b.
      ?description:string ->
      title:string ->
      string ->
      recursives ->
      references ->
      b desc ->
      string * references =
   fun ?description ~title name recursives references encoding ->
    let new_canonical = {Binary_schema.title; description} in
    UF.add uf new_canonical;
    let (layout, references) = layout None recursives references encoding in
    match layout with
    | Ref ref_name ->
        UF.union uf ~existing:ref_name ~new_canonical;
        (ref_name, references)
    | layout ->
        UF.add uf new_canonical;
        ( name,
          add_reference
            name
            (obj [Anonymous_field (classify_desc encoding, layout)])
            references )
  and enum : type a. (a, _) Hashtbl.t -> a array -> _ =
   fun tbl encoding_array ->
    ( Binary_size.range_to_size ~minimum:0 ~maximum:(Array.length encoding_array),
      List.map
        (fun i -> (i, fst @@ Hashtbl.find tbl encoding_array.(i)))
        (List.init (Array.length encoding_array) (fun i -> i)) )
  and fields :
      type b.
      string option ->
      recursives ->
      references ->
      b Encoding.desc ->
      Binary_schema.fields * references =
   fun ref_name recursives references -> function
    | Obj field -> field_descr recursives references field
    | Objs {left; right; _} ->
        let (left_fields, references) =
          fields None recursives references left.encoding
        in
        let (right_fields, references) =
          fields None recursives references right.encoding
        in
        (left_fields @ right_fields, references)
    | Null -> ([Anonymous_field (`Fixed 0, Zero_width)], references)
    | Empty -> ([Anonymous_field (`Fixed 0, Zero_width)], references)
    | Ignore -> ([Anonymous_field (`Fixed 0, Zero_width)], references)
    | Constant _ -> ([Anonymous_field (`Fixed 0, Zero_width)], references)
    | Dynamic_size {kind; encoding} ->
        let (fields, refs) =
          fields None recursives references encoding.encoding
        in
        (Dynamic_size_field (None, List.length fields, kind) :: fields, refs)
    | Check_size {encoding; _} ->
        fields ref_name recursives references encoding.encoding
    | Conv {encoding; _} ->
        fields ref_name recursives references encoding.encoding
    | Describe {id = name; encoding; _} ->
        fields (Some name) recursives references encoding.encoding
    | Splitted {encoding; _} ->
        fields ref_name recursives references encoding.encoding
    | Delayed func -> fields ref_name recursives references (func ()).encoding
    | List (len, {encoding; _}) ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (`Variable, Seq (layout, len))], references)
    | Array (len, {encoding; _}) ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (`Variable, Seq (layout, len))], references)
    | Bytes kind -> ([Anonymous_field ((kind :> Kind.t), Bytes)], references)
    | String kind -> ([Anonymous_field ((kind :> Kind.t), String)], references)
    | Padded ({encoding = e; _}, n) ->
        let (fields, references) = fields ref_name recursives references e in
        (fields @ [Named_field ("padding", `Fixed n, Padding)], references)
    | String_enum (tbl, encoding_array) as encoding ->
        let (size, cases) = enum tbl encoding_array in
        let name = may_new_reference ref_name in
        ( [Anonymous_field (classify_desc encoding, Ref name)],
          add_reference name (Int_enum {size; cases}) references )
    | Tup {encoding; _} ->
        let (layout, references) =
          layout ref_name recursives references encoding
        in
        if layout = Zero_width then ([], references)
        else ([Anonymous_field (classify_desc encoding, layout)], references)
    | Tups {left; right; _} ->
        let (fields1, references) =
          fields None recursives references left.encoding
        in
        let (fields2, references) =
          fields None recursives references right.encoding
        in
        (fields1 @ fields2, references)
    | Union {kind; tag_size; cases; _} ->
        let (name, references) =
          union None recursives references kind tag_size cases
        in
        ([Anonymous_field (kind, Ref name)], references)
    | Mu {kind; name; title; description; fix} as encoding ->
        let kind = (kind :> Kind.t) in
        let title = match title with Some title -> title | None -> name in
        if List.mem name recursives then
          ([Anonymous_field (kind, Ref name)], references)
        else
          let {encoding; _} = fix {encoding; json_encoding = None} in
          let (name, references) =
            describe
              ~title
              ?description
              name
              (name :: recursives)
              references
              encoding
          in
          ([Anonymous_field (kind, Ref name)], references)
    | Bool as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Int8 as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Uint8 as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Int16 as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Uint16 as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Int31 as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Int32 as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Int64 as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | N as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Z as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | RangedInt _ as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | RangedFloat _ as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
    | Float as encoding ->
        let (layout, references) = layout None recursives references encoding in
        ([Anonymous_field (classify_desc encoding, layout)], references)
  and layout :
      type c.
      string option ->
      recursives ->
      references ->
      c Encoding.desc ->
      Binary_schema.layout * references =
   fun ref_name recursives references -> function
    | Null -> (Zero_width, references)
    | Empty -> (Zero_width, references)
    | Ignore -> (Zero_width, references)
    | Constant _ -> (Zero_width, references)
    | Bool -> (Bool, references)
    | Int8 -> (Int `Int8, references)
    | Uint8 -> (Int `Uint8, references)
    | Int16 -> (Int `Int16, references)
    | Uint16 -> (Int `Uint16, references)
    | Int31 -> (RangedInt (~-1073741824, 1073741823), references)
    | Int32 -> (Int `Int32, references)
    | Int64 -> (Int `Int64, references)
    | N -> (Ref n_reference_name, add_n_reference uf references)
    | Z -> (Ref z_reference_name, add_z_reference uf references)
    | RangedInt {minimum; maximum} -> (RangedInt (minimum, maximum), references)
    | RangedFloat {minimum; maximum} ->
        (RangedFloat (minimum, maximum), references)
    | Float -> (Float, references)
    | Bytes _kind -> (Bytes, references)
    | String _kind -> (String, references)
    | Padded _ as enc ->
        let name = may_new_reference ref_name in
        let (fields, references) = fields None recursives references enc in
        let references = add_reference name (obj fields) references in
        (Ref name, references)
    | String_enum (tbl, encoding_array) ->
        let name = may_new_reference ref_name in
        let (size, cases) = enum tbl encoding_array in
        let references =
          add_reference name (Int_enum {size; cases}) references
        in
        (Enum (size, name), references)
    | Array (len, data) ->
        let (descr, references) =
          layout None recursives references data.encoding
        in
        (Seq (descr, len), references)
    | List (len, data) ->
        let (layout, references) =
          layout None recursives references data.encoding
        in
        (Seq (layout, len), references)
    | Obj (Req {encoding = {encoding; _}; _})
    | Obj (Dft {encoding = {encoding; _}; _}) ->
        layout ref_name recursives references encoding
    | Obj (Opt _) as enc ->
        let name = may_new_reference ref_name in
        let (fields, references) = fields None recursives references enc in
        let references = add_reference name (obj fields) references in
        (Ref name, references)
    | Objs {left; right; _} ->
        let name = may_new_reference ref_name in
        let (fields1, references) =
          fields None recursives references left.encoding
        in
        let (fields2, references) =
          fields None recursives references right.encoding
        in
        let references =
          add_reference name (obj (fields1 @ fields2)) references
        in
        (Ref name, references)
    | Tup {encoding; _} -> layout ref_name recursives references encoding
    | Tups _ as descr ->
        let name = may_new_reference ref_name in
        let (fields, references) = fields None recursives references descr in
        let references = add_reference name (obj fields) references in
        (Ref name, references)
    | Union {kind; tag_size; cases; _} ->
        let (name, references) =
          union ref_name recursives references kind tag_size cases
        in
        (Ref name, references)
    | Mu {name; title; description; fix; _} as encoding ->
        let title = match title with Some title -> title | None -> name in
        if List.mem name recursives then (Ref name, references)
        else
          let {encoding; _} = fix {encoding; json_encoding = None} in
          let (name, references) =
            describe
              name
              ~title
              ?description
              (name :: recursives)
              references
              encoding
          in
          (Ref name, references)
    | Conv {encoding; _} ->
        layout ref_name recursives references encoding.encoding
    | Describe {id = name; encoding; _} ->
        layout (Some name) recursives references encoding.encoding
    | Splitted {encoding; _} ->
        layout ref_name recursives references encoding.encoding
    | Dynamic_size _ as encoding ->
        let name = may_new_reference ref_name in
        let (fields, references) = fields None recursives references encoding in
        UF.add uf {title = name; description = None};
        (Ref name, add_reference name (obj fields) references)
    | Check_size {encoding; _} ->
        layout ref_name recursives references encoding.encoding
    | Delayed func -> layout ref_name recursives references (func ()).encoding
  in
  let (fields, references) =
    fields None [] {descriptions = []} encoding.encoding
  in
  uf_add_name "";
  let (_, toplevel) = List.hd (dedup_canonicalize uf [("", obj fields)]) in
  let filtered =
    List.filter
      (fun (name, encoding) ->
        match encoding with
        | Binary_schema.Obj {fields = [Anonymous_field (_, Ref reference)]} ->
            UF.union uf ~new_canonical:(UF.find uf name) ~existing:reference;
            false
        | _ -> true)
      references.descriptions
  in
  let fields = List.rev (dedup_canonicalize uf filtered) in
  {Binary_schema.toplevel; fields}
back to top