Revision cdd79af82674b3e7fa87e9b3e8ca8a4d818459de authored by Raphaël Proust on 26 April 2022, 16:09:44 UTC, committed by Raphaël Proust on 31 May 2022, 10:22:53 UTC
1 parent 4975d07
binary_slicer.ml
(*****************************************************************************)
(* *)
(* Open Source License *)
(* Copyright (c) 2021 Nomadic Labs. <contact@nomadic-labs.com> *)
(* *)
(* Permission is hereby granted, free of charge, to any person obtaining a *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *)
(* and/or sell copies of the Software, and to permit persons to whom the *)
(* Software is furnished to do so, subject to the following conditions: *)
(* *)
(* The above copyright notice and this permission notice shall be included *)
(* in all copies or substantial portions of the Software. *)
(* *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *)
(* DEALINGS IN THE SOFTWARE. *)
(* *)
(*****************************************************************************)
open Binary_error_types
let raise e = raise (Read_error e)
type slice = {name : string; value : string; pretty_printed : string}
(* state management *)
type slicer_state = {
buffer : string;
mutable offset : int;
mutable remaining_bytes : int;
mutable allowed_bytes : int option;
mutable slices : slice list;
}
let make_slicer_state buffer ~offset ~length =
if length < 0 || length > String.length buffer - offset then None
else
Some
{
buffer;
offset;
remaining_bytes = length;
allowed_bytes = None;
slices = [];
}
let check_allowed_bytes state size =
match state.allowed_bytes with
| Some len when len < size -> raise Size_limit_exceeded
| Some len -> Some (len - size)
| None -> None
let check_remaining_bytes state size =
if state.remaining_bytes < size then raise Not_enough_data ;
state.remaining_bytes - size
let read_atom ?(pp = fun _ -> "") size conv name state =
let offset = state.offset in
state.remaining_bytes <- check_remaining_bytes state size ;
state.allowed_bytes <- check_allowed_bytes state size ;
state.offset <- state.offset + size ;
let value = String.sub state.buffer offset size in
let result = conv state.buffer offset in
state.slices <- {name; value; pretty_printed = pp result} :: state.slices ;
result
(** Reader for all the atomic types. *)
module Atom = struct
let read_byte state =
let size = Binary_size.int8 in
let offset = state.offset in
state.remaining_bytes <- check_remaining_bytes state size ;
state.allowed_bytes <- check_allowed_bytes state size ;
state.offset <- state.offset + size ;
TzEndian.get_int8_string state.buffer offset
let uint8 =
read_atom ~pp:string_of_int Binary_size.uint8 TzEndian.get_uint8_string
let uint16 =
read_atom ~pp:string_of_int Binary_size.int16 TzEndian.get_uint16_string
let int8 =
read_atom ~pp:string_of_int Binary_size.int8 TzEndian.get_int8_string
let int16 =
read_atom ~pp:string_of_int Binary_size.int16 TzEndian.get_int16_string
let int32 =
read_atom ~pp:Int32.to_string Binary_size.int32 TzEndian.get_int32_string
let int64 =
read_atom ~pp:Int64.to_string Binary_size.int64 TzEndian.get_int64_string
let float =
read_atom ~pp:string_of_float Binary_size.float TzEndian.get_double_string
let bool state name =
read_atom
~pp:(fun x -> string_of_bool (x <> 0))
Binary_size.int8
TzEndian.get_int8_string
state
name
<> 0
let uint30 =
read_atom ~pp:string_of_int Binary_size.uint30 @@ fun buffer ofs ->
let v = Int32.to_int (TzEndian.get_int32_string buffer ofs) in
if v < 0 then raise (Invalid_int {min = 0; v; max = (1 lsl 30) - 1}) ;
v
let int31 =
read_atom ~pp:string_of_int Binary_size.int31 @@ fun buffer ofs ->
Int32.to_int (TzEndian.get_int32_string buffer ofs)
let int = function
| `Int31 -> int31
| `Int16 -> int16
| `Int8 -> int8
| `Uint30 -> uint30
| `Uint16 -> uint16
| `Uint8 -> uint8
let ranged_int ~minimum ~maximum name state =
let read_int =
match Binary_size.range_to_size ~minimum ~maximum with
| `Int8 -> int8
| `Int16 -> int16
| `Int31 -> int31
| `Uint8 -> uint8
| `Uint16 -> uint16
| `Uint30 -> uint30
in
let ranged = read_int name state in
let ranged = if minimum > 0 then ranged + minimum else ranged in
if not (minimum <= ranged && ranged <= maximum) then
raise (Invalid_int {min = minimum; v = ranged; max = maximum}) ;
ranged
let ranged_float ~minimum ~maximum name state =
let ranged = float name state in
if not (minimum <= ranged && ranged <= maximum) then
raise (Invalid_float {min = minimum; v = ranged; max = maximum}) ;
ranged
let rec read_z res value bit_in_value name state initial_offset =
let byte = read_byte state in
let value = value lor ((byte land 0x7F) lsl bit_in_value) in
let bit_in_value = bit_in_value + 7 in
let bit_in_value, value =
if bit_in_value < 8 then (bit_in_value, value)
else (
Buffer.add_char res (Char.unsafe_chr (value land 0xFF)) ;
(bit_in_value - 8, value lsr 8))
in
if byte land 0x80 = 0x80 then
read_z res value bit_in_value name state initial_offset
else (
if bit_in_value > 0 then Buffer.add_char res (Char.unsafe_chr value) ;
if byte = 0x00 then raise Trailing_zero ;
let result = Z.of_bits (Buffer.contents res) in
let pretty_printed = Z.to_string result in
let value =
String.sub state.buffer initial_offset (state.offset - initial_offset)
in
state.slices <- {name; value; pretty_printed} :: state.slices ;
result)
let n name state =
let initial_offset = state.offset in
let first = read_byte state in
let first_value = first land 0x7F in
if first land 0x80 = 0x80 then
read_z (Buffer.create 100) first_value 7 name state initial_offset
else
let result = Z.of_int first_value in
let pretty_printed = Z.to_string result in
let value =
String.sub state.buffer initial_offset (state.offset - initial_offset)
in
state.slices <- {name; value; pretty_printed} :: state.slices ;
result
let z name state =
let initial_offset = state.offset in
let first = read_byte state in
let first_value = first land 0x3F in
let sign = first land 0x40 <> 0 in
if first land 0x80 = 0x80 then
let n =
read_z (Buffer.create 100) first_value 6 name state initial_offset
in
if sign then Z.neg n else n
else
let n = Z.of_int first_value in
if sign then Z.neg n else n
let string_enum arr name state =
let read_index =
match Binary_size.enum_size arr with
| `Uint8 -> uint8
| `Uint16 -> uint16
| `Uint30 -> uint30
in
let index = read_index name state in
if index >= Array.length arr then raise No_case_matched ;
arr.(index)
let fixed_length_bytes length =
read_atom length @@ fun buf ofs ->
Bytes.unsafe_of_string @@ String.sub buf ofs length
let fixed_length_string length =
read_atom ~pp:(Format.sprintf "%S") length @@ fun buf ofs ->
String.sub buf ofs length
let tag = function `Uint8 -> uint8 | `Uint16 -> uint16
end
(** Main recursive reading function, in continuation passing style. *)
let rec read_rec :
type ret. ret Encoding.t -> ?name:string -> slicer_state -> ret =
fun e ?name state ->
let ( !! ) x =
match name with None -> x | Some name -> Format.sprintf "%S (%s)" name x
in
let open Encoding in
match e.encoding with
| Null -> ()
| Empty -> ()
| Constant _ -> ()
| Ignore -> ()
| Bool -> Atom.bool !!"bool" state
| Int8 -> Atom.int8 !!"int8" state
| Uint8 -> Atom.uint8 !!"uint8" state
| Int16 -> Atom.int16 !!"int16" state
| Uint16 -> Atom.uint16 !!"uint16" state
| Int31 -> Atom.int31 !!"int31" state
| Int32 -> Atom.int32 !!"int32" state
| Int64 -> Atom.int64 !!"int64" state
| N -> Atom.n !!"N" state
| Z -> Atom.z !!"Z" state
| Float -> Atom.float !!"float" state
| Bytes (`Fixed n) -> Atom.fixed_length_bytes n !!"bytes" state
| Bytes `Variable ->
Atom.fixed_length_bytes state.remaining_bytes !!"bytes" state
| String (`Fixed n) -> Atom.fixed_length_string n !!"string" state
| String `Variable ->
Atom.fixed_length_string state.remaining_bytes !!"string" state
| Padded (e, n) ->
let v = read_rec e ?name state in
ignore (Atom.fixed_length_string n "padding" state : string) ;
v
| RangedInt {minimum; maximum} ->
Atom.ranged_int ~minimum ~maximum !!"ranged int" state
| RangedFloat {minimum; maximum} ->
Atom.ranged_float ~minimum ~maximum !!"ranged float" state
| String_enum (_, arr) -> Atom.string_enum arr !!"enum" state
| Array {length_limit; elts = e} ->
let l =
match length_limit with
| No_limit -> read_list Array_too_long max_int e ?name state
| At_most max_length ->
read_list Array_too_long max_length e ?name state
| Exactly exact_length -> read_fixed_list exact_length e ?name state
in
Array.of_list l
| List {length_limit; elts = e} -> (
match length_limit with
| No_limit -> read_list List_too_long max_int e ?name state
| At_most max_length -> read_list List_too_long max_length e ?name state
| Exactly exact_length -> read_fixed_list exact_length e ?name state)
| Obj (Req {encoding = e; name; _}) -> read_rec e ~name state
| Obj (Dft {encoding = e; name; _}) -> read_rec e ~name state
| Obj (Opt {kind = `Dynamic; encoding = e; name; _}) ->
let present = Atom.bool (name ^ " presence flag") state in
if not present then None else Some (read_rec e ~name:!!name state)
| Obj (Opt {kind = `Variable; encoding = e; name; _}) ->
if state.remaining_bytes = 0 then None
else Some (read_rec e ~name:!!name state)
| Objs {kind = `Fixed sz; left; right} ->
ignore (check_remaining_bytes state sz : int) ;
ignore (check_allowed_bytes state sz : int option) ;
let left = read_rec left ?name state in
let right = read_rec right ?name state in
(left, right)
| Objs {kind = `Dynamic; left; right} ->
let left = read_rec left ?name state in
let right = read_rec right ?name state in
(left, right)
| Objs {kind = `Variable; left; right} ->
read_variable_pair left right ?name state
| Tup e -> read_rec e ?name state
| Tups {kind = `Fixed sz; left; right} ->
ignore (check_remaining_bytes state sz : int) ;
ignore (check_allowed_bytes state sz : int option) ;
let left = read_rec left ?name state in
let right = read_rec right ?name state in
(left, right)
| Tups {kind = `Dynamic; left; right} ->
let left = read_rec left ?name state in
let right = read_rec right ?name state in
(left, right)
| Tups {kind = `Variable; left; right} ->
read_variable_pair left right ?name state
| Conv {inj; encoding; _} -> inj (read_rec encoding ?name state)
| Union {tag_size; cases; _} ->
let ctag = Atom.tag tag_size "DUMMY" state in
let (Case {encoding; inj; _}) =
try
List.find
(function
| Case {tag = tg; title; _} ->
if Uint_option.is_some tg && Uint_option.get tg = ctag then (
let {value; pretty_printed; _} = List.hd state.slices in
state.slices <-
{name = title ^ " tag"; value; pretty_printed}
:: List.tl state.slices ;
true)
else false)
cases
with Not_found -> raise (Unexpected_tag ctag)
in
inj (read_rec encoding ?name state)
| Dynamic_size {kind; encoding = e} ->
let sz = Atom.int kind "dynamic length" state in
let remaining = check_remaining_bytes state sz in
state.remaining_bytes <- sz ;
ignore (check_allowed_bytes state sz : int option) ;
let v = read_rec e ?name state in
if state.remaining_bytes <> 0 then raise Extra_bytes ;
state.remaining_bytes <- remaining ;
v
| Check_size {limit; encoding = e} ->
let old_allowed_bytes = state.allowed_bytes in
let limit =
match state.allowed_bytes with
| None -> limit
| Some current_limit -> min current_limit limit
in
state.allowed_bytes <- Some limit ;
let v = read_rec e ?name state in
let allowed_bytes =
match old_allowed_bytes with
| None -> None
| Some old_limit ->
let remaining =
match state.allowed_bytes with
| None -> assert false
| Some remaining -> remaining
in
let read = limit - remaining in
Some (old_limit - read)
in
state.allowed_bytes <- allowed_bytes ;
v
| Describe {encoding = e; id; _} -> read_rec e ~name:!!id state
| Splitted {encoding = e; _} -> read_rec e ?name state
| Mu {fix; name; _} -> read_rec (fix e) ~name:!!name state
| Delayed f -> read_rec (f ()) ?name state
and read_variable_pair :
type left right.
left Encoding.t ->
right Encoding.t ->
?name:string ->
slicer_state ->
left * right =
fun e1 e2 ?name state ->
match (Encoding.classify e1, Encoding.classify e2) with
| (`Dynamic | `Fixed _), `Variable ->
let left = read_rec e1 ?name state in
let right = read_rec e2 ?name state in
(left, right)
| `Variable, `Fixed n ->
if n > state.remaining_bytes then raise Not_enough_data ;
state.remaining_bytes <- state.remaining_bytes - n ;
let left = read_rec e1 ?name state in
assert (state.remaining_bytes = 0) ;
state.remaining_bytes <- n ;
let right = read_rec e2 ?name state in
assert (state.remaining_bytes = 0) ;
(left, right)
| _ -> assert false
and read_list :
type a.
read_error -> int -> a Encoding.t -> ?name:string -> slicer_state -> a list
=
fun error max_length e ?name state ->
let name = Option.map (fun name -> name ^ " element") name in
let rec loop max_length acc =
if state.remaining_bytes = 0 then List.rev acc
else if max_length = 0 then raise error
else
let v = read_rec e ?name state in
loop (max_length - 1) (v :: acc)
in
loop max_length []
and read_fixed_list :
type a. int -> a Encoding.t -> ?name:string -> slicer_state -> a list =
fun exact_length e ?name state ->
let name = Option.map (fun name -> name ^ " element") name in
let rec loop exact_length acc =
if exact_length = 0 then List.rev acc
else if state.remaining_bytes = 0 then raise Not_enough_data
else
let v = read_rec e ?name state in
loop (exact_length - 1) (v :: acc)
in
loop exact_length []
(** Various entry points *)
let slice_exn encoding state =
let _ = read_rec encoding state in
List.rev state.slices
let slice encoding state =
try Ok (slice_exn encoding state) with Read_error e -> Error e
let slice_opt encoding state =
try Some (slice_exn encoding state) with Read_error _ -> None
let slice_string_exn encoding buffer =
let len = String.length buffer in
let state =
{
buffer;
offset = 0;
slices = [];
remaining_bytes = len;
allowed_bytes = None;
}
in
let _ = read_rec encoding state in
if state.offset <> len then raise Extra_bytes ;
List.rev state.slices
let slice_string encoding buffer =
try Ok (slice_string_exn encoding buffer) with Read_error e -> Error e
let slice_string_opt encoding buffer =
try Some (slice_string_exn encoding buffer) with Read_error _ -> None
let slice_bytes e b = slice_string e (Bytes.unsafe_to_string b)
let slice_bytes_opt e b = slice_string_opt e (Bytes.unsafe_to_string b)
let slice_bytes_exn e b = slice_string_exn e (Bytes.unsafe_to_string b)
Computing file changes ...