https://gitlab.com/nomadic-labs/data-encoding/
Tip revision: 891a8cbde930fd0123a236d6d9fbf4ab7aa1af53 authored by Peter Duchovni on 04 June 2021, 22:37:29 UTC
Autoformat for CI
Autoformat for CI
Tip revision: 891a8cb
binary_stream_reader.ml
(*****************************************************************************)
(* *)
(* Open Source License *)
(* Copyright (c) 2018 Dynamic Ledger Solutions, Inc. <contact@tezos.com> *)
(* *)
(* Permission is hereby granted, free of charge, to any person obtaining a *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *)
(* and/or sell copies of the Software, and to permit persons to whom the *)
(* Software is furnished to do so, subject to the following conditions: *)
(* *)
(* The above copyright notice and this permission notice shall be included *)
(* in all copies or substantial portions of the Software. *)
(* *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *)
(* DEALINGS IN THE SOFTWARE. *)
(* *)
(*****************************************************************************)
open Binary_error_types
(* Do not leak [Local_read_error] outside of this module.
It is intended as a local control-flow mechanism only. *)
exception Local_read_error of read_error
let raise_read_error e = raise (Local_read_error e)
(** Persistent state of the binary reader. *)
type state = {
stream: Binary_stream.t; (** All the remaining data to be read. *)
remaining_bytes: int option;
(** Total number of bytes that should be from 'stream' (None =
unlimited). Reading less bytes should raise [Extra_bytes] and
trying to read more bytes should raise [Not_enough_data]. *)
allowed_bytes: int option;
(** Maximum number of bytes that are allowed to be read from 'stream'
before to fail (None = unlimited). *)
total_read: int;
(** Total number of bytes that has been read from [stream] since the
beginning. *)
}
(** Return type for the function [read_rec]. See [Data_encoding] for its
description. *)
type 'ret status =
| Success of {result: 'ret; size: int; stream: Binary_stream.t}
| Await of (Bytes.t -> 'ret status)
| Error of read_error
let check_remaining_bytes state size =
match state.remaining_bytes with
| Some len when len < size -> raise_read_error Not_enough_data
| Some len -> Some (len - size)
| None -> None
let check_allowed_bytes state size =
match state.allowed_bytes with
| Some len when len < size -> raise_read_error Size_limit_exceeded
| Some len -> Some (len - size)
| None -> None
(** [read_atom resume size conv state k] reads [size] bytes from [state],
pass it to [conv] to be decoded, and finally call the continuation [k]
with the decoded value and the updated state.
The function [conv] is also allowed to raise [Local_read_error err].
In that case the exception is caught and [Error err] is returned.
If there is not enough [remaining_bytes] to be read in [state], the
function returns [Error Not_enough_data] instead of calling
the continuation.
If there is not enough [allowed_bytes] to be read in [state], the
function returns [Error Size_limit_exceeded] instead of calling
the continuation.
If there is not enough bytes to be read in [state], the function
returns [Await resume] instead of calling the continuation. *)
let read_atom resume size conv state k =
match
let remaining_bytes = check_remaining_bytes state size in
let allowed_bytes = check_allowed_bytes state size in
let (res, stream) = Binary_stream.read state.stream size in
( conv res.buffer res.ofs,
{
remaining_bytes;
allowed_bytes;
stream;
total_read = state.total_read + size;
} )
with
| exception Local_read_error error -> Error error
| exception Binary_stream.Need_more_data -> Await resume
| v -> k v
(* tail call *)
(** Reader for all the atomic types. *)
module Atom = struct
let uint8 r = read_atom r Binary_size.uint8 TzEndian.get_uint8
let uint16 r = read_atom r Binary_size.int16 TzEndian.get_uint16
let int8 r = read_atom r Binary_size.int8 TzEndian.get_int8
let int16 r = read_atom r Binary_size.int16 TzEndian.get_int16
let int32 r = read_atom r Binary_size.int32 TzEndian.get_int32
let int64 r = read_atom r Binary_size.int64 TzEndian.get_int64
let float r = read_atom r Binary_size.float TzEndian.get_double
let bool resume state k =
int8 resume state @@ fun (v, state) -> k (v <> 0, state)
let uint30 r =
read_atom r Binary_size.uint30 @@ fun buffer ofs ->
let v = Int32.to_int (TzEndian.get_int32 buffer ofs) in
if v < 0 then
raise_read_error (Invalid_int {min = 0; v; max = (1 lsl 30) - 1});
v
let int31 r =
read_atom r Binary_size.int31 @@ fun buffer ofs ->
let r32 = TzEndian.get_int32 buffer ofs in
let r = Int32.to_int r32 in
if not (-0x4000_0000l <= r32 && r32 <= 0x3fff_ffffl) then
raise_read_error
(Invalid_int {min = -0x4000_0000; v = r; max = 0x3fff_ffff});
r
let int = function
| `Int31 -> int31
| `Int16 -> int16
| `Int8 -> int8
| `Uint30 -> uint30
| `Uint16 -> uint16
| `Uint8 -> uint8
let ranged_int ~minimum ~maximum resume state k =
let read_int =
match Binary_size.range_to_size ~minimum ~maximum with
| `Int8 -> int8
| `Int16 -> int16
| `Int31 -> int31
| `Uint8 -> uint8
| `Uint16 -> uint16
| `Uint30 -> uint30
in
read_int resume state @@ fun (ranged, state) ->
let ranged = if minimum > 0 then ranged + minimum else ranged in
if not (minimum <= ranged && ranged <= maximum) then
Error (Invalid_int {min = minimum; v = ranged; max = maximum})
else k (ranged, state)
let ranged_float ~minimum ~maximum resume state k =
float resume state @@ fun (ranged, state) ->
if not (minimum <= ranged && ranged <= maximum) then
Error (Invalid_float {min = minimum; v = ranged; max = maximum})
else k (ranged, state)
let rec read_z res value bit_in_value state k =
let resume buffer =
let stream = Binary_stream.push buffer state.stream in
read_z res value bit_in_value {state with stream} k
in
uint8 resume state @@ fun (byte, state) ->
let value = value lor ((byte land 0x7F) lsl bit_in_value) in
let bit_in_value = bit_in_value + 7 in
let (bit_in_value, value) =
if bit_in_value < 8 then (bit_in_value, value)
else (
Buffer.add_char res (Char.unsafe_chr (value land 0xFF));
(bit_in_value - 8, value lsr 8))
in
if byte land 0x80 = 0x80 then read_z res value bit_in_value state k
else (
if bit_in_value > 0 then Buffer.add_char res (Char.unsafe_chr value);
if byte = 0x00 then raise_read_error Trailing_zero;
k (Z.of_bits (Buffer.contents res), state))
let n resume state k =
uint8 resume state @@ fun (first, state) ->
let first_value = first land 0x7F in
if first land 0x80 = 0x80 then
read_z (Buffer.create 100) first_value 7 state k
else k (Z.of_int first_value, state)
let z resume state k =
uint8 resume state @@ fun (first, state) ->
let first_value = first land 0x3F in
let sign = first land 0x40 <> 0 in
if first land 0x80 = 0x80 then
read_z (Buffer.create 100) first_value 6 state @@ fun (n, state) ->
k ((if sign then Z.neg n else n), state)
else
let n = Z.of_int first_value in
k ((if sign then Z.neg n else n), state)
let string_enum arr resume state k =
let read_index =
match Binary_size.enum_size arr with
| `Uint8 -> uint8
| `Uint16 -> uint16
| `Uint30 -> uint30
in
read_index resume state @@ fun (index, state) ->
if index >= Array.length arr then Error No_case_matched
else k (arr.(index), state)
let fixed_length_bytes length r =
read_atom r length @@ fun buf ofs -> Bytes.sub buf ofs length
let fixed_length_string length r =
read_atom r length @@ fun buf ofs -> Bytes.sub_string buf ofs length
let tag = function `Uint8 -> uint8 | `Uint16 -> uint16
end
let rec skip n state k =
let resume buffer =
let stream = Binary_stream.push buffer state.stream in
try skip n {state with stream} k with Local_read_error err -> Error err
in
Atom.fixed_length_string n resume state @@ fun ((_, state) : string * _) ->
k state
(** Main recursive reading function, in continuation passing style. *)
let rec read_rec :
type next ret.
bool ->
next Encoding.t ->
state ->
(next * state -> ret status) ->
ret status =
fun whole e state k ->
let resume buffer =
let stream = Binary_stream.push buffer state.stream in
try read_rec whole e {state with stream} k
with Local_read_error err -> Error err
in
let open Encoding in
assert (Encoding.classify e <> `Variable || state.remaining_bytes <> None);
match e.encoding with
| Null -> k ((), state)
| Empty -> k ((), state)
| Constant _ -> k ((), state)
| Ignore -> k ((), state)
| Bool -> Atom.bool resume state k
| Int8 -> Atom.int8 resume state k
| Uint8 -> Atom.uint8 resume state k
| Int16 -> Atom.int16 resume state k
| Uint16 -> Atom.uint16 resume state k
| Int31 -> Atom.int31 resume state k
| Int32 -> Atom.int32 resume state k
| Int64 -> Atom.int64 resume state k
| N -> Atom.n resume state k
| Z -> Atom.z resume state k
| Float -> Atom.float resume state k
| Bytes (`Fixed n) -> Atom.fixed_length_bytes n resume state k
| Bytes `Variable ->
let size = remaining_bytes state in
Atom.fixed_length_bytes size resume state k
| String (`Fixed n) -> Atom.fixed_length_string n resume state k
| String `Variable ->
let size = remaining_bytes state in
Atom.fixed_length_string size resume state k
| Padded (e, n) ->
read_rec false e state @@ fun (v, state) ->
skip n state @@ fun state -> k (v, state)
| RangedInt {minimum; maximum} ->
Atom.ranged_int ~minimum ~maximum resume state k
| RangedFloat {minimum; maximum} ->
Atom.ranged_float ~minimum ~maximum resume state k
| String_enum (_, arr) -> Atom.string_enum arr resume state k
| Array (max_length, e) ->
let max_length = match max_length with Some l -> l | None -> max_int in
read_list Array_too_long max_length e state @@ fun (l, state) ->
k (Array.of_list l, state)
| List (max_length, e) ->
let max_length = match max_length with Some l -> l | None -> max_int in
read_list List_too_long max_length e state k
| Obj (Req {encoding = e; _}) -> read_rec whole e state k
| Obj (Dft {encoding = e; _}) -> read_rec whole e state k
| Obj (Opt {kind = `Dynamic; encoding = e; _}) ->
Atom.bool resume state @@ fun (present, state) ->
if not present then k (None, state)
else read_rec whole e state @@ fun (v, state) -> k (Some v, state)
| Obj (Opt {kind = `Variable; encoding = e; _}) ->
let size = remaining_bytes state in
if size = 0 then k (None, state)
else read_rec whole e state @@ fun (v, state) -> k (Some v, state)
| Objs {kind = `Fixed sz; left; right} ->
ignore (check_remaining_bytes state sz : int option);
ignore (check_allowed_bytes state sz : int option);
read_rec false left state @@ fun (left, state) ->
read_rec whole right state @@ fun (right, state) ->
k ((left, right), state)
| Objs {kind = `Dynamic; left; right} ->
read_rec false left state @@ fun (left, state) ->
read_rec whole right state @@ fun (right, state) ->
k ((left, right), state)
| Objs {kind = `Variable; left; right} ->
read_variable_pair left right state k
| Tup e -> read_rec whole e state k
| Tups {kind = `Fixed sz; left; right} ->
ignore (check_remaining_bytes state sz : int option);
ignore (check_allowed_bytes state sz : int option);
read_rec false left state @@ fun (left, state) ->
read_rec whole right state @@ fun (right, state) ->
k ((left, right), state)
| Tups {kind = `Dynamic; left; right} ->
read_rec false left state @@ fun (left, state) ->
read_rec whole right state @@ fun (right, state) ->
k ((left, right), state)
| Tups {kind = `Variable; left; right} ->
read_variable_pair left right state k
| Conv {inj; encoding; _} ->
read_rec whole encoding state @@ fun (v, state) -> k (inj v, state)
| Union {tag_size; cases; _} -> (
Atom.tag tag_size resume state @@ fun (ctag, state) ->
match
List.find_opt
(fun (Case {tag; _}) ->
Uint_option.fold tag ~none:false ~some:(fun tag -> tag = ctag))
cases
with
| None -> Error (Unexpected_tag ctag)
| Some (Case {encoding; inj; _}) ->
read_rec whole encoding state @@ fun (v, state) -> k (inj v, state))
| Dynamic_size {kind; encoding = e} ->
Atom.int kind resume state @@ fun (sz, state) ->
let remaining = check_remaining_bytes state sz in
let state = {state with remaining_bytes = Some sz} in
ignore (check_allowed_bytes state sz : int option);
read_rec true e state @@ fun (v, state) ->
if state.remaining_bytes <> Some 0 then Error Extra_bytes
else k (v, {state with remaining_bytes = remaining})
| Check_size {limit; encoding = e} ->
let old_allowed_bytes = state.allowed_bytes in
let limit =
match state.allowed_bytes with
| None -> limit
| Some current_limit -> min current_limit limit
in
(match state.remaining_bytes with
| Some remaining when whole && limit < remaining ->
raise_read_error Size_limit_exceeded
| _ -> ());
let state = {state with allowed_bytes = Some limit} in
read_rec whole e state @@ fun (v, state) ->
let allowed_bytes =
match old_allowed_bytes with
| None -> None
| Some old_limit ->
let remaining =
match state.allowed_bytes with
| None -> assert false
| Some remaining -> remaining
in
let read = limit - remaining in
Some (old_limit - read)
in
k (v, {state with allowed_bytes})
| Describe {encoding = e; _} -> read_rec whole e state k
| Splitted {encoding = e; _} -> read_rec whole e state k
| Mu {fix; _} ->
let e = fix e in
read_rec whole e state k
| Delayed f ->
let e = f () in
read_rec whole e state k
and remaining_bytes {remaining_bytes; _} =
match remaining_bytes with
| None ->
(* This function should only be called with a variable encoding,
for which the `remaining_bytes` should never be `None`. *)
assert false
| Some len -> len
and read_variable_pair :
type left right ret.
left Encoding.t ->
right Encoding.t ->
state ->
((left * right) * state -> ret status) ->
ret status =
fun e1 e2 state k ->
let size = remaining_bytes state in
match (Encoding.classify e1, Encoding.classify e2) with
| ((`Dynamic | `Fixed _), `Variable) ->
read_rec false e1 state @@ fun (left, state) ->
read_rec true e2 state @@ fun (right, state) -> k ((left, right), state)
| (`Variable, `Fixed n) ->
if n > size then Error Not_enough_data
else
let state = {state with remaining_bytes = Some (size - n)} in
read_rec true e1 state @@ fun (left, state) ->
assert (state.remaining_bytes = Some 0);
let state = {state with remaining_bytes = Some n} in
read_rec true e2 state @@ fun (right, state) ->
assert (state.remaining_bytes = Some 0);
k ((left, right), state)
| _ -> assert false
(* Should be rejected by [Encoding.Kind.combine] *)
and read_list :
type a ret.
read_error ->
int ->
a Encoding.t ->
state ->
(a list * state -> ret status) ->
ret status =
fun error max_length e state k ->
let rec loop state acc max_length =
let size = remaining_bytes state in
if size = 0 then k (List.rev acc, state)
else if max_length = 0 then raise_read_error error
else
read_rec false e state @@ fun (v, state) ->
loop state (v :: acc) (max_length - 1)
in
loop state [] max_length
let read_rec e state k =
try read_rec false e state k with
| (Out_of_memory | Stack_overflow) as exc -> raise exc
| Invariant_guard s -> Error (User_invariant_guard s)
| Local_read_error re -> Error re
| exc ->
let s = Printexc.to_string exc in
Error (Exception_raised_in_user_function s)
(** ******************** *)
(** Various entry points *)
let success (v, state) =
Success {result = v; size = state.total_read; stream = state.stream}
let read_stream ?(init = Binary_stream.empty) encoding =
match Encoding.classify encoding with
| `Variable ->
invalid_arg "Data_encoding.Binary.read_stream: variable encoding"
| `Dynamic | `Fixed _ ->
(* No hardcoded read limit in a stream. *)
let state =
{
remaining_bytes = None;
allowed_bytes = None;
stream = init;
total_read = 0;
}
in
read_rec encoding state success