(*****************************************************************************) (* *) (* Open Source License *) (* Copyright (c) 2018 Dynamic Ledger Solutions, Inc. *) (* Copyright (c) 2018 Nomadic Labs. *) (* *) (* Permission is hereby granted, free of charge, to any person obtaining a *) (* copy of this software and associated documentation files (the "Software"),*) (* to deal in the Software without restriction, including without limitation *) (* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) (* and/or sell copies of the Software, and to permit persons to whom the *) (* Software is furnished to do so, subject to the following conditions: *) (* *) (* The above copyright notice and this permission notice shall be included *) (* in all copies or substantial portions of the Software. *) (* *) (* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) (* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) (* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) (* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) (* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) (* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) (* DEALINGS IN THE SOFTWARE. *) (* *) (*****************************************************************************) open Error_monad open Micheline type 'a parsing_result = 'a * error list let compare compare (aa, ael) (ba, bel) = Compare.or_else (compare aa ba) (fun () -> (* FIXME: we need error comparison *) Stdlib.compare ael bel) type point = {point : int; byte : int; line : int; column : int} let point_zero = {point = 0; byte = 0; line = 0; column = 0} let point_encoding = let open Data_encoding in conv (fun {line; column; point; byte} -> (line, column, point, byte)) (fun (line, column, point, byte) -> {line; column; point; byte}) (obj4 (req "line" uint16) (req "column" uint16) (req "point" uint16) (req "byte" uint16)) type location = {start : point; stop : point} let location_zero = {start = point_zero; stop = point_zero} let location_encoding = let open Data_encoding in conv (fun {start; stop} -> (start, stop)) (fun (start, stop) -> {start; stop}) (obj2 (req "start" point_encoding) (req "stop" point_encoding)) type token_value = | String of string | Bytes of string | Int of string | Ident of string | Annot of string | Comment of string | Eol_comment of string | Semi | Open_paren | Close_paren | Open_brace | Close_brace let token_value_encoding = let open Data_encoding in union [ case (Tag 0) ~title:"String" (obj1 (req "string" string)) (function String s -> Some s | _ -> None) (fun s -> String s); case (Tag 1) ~title:"Int" (obj1 (req "int" string)) (function Int s -> Some s | _ -> None) (fun s -> Int s); case (Tag 2) ~title:"Annot" (obj1 (req "annot" string)) (function Annot s -> Some s | _ -> None) (fun s -> Annot s); case (Tag 3) ~title:"Comment" (obj2 (req "comment" string) (dft "end_of_line" bool false)) (function | Comment s -> Some (s, false) | Eol_comment s -> Some (s, true) | _ -> None) (function s, false -> Comment s | s, true -> Eol_comment s); case (Tag 4) ~title:"Punctuation" (obj1 (req "punctuation" (string_enum [ ("(", Open_paren); (")", Close_paren); ("{", Open_brace); ("}", Close_brace); (";", Semi); ]))) (fun t -> Some t) (fun t -> t); case (Tag 5) ~title:"Bytes" (obj1 (req "bytes" string)) (function Bytes s -> Some s | _ -> None) (fun s -> Bytes s); case (Tag 6) ~title:"Ident" (obj1 (req "ident" string)) (function Ident s -> Some s | _ -> None) (fun s -> Ident s); ] type token = {token : token_value; loc : location} let max_annot_length = 255 type error += Invalid_utf8_sequence of point * string type error += Unexpected_character of point * string type error += Undefined_escape_sequence of point * string type error += Missing_break_after_number of point type error += Unterminated_string of location type error += Unterminated_integer of location type error += Invalid_hex_bytes of location type error += Unterminated_comment of location type error += Annotation_length of location let tokenize source = let decoder = Uutf.decoder ~encoding:`UTF_8 (`String source) in let here () = { point = Uutf.decoder_count decoder; byte = Uutf.decoder_byte_count decoder; line = Uutf.decoder_line decoder; column = Uutf.decoder_col decoder; } in let tok start stop token = {loc = {start; stop}; token} in let stack = ref [] in let errors = ref [] in let rec next () = match !stack with | charloc :: charlocs -> stack := charlocs ; charloc | [] -> ( let loc = here () in match Uutf.decode decoder with | `Await -> assert false | `Malformed s -> errors := Invalid_utf8_sequence (loc, s) :: !errors ; next () | (`Uchar _ | `End) as other -> (other, loc)) in let back charloc = stack := charloc :: !stack in let uchar_to_char c = if Uchar.is_char c then Some (Uchar.to_char c) else None in let allowed_ident_char c = match uchar_to_char c with | Some ('a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9') -> true | Some _ | None -> false in let allowed_annot_char c = match uchar_to_char c with | Some ('a' .. 'z' | 'A' .. 'Z' | '_' | '.' | '%' | '@' | '0' .. '9') -> true | Some _ | None -> false in let rec skip acc = match next () with | `End, _ -> List.rev acc | `Uchar c, start -> ( match uchar_to_char c with | Some ('a' .. 'z' | 'A' .. 'Z' | '_') -> ident acc start (fun s _ -> Ident s) | Some ('@' | ':' | '$' | '&' | '%' | '!' | '?') -> annot acc start (fun str stop -> if String.length str > max_annot_length then errors := Annotation_length {start; stop} :: !errors ; Annot str) | Some '-' -> ( match next () with | `End, stop -> errors := Unterminated_integer {start; stop} :: !errors ; List.rev acc | (`Uchar c, stop) as first -> ( match uchar_to_char c with | Some '0' -> base acc start | Some '1' .. '9' -> integer acc start | Some _ | None -> errors := Unterminated_integer {start; stop} :: !errors ; back first ; skip acc)) | Some '0' -> base acc start | Some '1' .. '9' -> integer acc start | Some (' ' | '\n') -> skip acc | Some ';' -> skip (tok start (here ()) Semi :: acc) | Some '{' -> skip (tok start (here ()) Open_brace :: acc) | Some '}' -> skip (tok start (here ()) Close_brace :: acc) | Some '(' -> skip (tok start (here ()) Open_paren :: acc) | Some ')' -> skip (tok start (here ()) Close_paren :: acc) | Some '"' -> string acc [] start | Some '#' -> eol_comment acc start | Some '/' -> ( match next () with | `Uchar c, _ when Uchar.equal c (Uchar.of_char '*') -> comment acc start 0 | ((`Uchar _ | `End), _) as charloc -> errors := Unexpected_character (start, "/") :: !errors ; back charloc ; skip acc) | Some _ | None -> let byte = Uutf.decoder_byte_count decoder in let s = String.sub source start.byte (byte - start.byte) in errors := Unexpected_character (start, s) :: !errors ; skip acc) and base acc start = match next () with | (`Uchar c, stop) as charloc -> ( match uchar_to_char c with | Some '0' .. '9' -> integer acc start | Some 'x' -> bytes acc start | Some ('a' .. 'w' | 'y' | 'z' | 'A' .. 'Z' | '_') -> errors := Missing_break_after_number stop :: !errors ; back charloc ; skip (tok start stop (Int "0") :: acc) | Some _ | None -> back charloc ; skip (tok start stop (Int "0") :: acc)) | (_, stop) as other -> back other ; skip (tok start stop (Int "0") :: acc) and integer acc start = let tok stop = let value = String.sub source start.byte (stop.byte - start.byte) in tok start stop (Int value) in match next () with | (`Uchar c, stop) as charloc -> ( let missing_break () = errors := Missing_break_after_number stop :: !errors ; back charloc ; skip (tok stop :: acc) in match Uchar.to_char c with | '0' .. '9' -> integer acc start | 'a' .. 'z' | 'A' .. 'Z' | '_' -> missing_break () | _ -> back charloc ; skip (tok stop :: acc)) | (`End, stop) as other -> back other ; skip (tok stop :: acc) and bytes acc start = let tok stop = let value = String.sub source start.byte (stop.byte - start.byte) in tok start stop (Bytes value) in match next () with | (`Uchar c, stop) as charloc -> ( let missing_break () = errors := Missing_break_after_number stop :: !errors ; back charloc ; skip (tok stop :: acc) in match Uchar.to_char c with | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> bytes acc start | 'g' .. 'z' | 'G' .. 'Z' -> missing_break () | _ -> back charloc ; skip (tok stop :: acc)) | (`End, stop) as other -> back other ; skip (tok stop :: acc) and string acc sacc start = let tok () = tok start (here ()) (String (String.concat "" (List.rev sacc))) in match next () with | `End, stop -> errors := Unterminated_string {start; stop} :: !errors ; skip (tok () :: acc) | `Uchar c, stop -> ( match uchar_to_char c with | Some '"' -> skip (tok () :: acc) | Some ('\n' | '\r') -> errors := Unterminated_string {start; stop} :: !errors ; skip (tok () :: acc) | Some '\\' -> ( match next () with | `End, stop -> errors := Unterminated_string {start; stop} :: !errors ; skip (tok () :: acc) | `Uchar c, loc -> ( match uchar_to_char c with | Some '"' -> string acc ("\"" :: sacc) start | Some 'r' -> string acc ("\r" :: sacc) start | Some 'n' -> string acc ("\n" :: sacc) start | Some 't' -> string acc ("\t" :: sacc) start | Some 'b' -> string acc ("\b" :: sacc) start | Some '\\' -> string acc ("\\" :: sacc) start | Some _ | None -> let byte = Uutf.decoder_byte_count decoder in let s = String.sub source loc.byte (byte - loc.byte) in errors := Undefined_escape_sequence (loc, s) :: !errors ; string acc sacc start)) | Some _ | None -> let byte = Uutf.decoder_byte_count decoder in let s = String.sub source stop.byte (byte - stop.byte) in string acc (s :: sacc) start) and generic_ident allow_char acc start (ret : string -> point -> token_value) = let tok stop = let name = String.sub source start.byte (stop.byte - start.byte) in tok start stop (ret name stop) in match next () with | (`Uchar c, stop) as charloc -> if allow_char c then generic_ident allow_char acc start ret else ( back charloc ; skip (tok stop :: acc)) | (_, stop) as other -> back other ; skip (tok stop :: acc) and ident acc start ret = generic_ident allowed_ident_char acc start ret and annot acc start ret = generic_ident allowed_annot_char acc start ret and comment acc start lvl = match next () with | `End, stop -> errors := Unterminated_comment {start; stop} :: !errors ; let text = String.sub source start.byte (stop.byte - start.byte) in skip (tok start stop (Comment text) :: acc) | `Uchar c, _ -> ( match uchar_to_char c with | Some '*' -> ( match next () with | `Uchar c, _ when Uchar.equal c (Uchar.of_char '/') -> if lvl = 0 then let stop = here () in let text = String.sub source start.byte (stop.byte - start.byte) in skip (tok start stop (Comment text) :: acc) else comment acc start (lvl - 1) | other -> back other ; comment acc start lvl) | Some '/' -> ( match next () with | `Uchar c, _ when Uchar.equal c (Uchar.of_char '*') -> comment acc start (lvl + 1) | other -> back other ; comment acc start lvl) | Some _ | None -> comment acc start lvl) and eol_comment acc start = let tok stop = let text = String.sub source start.byte (stop.byte - start.byte) in tok start stop (Eol_comment text) in match next () with | `Uchar c, stop -> ( match uchar_to_char c with | Some '\n' -> skip (tok stop :: acc) | Some _ | None -> eol_comment acc start) | (_, stop) as other -> back other ; skip (tok stop :: acc) in let tokens = skip [] in (tokens, List.rev !errors) type node = (location, string) Micheline.node (* Beginning of a sequence of consecutive primitives *) let min_point : node list -> point = function | [] -> point_zero | Int ({start; _}, _) :: _ | String ({start; _}, _) :: _ | Bytes ({start; _}, _) :: _ | Prim ({start; _}, _, _, _) :: _ | Seq ({start; _}, _) :: _ -> start (* End of a sequence of consecutive primitives *) let rec max_point : node list -> point = function | [] -> point_zero | _ :: (_ :: _ as rest) -> max_point rest | [Int ({stop; _}, _)] | [String ({stop; _}, _)] | [Bytes ({stop; _}, _)] | [Prim ({stop; _}, _, _, _)] | [Seq ({stop; _}, _)] -> stop (* An item in the parser's state stack. Not every value of type [mode list] is a valid parsing context. It must respect the following additional invariants. - a state stack always ends in [Toplevel _], - [Toplevel _] does not appear anywhere else, - [Unwrapped _] cannot appear directly on top of [Wrapped _], - [Wrapped _] cannot appear directly on top of [Sequence _], - [Wrapped _] cannot appear directly on top of [Sequence _]. *) type mode = | Toplevel of node list | Expression of node option | Sequence of token * node list | Unwrapped of location * string * node list * string list | Wrapped of token * string * node list * string list (* Enter a new parsing state. *) let push_mode mode stack = mode :: stack (* Leave a parsing state. *) let pop_mode = function [] -> assert false | _ :: rest -> rest (* Usually after a [pop_mode], jump back into the previous parsing state, injecting the current reduction (insert the just parsed item of a sequence or argument of a primitive application). *) let fill_mode result = function | [] -> assert false | Expression _ :: _ :: _ -> assert false | [Expression (Some _)] -> assert false | Toplevel _ :: _ :: _ -> assert false | [Expression None] -> [Expression (Some result)] | [Toplevel exprs] -> [Toplevel (result :: exprs)] | Sequence (token, exprs) :: rest -> Sequence (token, result :: exprs) :: rest | Wrapped (token, name, exprs, annot) :: rest -> Wrapped (token, name, result :: exprs, annot) :: rest | Unwrapped (start, name, exprs, annot) :: rest -> Unwrapped (start, name, result :: exprs, annot) :: rest type error += Unclosed of token type error += Unexpected of token type error += Extra of token type error += Misaligned of node type error += Empty let rec annots = function | {token = Annot annot; _} :: rest -> let annots, rest = annots rest in (annot :: annots, rest) | rest -> ([], rest) let rec parse ?(check = true) errors tokens stack = (* Two steps: - 1. parse without checking indentation [parse] - 2. check indentation [check] (inlined in 1) *) match (stack, tokens) with (* Start by preventing all absurd cases, so now the pattern matching exhaustivity can tell us that we treater all possible tokens for all possible valid states. *) | [], _ | [Wrapped _], _ | [Unwrapped _], _ | Unwrapped _ :: Unwrapped _ :: _, _ | Unwrapped _ :: Wrapped _ :: _, _ | Toplevel _ :: _ :: _, _ | Expression _ :: _ :: _, _ -> assert false (* Return *) | Expression (Some result) :: _, [] -> ([result], List.rev errors) | Expression (Some _) :: _, token :: rem -> let errors = Unexpected token :: errors in parse ~check errors rem (* skip *) stack | Expression None :: _, [] -> let errors = Empty :: errors in let ghost = {start = point_zero; stop = point_zero} in ([Seq (ghost, [])], List.rev errors) | [Toplevel [(Seq (_, exprs) as expr)]], [] -> let errors = if check then do_check ~toplevel:false errors expr else errors in (exprs, List.rev errors) | [Toplevel exprs], [] -> let exprs = List.rev exprs in let loc = {start = min_point exprs; stop = max_point exprs} in let expr = Seq (loc, exprs) in let errors = if check then do_check ~toplevel:true errors expr else errors in (exprs, List.rev errors) (* Ignore comments *) | _, {token = Eol_comment _ | Comment _; _} :: rest -> parse ~check errors rest stack | ( (Expression None | Sequence _ | Toplevel _) :: _, ({token = Int _ | String _ | Bytes _; _} as token) :: {token = Eol_comment _ | Comment _; _} :: rest ) | ( (Wrapped _ | Unwrapped _) :: _, ({token = Open_paren; _} as token) :: {token = Eol_comment _ | Comment _; _} :: rest ) -> parse ~check errors (token :: rest) stack (* Erroneous states *) | ( (Wrapped _ | Unwrapped _) :: _, ({token = Open_paren; _} as token) :: {token = Open_paren | Open_brace; _} :: rem ) | ( Unwrapped _ :: Expression _ :: _, ({token = Semi | Close_brace | Close_paren; _} as token) :: rem ) | ( Expression None :: _, ({token = Semi | Close_brace | Close_paren | Open_paren; _} as token) :: rem ) -> let errors = Unexpected token :: errors in parse ~check errors rem (* skip *) stack | ( (Sequence _ | Toplevel _) :: _, ({token = Semi; _} as valid) :: ({token = Semi; _} as token) :: rem ) -> let errors = Extra token :: errors in parse ~check errors (* skip *) (valid :: rem) stack | ( (Wrapped _ | Unwrapped _) :: _, {token = Open_paren; _} :: ({token = Int _ | String _ | Bytes _ | Annot _ | Close_paren; _} as token) :: rem ) | ( (Expression None | Sequence _ | Toplevel _) :: _, {token = Int _ | String _ | Bytes _; _} :: ({ token = ( Ident _ | Int _ | String _ | Bytes _ | Annot _ | Close_paren | Open_paren | Open_brace ); _; } as token) :: rem ) | ( Unwrapped (_, _, _, _) :: Toplevel _ :: _, ({token = Close_brace; _} as token) :: rem ) | Unwrapped (_, _, _, _) :: _, ({token = Close_paren; _} as token) :: rem | [Toplevel _], ({token = Close_paren; _} as token) :: rem | [Toplevel _], ({token = Open_paren; _} as token) :: rem | [Toplevel _], ({token = Close_brace; _} as token) :: rem | Sequence _ :: _, ({token = Open_paren; _} as token) :: rem | Sequence _ :: _, ({token = Close_paren; _} as token) :: rem | ( (Wrapped _ | Unwrapped _) :: _, ({token = Open_paren; _} as token) :: (({token = Close_brace | Semi; _} :: _ | []) as rem) ) | _, ({token = Annot _; _} as token) :: rem -> let errors = Unexpected token :: errors in parse ~check errors rem (* skip *) stack | Wrapped (token, _, _, _) :: _, ([] | {token = Close_brace | Semi; _} :: _) -> let errors = Unclosed token :: errors in let fake = {token with token = Close_paren} in let tokens = (* insert *) fake :: tokens in parse ~check errors tokens stack | (Sequence (token, _) :: _ | Unwrapped _ :: Sequence (token, _) :: _), [] -> let errors = Unclosed token :: errors in let fake = {token with token = Close_brace} in let tokens = (* insert *) fake :: tokens in parse ~check errors tokens stack (* Valid states *) | ( (Toplevel _ | Sequence (_, _)) :: _, {token = Ident name; loc} :: ({token = Annot _; _} :: _ as rest) ) -> let annots, rest = annots rest in let mode = Unwrapped (loc, name, [], annots) in parse ~check errors rest (push_mode mode stack) | ( (Expression None | Toplevel _ | Sequence (_, _)) :: _, {token = Ident name; loc} :: rest ) -> let mode = Unwrapped (loc, name, [], []) in parse ~check errors rest (push_mode mode stack) | (Unwrapped _ | Wrapped _) :: _, {token = Int value; loc} :: rest | ( (Expression None | Sequence _ | Toplevel _) :: _, {token = Int value; loc} :: (([] | {token = Semi | Close_brace; _} :: _) as rest) ) -> let expr : node = Int (loc, Z.of_string value) in let errors = if check then do_check ~toplevel:false errors expr else errors in parse ~check errors rest (fill_mode expr stack) | (Unwrapped _ | Wrapped _) :: _, {token = String contents; loc} :: rest | ( (Expression None | Sequence _ | Toplevel _) :: _, {token = String contents; loc} :: (([] | {token = Semi | Close_brace; _} :: _) as rest) ) -> let expr : node = String (loc, contents) in let errors = if check then do_check ~toplevel:false errors expr else errors in parse ~check errors rest (fill_mode expr stack) | (Unwrapped _ | Wrapped _) :: _, {token = Bytes contents; loc} :: rest | ( (Expression None | Sequence _ | Toplevel _) :: _, {token = Bytes contents; loc} :: (([] | {token = Semi | Close_brace; _} :: _) as rest) ) -> let errors, bytes = match Hex.to_bytes (`Hex (String.sub contents 2 (String.length contents - 2))) with | None -> (Invalid_hex_bytes loc :: errors, Bytes.empty) | Some bytes -> (errors, bytes) in let expr : node = Bytes (loc, bytes) in let errors = if check then do_check ~toplevel:false errors expr else errors in parse ~check errors rest (fill_mode expr stack) | ( Sequence ({loc = {start; _}; _}, exprs) :: _, {token = Close_brace; loc = {stop; _}} :: rest ) -> let exprs = List.rev exprs in let expr = Micheline.Seq ({start; stop}, exprs) in let errors = if check then do_check ~toplevel:false errors expr else errors in parse ~check errors rest (fill_mode expr (pop_mode stack)) | (Sequence _ | Toplevel _) :: _, {token = Semi; _} :: rest -> parse ~check errors rest stack | ( Unwrapped ({start; stop}, name, exprs, annot) :: Expression _ :: _, ([] as rest) ) | ( Unwrapped ({start; stop}, name, exprs, annot) :: Toplevel _ :: _, (({token = Semi; _} :: _ | []) as rest) ) | ( Unwrapped ({start; stop}, name, exprs, annot) :: Sequence _ :: _, ({token = Close_brace | Semi; _} :: _ as rest) ) | ( Wrapped ({loc = {start; stop}; _}, name, exprs, annot) :: _, {token = Close_paren; _} :: rest ) -> let exprs = List.rev exprs in let stop = if exprs = [] then stop else max_point exprs in let expr = Micheline.Prim ({start; stop}, name, exprs, annot) in let errors = if check then do_check ~toplevel:false errors expr else errors in parse ~check errors rest (fill_mode expr (pop_mode stack)) | ( (Wrapped _ | Unwrapped _) :: _, ({token = Open_paren; _} as token) :: {token = Ident name; _} :: ({token = Annot _; _} :: _ as rest) ) -> let annots, rest = annots rest in let mode = Wrapped (token, name, [], annots) in parse ~check errors rest (push_mode mode stack) | ( (Wrapped _ | Unwrapped _) :: _, ({token = Open_paren; _} as token) :: {token = Ident name; _} :: rest ) -> let mode = Wrapped (token, name, [], []) in parse ~check errors rest (push_mode mode stack) | (Wrapped _ | Unwrapped _) :: _, {token = Ident name; loc} :: rest -> let expr = Micheline.Prim (loc, name, [], []) in let errors = if check then do_check ~toplevel:false errors expr else errors in parse ~check errors rest (fill_mode expr stack) | ( (Wrapped _ | Unwrapped _ | Toplevel _ | Sequence _ | Expression None) :: _, ({token = Open_brace; _} as token) :: rest ) -> let mode = Sequence (token, []) in parse ~check errors rest (push_mode mode stack) (* indentation checker *) and do_check ?(toplevel = false) errors = function | Seq ({start; stop}, []) as expr -> if start.column >= stop.column then Misaligned expr :: errors else errors | ( Prim ({start; stop}, _, first :: rest, _) | Seq ({start; stop}, first :: rest) ) as expr -> let {column = first_column; line = first_line; _} = min_point [first] in if start.column >= stop.column then Misaligned expr :: errors else if (not toplevel) && start.column >= first_column then Misaligned expr :: errors else (* In a sequence or in the arguments of a primitive, we require all items to be aligned, but we relax the rule to allow consecutive items to be written on the same line. *) let rec in_line_or_aligned prev_start_line errors = function | [] -> errors | expr :: rest -> let {column; line = start_line; _} = min_point [expr] in let {line = stop_line; _} = max_point [expr] in let errors = if stop_line <> prev_start_line && column <> first_column then Misaligned expr :: errors else errors in in_line_or_aligned start_line errors rest in in_line_or_aligned first_line errors rest | Prim (_, _, [], _) | String _ | Int _ | Bytes _ -> errors let parse_expression ?check tokens = let result = match tokens with | ({token = Open_paren; _} as token) :: {token = Ident name; _} :: {token = Annot annot; _} :: rest -> let annots, rest = annots rest in let mode = Wrapped (token, name, [], annot :: annots) in parse ?check [] rest [mode; Expression None] | ({token = Open_paren; _} as token) :: {token = Ident name; _} :: rest -> let mode = Wrapped (token, name, [], []) in parse ?check [] rest [mode; Expression None] | _ -> parse ?check [] tokens [Expression None] in match result with [single], errors -> (single, errors) | _ -> assert false let parse_toplevel ?check tokens = parse ?check [] tokens [Toplevel []] let print_point ppf {line; column; _} = Format.fprintf ppf "At line %d character %d" line column let print_token_kind ppf = function | Open_paren | Close_paren -> Format.fprintf ppf "parenthesis" | Open_brace | Close_brace -> Format.fprintf ppf "curly brace" | String _ -> Format.fprintf ppf "string constant" | Bytes _ -> Format.fprintf ppf "bytes constant" | Int _ -> Format.fprintf ppf "integer constant" | Ident _ -> Format.fprintf ppf "identifier" | Annot _ -> Format.fprintf ppf "annotation" | Comment _ | Eol_comment _ -> Format.fprintf ppf "comment" | Semi -> Format.fprintf ppf "semi colon" let print_location ppf loc = if loc.start.line = loc.stop.line then if loc.start.column = loc.stop.column then Format.fprintf ppf "At line %d character %d" loc.start.line loc.start.column else Format.fprintf ppf "At line %d characters %d to %d" loc.start.line loc.start.column loc.stop.column else Format.fprintf ppf "From line %d character %d to line %d character %d" loc.start.line loc.start.column loc.stop.line loc.stop.column let no_parsing_error (ast, errors) = match errors with [] -> Ok ast | errors -> Error errors let () = register_error_kind `Permanent ~id:"micheline.parse_error.invalid_utf8_sequence" ~title:"Micheline parser error: invalid UTF-8 sequence" ~description: "While parsing a piece of Micheline source, a sequence of bytes that is \ not valid UTF-8 was encountered." ~pp:(fun ppf (point, str) -> Format.fprintf ppf "%a, invalid UTF-8 sequence %S" print_point point str) Data_encoding.(obj2 (req "point" point_encoding) (req "sequence" string)) (function | Invalid_utf8_sequence (point, str) -> Some (point, str) | _ -> None) (fun (point, str) -> Invalid_utf8_sequence (point, str)) ; register_error_kind `Permanent ~id:"micheline.parse_error.unexpected_character" ~title:"Micheline parser error: unexpected character" ~description: "While parsing a piece of Micheline source, an unexpected character was \ encountered." ~pp:(fun ppf (point, str) -> Format.fprintf ppf "%a, unexpected character %s" print_point point str) Data_encoding.(obj2 (req "point" point_encoding) (req "character" string)) (function | Unexpected_character (point, str) -> Some (point, str) | _ -> None) (fun (point, str) -> Unexpected_character (point, str)) ; register_error_kind `Permanent ~id:"micheline.parse_error.undefined_escape_sequence" ~title:"Micheline parser error: undefined escape sequence" ~description: "While parsing a piece of Micheline source, an unexpected escape \ sequence was encountered in a string." ~pp:(fun ppf (point, str) -> Format.fprintf ppf "%a, undefined escape sequence \"%s\"" print_point point str) Data_encoding.(obj2 (req "point" point_encoding) (req "sequence" string)) (function | Undefined_escape_sequence (point, str) -> Some (point, str) | _ -> None) (fun (point, str) -> Undefined_escape_sequence (point, str)) ; register_error_kind `Permanent ~id:"micheline.parse_error.missing_break_after_number" ~title:"Micheline parser error: missing break after number" ~description: "While parsing a piece of Micheline source, a number was not visually \ separated from its follower token, leading to misreadability." ~pp:(fun ppf point -> Format.fprintf ppf "%a, missing break after number" print_point point) Data_encoding.(obj1 (req "point" point_encoding)) (function Missing_break_after_number point -> Some point | _ -> None) (fun point -> Missing_break_after_number point) ; register_error_kind `Permanent ~id:"micheline.parse_error.unterminated_string" ~title:"Micheline parser error: unterminated string" ~description: "While parsing a piece of Micheline source, a string was not terminated." ~pp:(fun ppf loc -> Format.fprintf ppf "%a, unterminated string" print_location loc) Data_encoding.(obj1 (req "location" location_encoding)) (function Unterminated_string loc -> Some loc | _ -> None) (fun loc -> Unterminated_string loc) ; register_error_kind `Permanent ~id:"micheline.parse_error.unterminated_integer" ~title:"Micheline parser error: unterminated integer" ~description: "While parsing a piece of Micheline source, an integer was not \ terminated." ~pp:(fun ppf loc -> Format.fprintf ppf "%a, unterminated integer" print_location loc) Data_encoding.(obj1 (req "location" location_encoding)) (function Unterminated_integer loc -> Some loc | _ -> None) (fun loc -> Unterminated_integer loc) ; register_error_kind `Permanent ~id:"micheline.parse_error.invalid_hex_bytes" ~title:"Micheline parser error: invalid hex bytes" ~description: "While parsing a piece of Micheline source, a byte sequence (0x...) was \ not valid as a hex byte." ~pp:(fun ppf loc -> Format.fprintf ppf "%a, invalid hex bytes" print_location loc) Data_encoding.(obj1 (req "location" location_encoding)) (function Invalid_hex_bytes loc -> Some loc | _ -> None) (fun loc -> Invalid_hex_bytes loc) ; register_error_kind `Permanent ~id:"micheline.parse_error.unterminated_comment" ~title:"Micheline parser error: unterminated comment" ~description: "While parsing a piece of Micheline source, a commentX was not \ terminated." ~pp:(fun ppf loc -> Format.fprintf ppf "%a, unterminated comment" print_location loc) Data_encoding.(obj1 (req "location" location_encoding)) (function Unterminated_comment loc -> Some loc | _ -> None) (fun loc -> Unterminated_comment loc) ; register_error_kind `Permanent ~id:"micheline.parse_error.annotation_exceeds_max_length" ~title:"Micheline parser error: annotation exceeds max length" ~description: (Format.sprintf "While parsing a piece of Micheline source, an annotation exceeded \ the maximum length (%d)." max_annot_length) ~pp:(fun ppf loc -> Format.fprintf ppf "%a, annotation exceeded maximum length (%d chars)" print_location loc max_annot_length) Data_encoding.(obj1 (req "location" location_encoding)) (function Annotation_length loc -> Some loc | _ -> None) (fun loc -> Annotation_length loc) ; register_error_kind `Permanent ~id:"micheline.parse_error.unclosed_token" ~title:"Micheline parser error: unclosed token" ~description: "While parsing a piece of Micheline source, a parenthesis or a brace was \ unclosed." ~pp:(fun ppf (loc, token) -> Format.fprintf ppf "%a, unclosed %a" print_location loc print_token_kind token) Data_encoding.( obj2 (req "location" location_encoding) (req "token" token_value_encoding)) (function Unclosed {loc; token} -> Some (loc, token) | _ -> None) (fun (loc, token) -> Unclosed {loc; token}) ; register_error_kind `Permanent ~id:"micheline.parse_error.unexpected_token" ~title:"Micheline parser error: unexpected token" ~description: "While parsing a piece of Micheline source, an unexpected token was \ encountered." ~pp:(fun ppf (loc, token) -> Format.fprintf ppf "%a, unexpected %a" print_location loc print_token_kind token) Data_encoding.( obj2 (req "location" location_encoding) (req "token" token_value_encoding)) (function Unexpected {loc; token} -> Some (loc, token) | _ -> None) (fun (loc, token) -> Unexpected {loc; token}) ; register_error_kind `Permanent ~id:"micheline.parse_error.extra_token" ~title:"Micheline parser error: extra token" ~description: "While parsing a piece of Micheline source, an extra semi colon or \ parenthesis was encountered." ~pp:(fun ppf (loc, token) -> Format.fprintf ppf "%a, extra %a" print_location loc print_token_kind token) Data_encoding.( obj2 (req "location" location_encoding) (req "token" token_value_encoding)) (function Extra {loc; token} -> Some (loc, token) | _ -> None) (fun (loc, token) -> Extra {loc; token}) ; (* [Misaligned] is registered in the encoding module to break a dependency cycle *) register_error_kind `Permanent ~id:"micheline.parse_error.empty_expression" ~title:"Micheline parser error: empty_expression" ~description: "Tried to interpret an empty piece or Micheline source as a single \ expression." ~pp:(fun ppf () -> Format.fprintf ppf "empty expression") Data_encoding.empty (function Empty -> Some () | _ -> None) (fun () -> Empty) (* helper functions for the encoding *) let check_annot s = String.length s <= max_annot_length && match tokenize s with | [{token = Annot s'; _}], [] (* no errors *) -> String.equal s s' | _ -> false