https://gitlab.com/nomadic-labs/mi-cho-coq
Raw File
Tip revision: 6ba8c4796e0de39e65f7173796f13e72e7231d88 authored by Raphaƫl Cauderlier on 25 April 2021, 12:26:44 UTC
[Readme] Remove a TODO item
Tip revision: 6ba8c47
micheline_lexer.v
Require Import List String Ascii ZArith.
Require error micheline_parser bytes_repr.
Require Import micheline_tokens location.
Import error.Notations.

Definition char_is_num (c : ascii) :=
  match c with
  | "0"%char | "1"%char | "2"%char | "3"%char | "4"%char | "5"%char
  | "6"%char | "7"%char | "8"%char | "9"%char => true
  | _ => false
  end.

Definition char_is_annot (c : ascii) :=
  match c with
  | "%"%char | "@"%char | ":"%char => true
  | _ => false
  end.

Goal (char_is_num "a"%char = false). reflexivity. Qed.
Goal (char_is_num "z"%char = false). reflexivity. Qed.
Goal (char_is_num "A"%char = false). reflexivity. Qed.
Goal (char_is_num "Z"%char = false). reflexivity. Qed.
Goal (char_is_num "_"%char = false). reflexivity. Qed.
Goal (char_is_num "0"%char = true). reflexivity. Qed.
Goal (char_is_num "9"%char = true). reflexivity. Qed.

Definition char_is_alpha (c : ascii) :=
  let leb a b := (N.leb (N_of_ascii a) (N_of_ascii b)) in
  orb (andb (leb "a"%char c) (leb c "z"%char))
      (orb (andb (leb "A"%char c) (leb c "Z"%char))
           (Ascii.eqb "_"%char c)).

Definition char_is_dot (c : ascii) := Ascii.eqb "."%char c.

Definition char_is_hex (c : ascii) :=
  let leb a b := (N.leb (N_of_ascii a) (N_of_ascii b)) in
  orb (andb (leb "a"%char c) (leb c "f"%char))
      (orb (andb (leb "A"%char c) (leb c "F"%char))
           (char_is_num c)).

Goal (char_is_alpha "a"%char = true). reflexivity. Qed.
Goal (char_is_alpha "z"%char = true). reflexivity. Qed.
Goal (char_is_alpha "A"%char = true). reflexivity. Qed.
Goal (char_is_alpha "Z"%char = true). reflexivity. Qed.
Goal (char_is_alpha "_"%char = true). reflexivity. Qed.
Goal (char_is_alpha "0"%char = false). reflexivity. Qed.
Goal (char_is_alpha "9"%char = false). reflexivity. Qed.


Definition Z_of_char (c : ascii) (acc : Z) : Z :=
  match c with
  | "0"%char => (10 * acc)%Z
  | "1"%char => (10 * acc + 1)%Z
  | "2"%char => (10 * acc + 2)%Z
  | "3"%char => (10 * acc + 3)%Z
  | "4"%char => (10 * acc + 4)%Z
  | "5"%char => (10 * acc + 5)%Z
  | "6"%char => (10 * acc + 6)%Z
  | "7"%char => (10 * acc + 7)%Z
  | "8"%char => (10 * acc + 8)%Z
  | "9"%char => (10 * acc + 9)%Z
  | c => 0%Z
  end.


Definition string_snoc s c := (s ++ String c "")%string.

Definition bytes_of_string loc (s : string) :=
  match bytes_repr.of_string s with
  | Some bs => error.Return bs
  | None => error.Failed _ (error.Lexing loc)
 end.

Fixpoint lex_micheline (input : string) (loc : location) : error.M (list (location.location * location.location * token)) :=
  match input with
  | String first_char input =>
    let nloc := location_incr loc in
    match first_char with
    | "{"%char =>
      let! l := lex_micheline input loc in
      error.Return (cons (loc, nloc, LBRACE) l)
    | "}"%char =>
      let! l := lex_micheline input loc in
      error.Return (cons (loc, nloc, RBRACE) l)
    | "("%char =>
      let! l := lex_micheline input loc in
      error.Return (cons (loc, nloc, LPAREN) l)
    | ")"%char =>
      let! l := lex_micheline input loc in
      error.Return (cons (loc, nloc, RPAREN) l)
    | ";"%char =>
      let! l := lex_micheline input loc in
      error.Return (cons (loc, nloc, SEMICOLON) l)
    | """"%char =>
      lex_micheline_string input ""%string loc nloc
    | "#"%char =>
      lex_micheline_inline_comment input nloc
    | " "%char =>
      lex_micheline input nloc
    | "010"%char (* newline *) =>
      lex_micheline input (location_newline loc)
    | "/"%char =>
      match input with
      | String "*"%char s =>
        lex_micheline_multiline_comment s (location_incr nloc)
      | _ =>
        error.Failed _ (error.Lexing loc)
      end
    | "-"%char =>
      let loc := location_incr loc in
      (fix lex_micheline_number (input : string) (acc : Z) start loc :=
         match input with
         | String c s =>
           if char_is_num c then
             let loc := location_incr loc in
             lex_micheline_number s (Z_of_char c acc) start loc
           else
            let! l := lex_micheline input loc in
            error.Return (cons (start, loc, NUMBER (- acc)%Z) l)
         | EmptyString => error.Return (cons (start, loc, NUMBER (- acc)%Z) nil)
         end) input 0%Z loc loc
    | "0"%char =>
      match input with
      | String "x" s =>
        (fix lex_micheline_bytes (input : string) (acc : string) start loc :=
           match input with
           | String c s =>
             if char_is_hex c then
               let loc := location_incr loc in
               lex_micheline_bytes s (string_snoc acc c) start loc
             else
               let! l := lex_micheline input loc in
               let! bs := bytes_of_string start acc in
              error.Return (cons (start, loc, BYTES bs) l)
           | EmptyString =>
               let! bs := bytes_of_string start acc in
               error.Return (cons (start, loc, BYTES bs) nil)
           end) s EmptyString loc (location_incr (location_incr loc))
      | String c s =>
        if char_is_num c then error.Failed _ (error.Lexing loc)
        else
          let! l := lex_micheline input loc in
          error.Return (cons (loc, location_incr loc, NUMBER 0%Z) l)
      | EmptyString => error.Return (cons (loc, location_incr loc, NUMBER 0%Z) nil)
      end
    | c =>
      if char_is_num c then
        (fix lex_micheline_number (input : string) (acc : Z) start loc :=
           match input with
           | String c s =>
             if char_is_num c then
               let loc := location_incr loc in
               lex_micheline_number s (Z_of_char c acc) start loc
             else
              let! l := lex_micheline input loc in
              error.Return (cons (start, loc, NUMBER acc) l)
           | EmptyString => error.Return (cons (start, loc, NUMBER acc) nil)
           end) input (Z_of_char c 0%Z) loc loc
      else
        if char_is_alpha c then
          (fix lex_micheline_prim (input : string) (acc : string) start loc :=
             match input with
             | String c s =>
               if orb (char_is_alpha c) (char_is_num c) then
                 let loc := location_incr loc in
                 lex_micheline_prim s (string_snoc acc c) start loc
               else
                let! l := lex_micheline input loc in
                error.Return (cons (start, loc, PRIM acc) l)
             | EmptyString => error.Return (cons (start, loc, PRIM acc) nil)
             end) input (String c ""%string) loc loc
        else
          if char_is_annot c then (* Annotations are ignored *)
            (fix lex_micheline_annot (input : string) (acc : string) start loc :=
               match input with
               | String c s =>
                 if (orb (char_is_alpha c)
                         (orb (char_is_num c)
                              (orb (char_is_annot c)
                                   (char_is_dot c)))) then
                   let loc := location_incr loc in
                   lex_micheline_annot s (string_snoc acc c) start loc
                 else
                   let! l := lex_micheline input loc in
                error.Return (cons (start, loc, ANNOTATION acc) l)
               | EmptyString => error.Return (cons (start, loc, ANNOTATION acc) nil)
               end) input (String c ""%string) loc loc
          else
            error.Failed _ (error.Lexing loc)
    end
  | EmptyString => error.Return nil
  end
with
lex_micheline_string (input : string) (acc : string) start loc :=
  match input with
  | String """"%char s =>
    let loc := location_incr loc in
    let! l := lex_micheline s loc in
    error.Return (cons (start, loc, STR acc) l)
  | String c s =>
    let loc := location_incr loc in
    lex_micheline_string s (string_snoc acc c) start loc
  | EmptyString =>
    error.Failed _ (error.Lexing loc)
  end
with
lex_micheline_inline_comment (input : string) loc :=
  match input with
  | String "010"%char (* newline *) s => lex_micheline s (location_newline loc)
  | String _ s =>
    let loc := location_incr loc in
    lex_micheline_inline_comment s loc
  | Empty_string => error.Return nil
  end
with
lex_micheline_multiline_comment (input : string) loc :=
  match input with
  | String "*"%char (String "/" s) =>
    let loc := location_incr loc in
    let loc := location_incr loc in
    lex_micheline s loc
  | String "010"%char (* newline *) s =>
    let loc := location_newline loc in
    lex_micheline_multiline_comment s loc
  | String _ s =>
    let loc := location_incr loc in
    lex_micheline_multiline_comment s loc
  | Empty_string => error.Failed _ (error.Lexing loc)
  end.

CoFixpoint const_buffer (t : micheline_parser.Aut.Gram.token) : micheline_parser.MenhirLibParser.Inter.buffer :=
  micheline_parser.MenhirLibParser.Inter.Buf_cons t (const_buffer t).

Fixpoint tokens_to_parser (ts : list (location * location * token)) : error.M micheline_parser.MenhirLibParser.Inter.buffer :=
  match ts with
  | nil => error.Return (const_buffer (token_to_parser (location_start, location_start, EOF)))
  | cons t ts =>
    let! s := tokens_to_parser ts in
    error.Return (micheline_parser.MenhirLibParser.Inter.Buf_cons (token_to_parser t) s)
  end.

Definition lex_micheline_to_parser (input : string)
  : error.M micheline_parser.MenhirLibParser.Inter.buffer :=
  let! ts := lex_micheline input location_start in
  tokens_to_parser ts.

(* Some interesting lemmas *)

Inductive char_not_in_string : ascii -> string -> Prop :=
| CharNotInEmpty : forall c, char_not_in_string c ""
| CharNotInString : forall c c' s,
    c <> c' ->
    char_not_in_string c s ->
    char_not_in_string c (String c' s).

Lemma lex_string_end : forall s start loc,
    lex_micheline_string """" s start loc =
    error.Return ((start, location_incr loc, (micheline_tokens.STR s)) :: nil).
Proof.
  intros s start loc.
  reflexivity.
Qed.

Lemma append_empty_right : forall s, (String.append s "") = s.
Proof.
  induction s.
  - reflexivity.
  - simpl. rewrite IHs. reflexivity.
Qed.

Lemma append_assoc : forall s1 s2 s3,
    ((s1 ++ s2) ++ s3)%string = (s1 ++ (s2 ++ s3))%string.
Proof.
  induction s1; intros.
  - reflexivity.
  - simpl. rewrite IHs1. reflexivity.
Qed.

Lemma append_string_empty_left : forall a s, (String a "" ++ s)%string = String a s.
Proof. reflexivity. Qed.

Lemma lex_string_aux : forall s2 start loc,
    char_not_in_string """" s2 ->
    forall s1, lex_micheline_string (s2++"""") s1 start loc =
               error.Return ((start, location_add loc ((String.length s2)+1),
                              (micheline_tokens.STR (s1++s2))) :: nil).
Proof.
  induction s2; intros.
  - (* Empty string *) simpl. rewrite append_empty_right. reflexivity.
  - (* c :: s *)
    simpl. inversion H; subst.
    specialize (IHs2 start (location_incr loc) H4 (string_snoc s1 a)).
    unfold string_snoc in *. rewrite IHs2. simpl.
    unfold string_snoc; simpl.
    rewrite location.location_add_incr_S.
    rewrite append_assoc. rewrite append_string_empty_left.
    case_eq a. intros. case_eq b; case_eq b0; case_eq b1; case_eq b2; case_eq b3; case_eq b4; case_eq b5; case_eq b6; try reflexivity.
    intros. subst b; subst b0; subst b1; subst b2; subst b3; subst b4; subst b5; subst b6. symmetry in H0. contradiction.
Qed.

Corollary lex_string : forall s start,
    char_not_in_string """" s ->
    lex_micheline (""""++s++"""") start =
    error.Return ((start, location_add start ((String.length s)+2),
                   (micheline_tokens.STR s)) :: nil).
Proof.
  intros. simpl.
  specialize (lex_string_aux s start (location_incr start) H "").
  simpl. intro.
  rewrite <- location.location_add_incr_S in H0.
  assert (S (length s + 1) = (length s + 2)) as Hlen by omega.
  rewrite Hlen in H0. assumption.
Qed.
back to top