(* --------------------------------------------------------------------
 * Copyright (c) - 2012--2016 - IMDEA Software Institute
 * Copyright (c) - 2012--2021 - Inria
 * Copyright (c) - 2012--2021 - Ecole Polytechnique
 *
 * Distributed under the terms of the CeCILL-C-V1 license
 * -------------------------------------------------------------------- *)

(* -------------------------------------------------------------------- *)
{
  open EcUtils
  open EcParser

  module BI = EcBigInt
  module L  = EcLocation

  (* ------------------------------------------------------------------ *)
  exception LexicalError of L.t option * string

  let pp_lex_error fmt msg =
    Format.fprintf fmt "parse error: %s" msg

  let () =
    let pp fmt exn =
      match exn with
      | LexicalError (_, msg) -> pp_lex_error fmt msg
      | _ -> raise exn
    in
      EcPException.register pp

  (* ------------------------------------------------------------------ *)
  let lex_error lexbuf msg =
    raise (LexicalError (Some (L.of_lexbuf lexbuf), msg))

  let unterminated_comment () =
    raise (LexicalError (None, "unterminated comment"))

  let unterminated_string () =
    raise (LexicalError (None, "unterminated string"))

  (* ------------------------------------------------------------------ *)
  let _keywords = [                     (* see [keywords.py] *)
    "admit"       , ADMIT      ;        (* KW: dangerous *)
    "admitted"    , ADMITTED   ;        (* KW: dangerous *)

    "forall"      , FORALL     ;        (* KW: prog *)
    "exists"      , EXIST      ;        (* KW: prog *)
    "fun"         , FUN        ;        (* KW: prog *)
    "glob"        , GLOB       ;        (* KW: prog *)
    "let"         , LET        ;        (* KW: prog *)
    "in"          , IN         ;        (* KW: prog *)
    "for"         , FOR        ;        (* KW: prog *)
    "var"         , VAR        ;        (* KW: prog *)
    "proc"        , PROC       ;        (* KW: prog *)
    "if"          , IF         ;        (* KW: prog *)
    "is"          , IS         ;        (* KW: prog *)
    "match"       , MATCH      ;        (* KW: prog *)
    "then"        , THEN       ;        (* KW: prog *)
    "else"        , ELSE       ;        (* KW: prog *)
    "elif"        , ELIF       ;        (* KW: prog *)
    "match"       , MATCH      ;        (* KW: prog *)
    "for"         , FOR        ;        (* KW: prog *)
    "while"       , WHILE      ;        (* KW: prog *)
    "assert"      , ASSERT     ;        (* KW: prog *)
    "return"      , RETURN     ;        (* KW: prog *)
    "res"         , RES        ;        (* KW: prog *)
    "equiv"       , EQUIV      ;        (* KW: prog *)
    "hoare"       , HOARE      ;        (* KW: prog *)
    "phoare"      , PHOARE     ;        (* KW: prog *)
    "islossless"  , LOSSLESS   ;        (* KW: prog *)
    "async"       , ASYNC      ;        (* KW: prog *)

    "try"         , TRY        ;        (* KW: tactical *)
    "first"       , FIRST      ;        (* KW: tactical *)
    "last"        , LAST       ;        (* KW: tactical *)
    "do"          , DO         ;        (* KW: tactical *)
    "strict"      , STRICT     ;        (* KW: tactical *)
    "expect"      , EXPECT     ;        (* KW: tactical *)

    (* Lambda tactics *)
    "beta"        , BETA       ;        (* KW: tactic *)
    "iota"        , IOTA       ;        (* KW: tactic *)
    "zeta"        , ZETA       ;        (* KW: tactic *)
    "eta"         , ETA        ;        (* KW: tactic *)
    "logic"       , LOGIC      ;        (* KW: tactic *)
    "delta"       , DELTA      ;        (* KW: tactic *)
    "simplify"    , SIMPLIFY   ;        (* KW: tactic *)
    "cbv"         , CBV        ;        (* KW: tactic *)
    "congr"       , CONGR      ;        (* KW: tactic *)

    (* Logic tactics *)
    "change"      , CHANGE     ;        (* KW: tactic *)
    "split"       , SPLIT      ;        (* KW: tactic *)
    "left"        , LEFT       ;        (* KW: tactic *)
    "right"       , RIGHT      ;        (* KW: tactic *)
    "case"        , CASE       ;        (* KW: tactic *)

    "pose"        , POSE       ;        (* KW: tactic *)
    "gen"         , GEN        ;        (* KW: tactic *)
    "have"        , HAVE       ;        (* KW: tactic *)
    "suff"        , SUFF       ;        (* KW: tactic *)
    "elim"        , ELIM       ;        (* KW: tactic *)
    "exlim"       , EXLIM      ;        (* KW: tactic *)
    "ecall"       , ECALL      ;        (* KW: tactic *)
    "clear"       , CLEAR      ;        (* KW: tactic *)
    "wlog"        , WLOG       ;        (* KW: tactic *)

    (* Auto tactics *)
    "apply"       , APPLY      ;        (* KW: tactic *)
    "rewrite"     , REWRITE    ;        (* KW: tactic *)
    "rwnormal"    , RWNORMAL   ;        (* KW: tactic *)
    "subst"       , SUBST      ;        (* KW: tactic *)
    "progress"    , PROGRESS   ;        (* KW: tactic *)
    "trivial"     , TRIVIAL    ;        (* KW: tactic *)
    "auto"        , AUTO       ;        (* KW: tactic *)

    (* Other tactics *)
    "idtac"       , IDTAC      ;        (* KW: tactic *)
    "move"        , MOVE       ;        (* KW: tactic *)
    "modpath"     , MODPATH    ;        (* KW: tactic *)
    "field"       , FIELD `Raw ;        (* KW: tactic *)
    "fieldeq"     , FIELD `Eq  ;        (* KW: tactic *)
    "ring"        , RING `Raw  ;        (* KW: tactic *)
    "ringeq"      , RING `Eq   ;        (* KW: tactic *)
    "algebra"     , ALGNORM    ;        (* KW: tactic *)

    "exact"       , EXACT      ;        (* KW: bytac *)
    "assumption"  , ASSUMPTION ;        (* KW: bytac *)
    "smt"         , SMT        ;        (* KW: bytac *)
    "by"          , BY         ;        (* KW: bytac *)
    "reflexivity" , REFLEX     ;        (* KW: bytac *)
    "done"        , DONE       ;        (* KW: bytac *)
    "solve"       , SOLVE      ;        (* KW: bytac *)

    (* PHL: tactics *)
    "replace"     , REPLACE    ;        (* KW: tactic *)
    "transitivity", TRANSITIVITY;       (* KW: tactic *)
    "symmetry"    , SYMMETRY   ;        (* KW: tactic *)
    "seq"         , SEQ        ;        (* KW: tactic *)
    "wp"          , WP         ;        (* KW: tactic *)
    "sp"          , SP         ;        (* KW: tactic *)
    "sim"         , SIM        ;        (* KW: tactic *)
    "skip"        , SKIP       ;        (* KW: tactic *)
    "call"        , CALL       ;        (* KW: tactic *)
    "rcondt"      , RCONDT     ;        (* KW: tactic *)
    "rcondf"      , RCONDF     ;        (* KW: tactic *)
    "swap"        , SWAP       ;        (* KW: tactic *)
    "cfold"       , CFOLD      ;        (* KW: tactic *)
    "rnd"         , RND        ;        (* KW: tactic *)
    "pr_bounded"  , PRBOUNDED  ;        (* KW: tactic *)
    "bypr"        , BYPR       ;        (* KW: tactic *)
    "byphoare"    , BYPHOARE   ;        (* KW: tactic *)
    "byequiv"     , BYEQUIV    ;        (* KW: tactic *)
    "fel"         , FEL        ;        (* KW: tactic *)

    "conseq"      , CONSEQ     ;        (* KW: tactic *)
    "exfalso"     , EXFALSO    ;        (* KW: tactic *)
    "inline"      , INLINE     ;        (* KW: tactic *)
    "interleave"  , INTERLEAVE ;        (* KW: tactic *)
    "alias"       , ALIAS      ;        (* KW: tactic *)
    "fission"     , FISSION    ;        (* KW: tactic *)
    "fusion"      , FUSION     ;        (* KW: tactic *)
    "unroll"      , UNROLL     ;        (* KW: tactic *)
    "splitwhile"  , SPLITWHILE ;        (* KW: tactic *)
    "kill"        , KILL       ;        (* KW: tactic *)
    "eager"       , EAGER      ;        (* KW: tactic *)

    "axiom"       , AXIOM      ;        (* KW: global *)
    "axiomatized" , AXIOMATIZED;        (* KW: global *)
    "lemma"       , LEMMA      ;        (* KW: global *)
    "realize"     , REALIZE    ;        (* KW: global *)
    "proof"       , PROOF      ;        (* KW: global *)
    "qed"         , QED        ;        (* KW: global *)
    "abort"       , ABORT      ;        (* KW: global *)
    "goal"        , GOAL       ;        (* KW: global *)
    "end"         , END        ;        (* KW: global *)
    "from"        , FROM       ;        (* KW: global *)
    "import"      , IMPORT     ;        (* KW: global *)
    "export"      , EXPORT     ;        (* KW: global *)
    "include"     , INCLUDE    ;        (* KW: global *)
    "local"       , LOCAL      ;        (* KW: global *)
    "declare"     , DECLARE    ;        (* KW: global *)
    "hint"        , HINT       ;        (* KW: global *)
    "nosmt"       , NOSMT      ;        (* KW: global *)
    "module"      , MODULE     ;        (* KW: global *)
    "of"          , OF         ;        (* KW: global *)
    "const"       , CONST      ;        (* KW: global *)
    "op"          , OP         ;        (* KW: global *)
    "pred"        , PRED       ;        (* KW: global *)
    "inductive"   , INDUCTIVE  ;        (* KW: global *)
    "notation"    , NOTATION   ;        (* KW: global *)
    "abbrev"      , ABBREV     ;        (* KW: global *)
    "require"     , REQUIRE    ;        (* KW: global *)
    "theory"      , THEORY     ;        (* KW: global *)
    "abstract"    , ABSTRACT   ;        (* KW: global *)
    "section"     , SECTION    ;        (* KW: global *)
    "type"        , TYPE       ;        (* KW: global *)
    "class"       , CLASS      ;        (* KW: global *)
    "instance"    , INSTANCE   ;        (* KW: global *)
    "print"       , PRINT      ;        (* KW: global *)
    "search"      , SEARCH     ;        (* KW: global *)
    "locate"      , LOCATE     ;        (* KW: global *)
    "as"          , AS         ;        (* KW: global *)
    "Pr"          , PR         ;        (* KW: global *)
    "clone"       , CLONE      ;        (* KW: global *)
    "with"        , WITH       ;        (* KW: global *)
    "rename"      , RENAME     ;        (* KW: global *)
    "prover"      , PROVER     ;        (* KW: global *)
    "timeout"     , TIMEOUT    ;        (* KW: global *)
    "why3"        , WHY3       ;        (* KW: global *)
    "dump"        , DUMP       ;        (* KW: global *)
    "remove"      , REMOVE     ;        (* KW: global *)

    "time"        , TIME       ;        (* KW: internal *)
    "undo"        , UNDO       ;        (* KW: internal *)
    "debug"       , DEBUG      ;        (* KW: internal *)
    "pragma"      , PRAGMA     ;        (* KW: internal *)

    "Top"         , TOP        ;        (* KW: global *)
    "Self"        , SELF       ;        (* KW: global *)
  ]

  (* ------------------------------------------------------------------ *)
  let _operators = [
    (":"   , (COLON            , true ));
    ("#"   , (SHARP            , true ));
    ("#|"   ,(SHARPPIPE        , true ));
    ("//"  , (SLASHSLASH       , true ));
    ("//#" , (SLASHSLASHSHARP  , true ));
    ("/="  , (SLASHEQ          , true ));
    ("/#"  , (SLASHSHARP       , true ));
    ("//=" , (SLASHSLASHEQ     , true ));
    ("/>"  , (SLASHGT          , true ));
    ("|>"  , (PIPEGT           , true ));
    ("//>" , (SLASHSLASHGT     , true ));
    ("||>" , (PIPEPIPEGT       , true ));
    ("=>"  , (IMPL             , true ));
    ("|"   , (PIPE             , true ));
    (":="  , (CEQ              , true ));
    ("/"   , (SLASH            , true ));
    ("\\"  , (BACKSLASH        , true ));
    ("<-"  , (LARROW           , true ));
    ("->"  , (RARROW           , true ));
    ("<<-" , (LLARROW          , true ));
    ("->>" , (RRARROW          , true ));
    ("!"   , (NOT              , false));
    ("^"   , (HAT              , false));
    ("&"   , (AMP              , false));
    ("&&"  , (ANDA             , false));
    ("/\\" , (AND              , false));
    ("||"  , (ORA              , false));
    ("\\/" , (OR               , false));
    ("<=>" , (IFF              , false));
    ("%"   , (PCENT            , false));
    ("+"   , (PLUS             , false));
    ("-"   , (MINUS            , false));
    ("*"   , (STAR             , false));
    ("<<"  , (BACKS            , false));
    (">>"  , (FWDS             , false));
    ("<:"  , (LTCOLON          , false));
    ("==>" , (LONGARROW        , false));
    ("="   , (EQ               , false));
    ("<>"  , (NE               , false));
    (">"   , (GT               , false));
    ("<"   , (LT               , false));
    (">="  , (GE               , false));
    ("<="  , (LE               , false));
    ("<*>" , (LTSTARGT         , false));
    ("<<*>", (LTLTSTARGT       , false));
    ("<*>>", (LTSTARGTGT       , false));
  ]

  (* ------------------------------------------------------------------ *)
  let keywords =
    let table = Hashtbl.create 0 in
    List.iter (curry (Hashtbl.add table)) _keywords; table

  (* ------------------------------------------------------------------ *)
  let operators =
    let table = Hashtbl.create 0 in
    List.iter (curry (Hashtbl.add table)) _operators; table

  let opre =
    let ops = List.map fst (List.filter (snd |- snd) _operators) in
    let ops = List.ksort ~key:(String.length) ~cmp:compare ~rev:true ops in
    let ops = String.join "|" (List.map EcRegexp.quote ops) in
    let ops = Printf.sprintf "(%s)" ops in
    EcRegexp.regexp ops

  (* ----------------------------------------------------------------- *)
  let lex_std_op ?name op =
    match op.[0] with
    | '=' | '<' | '>'       -> LOP1 (name |> odfl op)
    | '+' | '-' | '|'       -> LOP2 (name |> odfl op)
    | '*' | '/' | '&' | '%' -> LOP3 (name |> odfl op)
    | _                     -> LOP4 (name |> odfl op)

  (* ------------------------------------------------------------------ *)
  let lex_operators (op : string) =
    let baseop (op : string) =
      try  fst (Hashtbl.find operators op)
      with Not_found ->
        if   EcRegexp.match_ (`S "^:+$") op
        then ROP4 op else begin
          if   EcRegexp.match_ (`S "^%+.$") op
          then lex_std_op ~name:op (String.make 1 op.[String.length op -1])
          else raise Not_found
        end
    in
      try  [baseop op]
      with Not_found ->
        List.map
        (function
          | EcRegexp.Delim op ->
              fst (Hashtbl.find operators op)
          | EcRegexp.Text op ->
              try baseop op with Not_found -> lex_std_op op)
        (EcRegexp.split (`C opre) op)

  (* ------------------------------------------------------------------ *)
  let lex_tick_operator (op : string) =
    let name = Printf.sprintf "`%s`" op in
    lex_std_op ~name op
}

let empty   = ""
let blank   = [' ' '\t' '\r']
let newline = '\n'
let upper   = ['A'-'Z']
let lower   = ['a'-'z']
let letter  = upper | lower
let digit   = ['0'-'9']
let uint    = digit+

let ichar  = (letter | digit | '_' | '\'')
let lident = (lower ichar*) | ('_' ichar+)
let uident = upper ichar*
let tident = '\'' lident
let mident = '&'  (lident | uint)

let opchar = ['=' '<' '>' '+' '-' '*' '/' '\\' '%' '&' '^' '|' ':' '#' '$']

let sop = opchar+ | '`' opchar+ '`'
let nop = '\\' ichar+

let uniop = nop | ['-' '+']+ | '!'
let binop = sop | nop
let numop = '\'' digit+

(* -------------------------------------------------------------------- *)
rule main = parse
  | newline      { Lexing.new_line lexbuf; main lexbuf }
  | blank+       { main lexbuf }
  | lident as id { try [Hashtbl.find keywords id] with Not_found -> [LIDENT id] }
  | uident as id { try [Hashtbl.find keywords id] with Not_found -> [UIDENT id] }
  | tident       { [TIDENT (Lexing.lexeme lexbuf)] }
  | mident       { [MIDENT (Lexing.lexeme lexbuf)] }
  | uint         { [UINT (BI.of_string (Lexing.lexeme lexbuf))] }

  | (digit+ as n) '.' (digit+ as f) {
      let nv, fv = BI.of_string n, BI.of_string f in
      [DECIMAL (nv, (String.length f, fv))]
    }

  | "(*" binop "*)" { main lexbuf }
  | '(' blank* (binop as s) blank* ')' { [PBINOP s] }
  | '(' blank* (numop as s) blank* ')' { [PNUMOP s] }

  | '[' blank* (uniop as s) blank* ']' {
      let name = Printf.sprintf "[%s]" s in
      try
        let (tk, ok) = Hashtbl.find operators s in
          if ok then [LBRACKET; tk; RBRACKET] else [PUNIOP name]
      with Not_found -> [PUNIOP name]
  }

  | "(*" { comment lexbuf; main lexbuf }
  | "\"" { [STRING (Buffer.contents (string (Buffer.create 0) lexbuf))] }

  (* string symbols *)
  | ".."    { [DOTDOT   ] }
  | ".["    { [DLBRACKET] }
  | ".`"    { [DOTTICK  ] }
  | "{0,1}" { [RBOOL    ] }

  (* punctuation *)
  | '_'   { [UNDERSCORE] }
  | "#<"  { [DASHLT    ] }
  | '('   { [LPAREN    ] }
  | ')'   { [RPAREN    ] }
  | '{'   { [LBRACE    ] }
  | '}'   { [RBRACE    ] }
  | '['   { [LBRACKET  ] }
  | ']'   { [RBRACKET  ] }
  | ','   { [COMMA     ] }
  | ';'   { [SEMICOLON ] }
  | '?'   { [QUESTION  ] }
  | "~"   { [TILD      ] }
  | "!"   { [NOT       ] }
  | "@"   { [AT        ] }
  | "{|"  { [LPBRACE   ] }
  | "|}"  { [RPBRACE   ] }
  | "`|"  { [TICKPIPE  ] }
  | "<$"  { [LESAMPLE  ] }
  | "<@"  { [LEAT      ] }
  | ":~"  { [COLONTILD ] }

  | "/~="  { [SLASHTILDEQ     ] }
  | "//~=" { [SLASHSLASHTILDEQ] }

  (* operators *)
  | nop as x { [NOP x] }

  | '`' (opchar+ as op) '`' {
      [lex_tick_operator op]
    }

  | opchar as op {
      let op = operator (Buffer.from_char op) lexbuf in
      lex_operators (Buffer.contents op)
    }

  | numop as op {
      [NUMOP op]
    }

  (* end of sentence / stream *)
  | '.' (eof | blank | newline as r) {
      if r = "\n" then
        Lexing.new_line lexbuf;
      let lc = Lexing.lexeme_start_p lexbuf in
      let lc = { lc with pos_cnum = lc.pos_cnum + 1; } in
      [FINAL lc]
    }

  | "." { [DOT] }

  | eof { [EOF] }

  (* errors *)

  | '\'' uident     { lex_error lexbuf "type variables must be low-identifiers" }
  | ['\'' '`'] as c { lex_error lexbuf (Printf.sprintf "illegal use of character: %c" c) }
  |  _         as c { lex_error lexbuf (Printf.sprintf "illegal character: %c" c) }

and comment = parse
  | "*)"        { () }
  | "(*"        { comment lexbuf; comment lexbuf }
  | newline     { Lexing.new_line lexbuf; comment lexbuf }
  | eof         { unterminated_comment () }
  | _           { comment lexbuf }

and string buf = parse
  | "\""          { buf }
  | "\\n"         { Buffer.add_char buf '\n'; string buf lexbuf }
  | "\\r"         { Buffer.add_char buf '\r'; string buf lexbuf }
  | "\\" (_ as c) { Buffer.add_char buf c   ; string buf lexbuf }
  | newline       { Buffer.add_string buf (Lexing.lexeme lexbuf); string buf lexbuf }
  | _ as c        { Buffer.add_char buf c   ; string buf lexbuf }
  | eof           { unterminated_string () }

and operator buf = parse
  | opchar* as x { Buffer.add_string buf x; buf }