https://github.com/xi-business/infsat
Raw File
Tip revision: 31552cff3d6d4239fcf05c9c201e9c2fa3678b7a authored by Jan Wroblewski on 10 February 2022, 12:43:54 UTC
Removed unusable benchmark and renamed the other g45 one.
Tip revision: 31552cf
cfa.ml
(** Control flow analysis module implementing 0CFA. *)

open Binding
open Grammar
open GrammarCommon
open HGrammar
open Utilities

(* --- types --- *)

module SortedHTermsIds = SortedList.Make(struct
    type t = hterms_id
    let compare = compare
  end)

type hterms_ids = SortedHTermsIds.t

module HTermSet = Set.Make (struct
    type t = hterm
    let compare = compare
  end)

class cfa (hg : hgrammar) = object(self)
  (* --- state of 0CFA --- *)

  (** Queue of hterms to process. Eventually all reachable hterms are processed. Note that aside
      from hterms that are present in the grammar, hterms with more variables can be processed
      too when a partially applied hterm was an argument to a nonterminal, and was substituted for
      a variable in this nonterminal gaining additional arguments. Some extra hterms may be
      processed, since all possibilities are iterated over when encountering a variable, without
      regard for context. *)
  val mutable hterm_queue : hterm list = []

  (** A set of hterms that were already processed kept to not process them again. *)
  val mutable visited_nodes : HTermSet.t = HTermSet.empty
  
  (* --- output of 0CFA --- *)

  (* TODO do cleanup on what is sorted and what has no duplicates. *)

  (** hterms_are_arg[id] contains a boolean whether hterms with given id can possibly be an
      argument to a nonterminal. It contains exactly the ids of hterms may be an argument to
      a nonterminal according to 0CFA, so there may be false positives. *)
  val hterms_are_arg : bool array = Array.make hg#hterms_count false

  (** nt_bindings[nt] contains a list of bindings for nonterminal nt. Each binding corresponds
      to a possible application of nonterminal nt detected by 0CFA and is a list of tuples
      (i, j, id) stating that hterms identified by id are arguments from i-th to j-th (0-indexed)
      in given application.
      In short, this tells which lists hterms flow into which arguments of a nonterminal. *)
  val nt_bindings : hterms_id binding list array = Array.make hg#nt_count []

  (** var_bindings[nt][i] contains a list of hterms that may be i-th argument (0-indexed) to
      nonterminal nt according to 0CFA. *)
  val var_bindings : hterm list array array =
    Array.init hg#nt_count (fun i -> Array.make (hg#nt_arity i) [])

  (** nt_bindings_applied_to_hterms[id] contains tuples of nonterminal ids and hterms bindings
      under which these nonterminals were applied to hterms identified by id. *)
  val nt_bindings_applied_to_hterms : (nt_id * hterms_id binding) list array =
    Array.make hg#hterms_count []

  (** variable_head_nodes[nt][i] is a list of processed hterms that have variable (nt, i) as
      head, i.e., i-th variable in definition of nonterminal nt. Internal for this module. *)
  val variable_head_nodes : hterm list array array = Array.make hg#nt_count [||]

  (** nt_headvars[f] is a list of head variables in nonterminal's definition, i.e., variables
      that are applied to something or the sole variable if the term is an identity. *)
  val nt_headvars : vars array = Array.make hg#nt_count SortedVars.empty

  (** hterms_where_hterms_flow[id] contains a list of ids of hterms that contain a variable that
      could be substituted with a hterm from hterms idenfitied by id. In other words, this is a
      list of ids of hterms where hterms idenfified by id flow. *)
  val hterms_where_hterms_flow : hterms_ids array =
    Array.make hg#hterms_count SortedHTermsIds.empty

  (** hterms_bindings[id] describes which hterms (sequences of terms) substitute which
      variables in hterms identified by id.
      Each binding is a list of tuples (i, j (v, h)) that describes that hterms identified by h
      were arguments from i-th to j-th (0-indexed) to a nonterminal nt in which hterms identified
      by id are defined, and that variables with indexes v (i.e., (nt, x) for all x in v) were
      substituted with hterms identified by h. *)
  val hterms_bindings : hterms_id binding list array = Array.make hg#hterms_count []

  (** hterms_containing_nt[nt] is a list of ids of hterms whose definitions contain nonterminal
      nt and these hterms were used as an argument to some nonterminal. *)
  val hterms_containing_nt : hterms_ids array = Array.make hg#nt_count SortedHTermsIds.empty
  
  (** nt_containing_nt[nt] contains ids of all nonterminals that have nonterminal with id nt
      present in their body. *)
  val nt_containing_nt : nts array = Array.make hg#nt_count SortedNTs.empty

  (* --- access --- *)

  method hterms_are_arg (id : hterms_id) : bool =
    hterms_are_arg.(id)
  
  method nt_has_headvars (nt : nt_id) : bool =
    not @@ SortedVars.is_empty nt_headvars.(nt)

  method lookup_binding_var (nt, i) = 
    var_bindings.(nt).(i)

  method lookup_nt_bindings (nt : nt_id) : hterms_id binding list =
    nt_bindings.(nt)

  (* --- modificaiton --- *)

  method register_variable_head_node (v : var_id) (hterm: hterm) =
    let nt, i = v in
    let a = variable_head_nodes.(nt) in
    a.(i) <- hterm :: a.(i)
    
  (* --- bindings --- *)

  method register_nt_bindings_applied_to_hterms (id : hterms_id) (nt : nt_id)
      (binding : hterms_id binding) =
    let x = nt_bindings_applied_to_hterms.(id) in
    (* TODO make sure there are no copies *)
    nt_bindings_applied_to_hterms.(id) <- (nt, binding) :: x

  method get_nt_bindings_applied_to_hterms (id : hterms_id) : (nt_id * hterms_id binding) list =
    nt_bindings_applied_to_hterms.(id) 

  (** When there was an enqueued node f [id1, id2, ...] for some nonterminal f and args id id1, id2,
      ... and with states qs. ids are converted to a list of arguments through tab_id_terms.
      If we join all these lists, we get arg1, arg2, ..., where the position of each argument is
      position of argument in its idX plus length of arguments of previous lists that id(1..X-1)
      stand for.
      This function prepends to bindings a tuple (A, ref qs), where A is a list of (n, m, id), where
      id is the args_id from tab_id_terms, and n, n+1, ..., m are numbers that represent which
      arguments does that list stand for (e.g., arg3, arg4 in f arg1 arg2 arg3 arg4 arg5). *)
  method insert_nt_binding (args : int list) bindings =
    (* refines a list of hterms by adding information on the first and last index of hterms in
       a concatenated list of all hterms, e.g., [[h1; h2]; [h3]] changes to
       [(0, 1, [h1; h2]); (2, 2, [h3])]. *)
    let rec add_index rho i =
      match rho with
      | [] -> []
      | ids :: rho' -> (* for each args_id *)
        (* check how many terms are under args_id *)
        let n = List.length @@ hg#id2terms ids in
        let j = i + n in
        (i, j - 1, ids) :: add_index rho' j
    in
    let iargs = add_index args 0 in
    iargs::bindings

  (** Prepends term to list terms if it is not already there. Returns tuple of resulting list and
      boolean whether it was prepended to list. *)
  method insert_var_binding (term : 'a) (terms : 'a list) : 'a list * bool =
    if List.mem term terms then
      (terms, false)
    else
      (term :: terms, true)

  (* --- 0CFA main phase --- *)

  method enqueue_hterm hterm =
    hterm_queue <- hterm :: hterm_queue

  method dequeue_hterm =
    match hterm_queue with
    | [] -> None
    | x :: q ->
      hterm_queue <- q;
      Some x

  method expand_varheadnode term (hterm: hterm) =
    let h, termss = hterm in
    match h with
    | HVar x ->
      let (h', terms) = term in
      self#enqueue_hterm (h', terms@termss)
    | _ -> assert false

  (** Iterates (expand_varheadnode term) over everything in variable_head_nodes[nt_id][arg_id] *)
  method expand_varheadnodes nt i term =
    let nodes = variable_head_nodes.(nt).(i) in
    List.iter (self#expand_varheadnode term) nodes

  (** Called when term was i-th argument in a call f arg1 arg2 ...
      It makes sure that var_bindings[f][i] contains terms (H, [ID..]) that were
      i-th argument to nonterminal f, and calls expand_varheadnodes f i term when it was the first
      time adding it to var_bindings. *)
  method register_binding_singlevar nt i term =
    let tab = var_bindings.(nt) in
    (* making sure term is in var_bindings[nt_id][arg_id] *)
    let binds, changed = self#insert_var_binding term tab.(i) in
    if changed then (* if it was added just now *)
      begin
        tab.(i) <- binds; (* persist the addition *)
        self#expand_varheadnodes nt i term
      end

  (** converts rho to lists of args in form (H, [ID..]) as they would appear in
      f arg1 arg2 ...
      after all args_id lists of arguments were concatenated.
      Then it calls
      register_binding_singlevar f i arg_i
      for each arg_i = arg1, arg2, ..., i.e., that arg_i was i-th argument to which f was applied *)
  method register_var_bindings f rho i =
    match rho with
      [] -> ()
    | termsid :: rho' ->
      let hterms = hg#id2hterms termsid in (* convert args_id back to a list of (H, [ID..]) *)
      let hterms' = index_list hterms in (* numbering these terms *)
      List.iter (* for each term register_binding_singlevar f global-numbering-id hterm *)
        (fun (j, hterm)-> self#register_binding_singlevar f (i+j) hterm)
        hterms';
      self#register_var_bindings f rho' (i + List.length hterms) (* recursively *)

  (** Register information that there was a call f args, i.e., save this as a binding in
      nt_bindings and register in hterms_are_arg that args were an argument to nonterminal f. *)
  method register_binding (nt : int) (args : int list) =
    (* nt_bindings[nt] contains a list of
       ((number of first term in app, number of last term in app, args_id for first bundle of terms in app)::..., states under which visited) *)
    List.iter (fun id ->
        hterms_are_arg.(id) <- true
      ) args;
    nt_bindings.(nt) <- self#insert_nt_binding args nt_bindings.(nt);
    self#register_var_bindings nt args 0

  (** Marking in visited_nodes a hterm that started being processed. *)
  method register_hterm (hterm : hterm) = 
    if HTermSet.mem hterm visited_nodes then
      failwith "Registering twice the same hterm.";
    visited_nodes <- HTermSet.add hterm visited_nodes

  (** Enqueueing all argument hterms. The terminal does not matter. *)
  method expand_terminal termss =
    List.iter self#enqueue_hterm termss

  method process_hterm (hterm : hterm) =
    (* each hterm may be processed at most once *)
    if not @@ HTermSet.mem hterm visited_nodes then
      begin
        let h, h_args = hterm in
        (* saving that the hterm is already processed *)
        self#register_hterm hterm;
        (* expanding the calls to see what hterms go in what variables *)
        match h with
        | HT _ ->
          (* decoding arguments as hterm ids into hterms *)
          let _, termss = hg#decompose_hterm hterm in
          (* ignore the terminal and go deeper *)
          self#expand_terminal termss
        | HNT nt ->
          (* Remember in nt_bindings that there was an application f h_args. Also remember
             that h_args were being used as an argument to a nonterminal in hterms_are_arg. *)
          self#register_binding nt h_args;
          (* Enqueue the body hterm, ignoring all arguments. Note that arguments are not enqueued.
             If the arguments don't have kind O, they can be passed as arguments again, or used
             as variable head, which finds the original terms. *)
          self#enqueue_hterm @@ hg#nt_body nt
        | HVar x ->
          (* check what hterms flow into x (these can also be variables) *)
          let x_hterms = self#lookup_binding_var x in
          (* substitute these hterms into x and enqueue resulting application *)
          List.iter (fun (h, x_args) ->
              self#enqueue_hterm (h, x_args @ h_args)
            ) x_hterms;
          self#register_variable_head_node x hterm
      end

  (** Expand hterms in queue until it's empty. *)
  method expand : unit =
    match self#dequeue_hterm with
    | None ->
      print_verbose !Flags.verbose_preprocessing @@ lazy (
        self#string_of_binding_array ^ "\n" ^
        "Size of abstract control flow graph: " ^
        string_of_int (HTermSet.cardinal visited_nodes)
      )
    | Some hterm ->
      self#process_hterm hterm;
      self#expand

  (* --- accessing dependencies --- *)

  method get_hterms_containing_nt (nt : nt_id) =
    hterms_containing_nt.(nt)

  method get_nt_containing_nt (nt : nt_id) : nts =
    nt_containing_nt.(nt)

  method get_hterms_where_hterms_flow id =
    hterms_where_hterms_flow.(id)

  method get_hterms_bindings (id : hterms_id) : hterms_id binding list =
    hterms_bindings.(id)

  (* --- registering dependencies --- *)

  (** Appends hterm id2 to id1's list of hterms_where_hterms_flow. *)
  method register_hterms_where_hterms_flow id1 id2 =
    hterms_where_hterms_flow.(id1) <- SortedHTermsIds.merge hterms_where_hterms_flow.(id1) @@
      SortedHTermsIds.singleton id2

  method register_hterms_bindings id bindings =
    hterms_bindings.(id) <- bindings (* only place with actual modification *)
  
  (* nt occurs in the term id *)
  method private register_hterms_containing_nt nt id =
    (* note that this function can never be called with the same (nt,id) pair *)
    let ids = SortedHTermsIds.merge hterms_containing_nt.(nt) (SortedHTermsIds.singleton id) in
    hterms_containing_nt.(nt) <- ids

  method private register_dep_nt_nt nt1 nt2 =
    let nts = nt_containing_nt.(nt1) in
    let nts' = SortedNTs.merge (SortedNTs.singleton nt2) nts in
    nt_containing_nt.(nt1) <- nts'

  (* --- computing dependencies --- *)

  (** Given a list of variables v1, v2, ... from a nonterminal f and bindings from application
      f as1 as2 ..., where asX translates to lists of terms through tab_id_terms, it returns all
      bindings (i, j, asY) such that i <= vK <= j for some K, i.e., that at least one of terms in
      tab_id_terms[asY] was substituted for some variable in nonterminal f. *)
  method filter_binding (vars : int list) (binding : hterms_id binding) : hterms_id binding =
    match binding with
    | [] -> []
    | (i, j, id) :: binding' ->
      match vars with
      | [] -> []
      | v::vars' ->
        if v < i then
          self#filter_binding vars' binding
        else if v <= j then
          (i, j, id) :: self#filter_binding vars' binding'
        else
          self#filter_binding vars binding'

  (** Computes all hterms identifiers present in a list of binding tuples, without duplicates. *)
  method ids_in_bindings bindings =
    let ids =
      List.fold_left (fun ids binding ->
          List.rev_append (List.map (fun (_, _, id) -> id) binding) ids
        ) [] bindings
    in
    sort_and_delete_duplicates ids

  (** Called for each term with term_id equal to id, that has free variables var, such that this
      term was an argument to a nonterminal. *)
  method mk_binding_depgraph_for_terms id vars =
    if SortedVars.is_empty vars then
      self#register_hterms_bindings id [[]]
    else
      (* figure out in which nonterminal the term is defined using variable id *)
      let f = fst @@ SortedVars.hd vars in
      let vars' = SortedVars.map snd vars in
      let bindings = nt_bindings.(f) in
      let bindings' =
        sort_and_delete_duplicates @@
        List.rev_map (fun binding -> self#filter_binding vars' binding) bindings
      in
      (* asX that are substituted into f's variables *)
      let ids = self#ids_in_bindings bindings' in
      (* register that term with given term id had was present in some nonterminal and
         given term sequences with id asX were substituted for arguments i-j of this
         nonterminal and specifically the list of variables used was vsX *)
      self#register_hterms_bindings id bindings';
      List.iter (fun id1 -> self#register_hterms_where_hterms_flow id1 id) ids
  (* appending current hterm's id to hterms_where_hterms_flow[asX] if f was applied to asX making a
     substitution of some variable in hterm to asX *)
  
  method mk_binding_depgraph_for_nt (nt : int) : hterms_id binding list -> unit =
    (* when no vars are only in arguments of nonterminals and terminals *)
    (* if no variable occurs in the head position, we do not use binding information to compute
       the type of f *)
    List.iter (fun binding ->
        binding |> List.iter (
          (* TODO possible duplicates when a binding has two same hterms *)
          fun (_, _, id) -> self#register_nt_bindings_applied_to_hterms id nt binding
        )
      )

  method compute_dependencies =
    for nt = 0 to hg#nt_count - 1 do
      nt_headvars.(nt) <- hg#headvars_in_nt nt;
      self#mk_binding_depgraph_for_nt nt nt_bindings.(nt)
    done;
    (* make dependency nt --> id (which means "id depends on nt") *)
    for id' = 0 to hg#hterms_count - 1 do (* for each hterm *)
      let id = hg#hterms_count - 1 -id' in
      if hterms_are_arg.(id) then (* that had something applied to it *)
        let nts = hg#nts_in_hterms id in (* list of used nonterminals *)
        SortedNTs.iter (fun nt -> self#register_hterms_containing_nt nt id) nts
    done;
    for id = 0 to hg#hterms_count - 1 do
      if hterms_are_arg.(id) then
        let vars = hg#id2vars id in (* for each term with given id that was an argument
                                       to a nonterminal and had free variables vars *)
        self#mk_binding_depgraph_for_terms id vars
    done;
    (* make dependency nt1 --> nt2 (which means "nt2 depends on nt1") *)
    for nt1 = 0 to hg#nt_count - 1 do
      let nts = hg#nts_in_nt nt1 in
      nts |> SortedNTs.iter (fun nt2 -> self#register_dep_nt_nt nt2 nt1)
    done;
    print_verbose !Flags.verbose_preprocessing @@ lazy (
      self#string_of_hterms_bindings ^
      "\n\nNonterminals with head variables: " ^
      (concat_map ", " hg#nt_name @@ List.filter_map (fun nt ->
           if self#nt_has_headvars nt then
             None
           else
             Some nt
         ) @@ range 0 @@ hg#nt_count) ^
      "\n"
    )

  (* --- printing --- *)

  method string_of_binding (binding : hterms_id binding) : string=
    String.concat ", " @@ List.map (fun (i, j, id) ->
        string_of_int i ^ "-" ^ string_of_int j ^ " <- " ^ string_of_int id
      ) binding

  method string_of_binding_array : string =
    "bindings (nt -> bindings, one per line, comma-sep same-nt parts):\n" ^
    String.concat "\n\n" @@ array_listmap nt_bindings (fun nt binding ->
        hg#nt_name nt ^ ":\n" ^
        String.concat "\n" @@ List.map self#string_of_binding (nt_bindings).(nt)
      )

  method string_of_hterms_bindings : string =
    "Dependency info (hterms_id -> [(from,to,hterms_id); ...]):\n" ^
    String.concat "\n" @@ List.filter (fun s -> s <> "") @@
    array_listmap hterms_bindings (fun id binding ->
        if hterms_are_arg.(id) then
          string_of_int id ^ ":\n" ^
          concat_map "\n" self#string_of_binding hterms_bindings.(id)
        else
          ""
      )

  initializer
    for i = 0 to hg#nt_count - 1 do
      variable_head_nodes.(i) <- Array.make (hg#nt_arity i) []
    done;
    (* enqueue the first nonterminal with no args *)
    self#enqueue_hterm (HNT 0, [])
end
back to top