(* Source repository: https://bitbucket.org/tabourier/rankmerging/src/master/ *)
(* File: merge_test.ml (raw file view) *)
(* Tip revision: 9693ded52a16a97b89bb3f7f252bf35d2dc26416 (short id 9693ded), authored by Lionel on 23 April 2020, 15:23:55 UTC *)
(* Commit message: ajouts licence (license added) *)
(* ocamlopt -o Merge_test str.cmxa unix.cmxa nums.cmxa merge_test.ml *)

(* ***** author and license ***** *)
(* written by Lionel Tabourier, code under the terms of the Creative Commons 4.0 CC-BY License *)


(* seed the pseudo-random generator once, when the program starts *)
let () = Random.self_init ();;

(* short aliases for the standard conversion functions *)
let ios = int_of_string;;     (* string -> int *)
let soi = string_of_int;;     (* int -> string *)
let fos = float_of_string;;   (* string -> float *)
let foi = float_of_int;;      (* int -> float *)
let iof = int_of_float;;      (* float -> int *)

(* ***** input definitions ***** *) 

(* seven positional arguments are read below : stop with a usage message instead of an index error when some are missing *)
let () =
  if Array.length Sys.argv < 8 then
    begin
      prerr_endline ("usage: " ^ Sys.executable_name ^ " num_nodes num_pred training_file size_ratio num_ranking ranking_file_1,ranking_file_2,... external_key");
      exit 2
    end;;

(* number of nodes, used to size the per-node neighbour table of test_loop *)
let in_num_nodes = ios Sys.argv.(1);;
(* number of rows to produce in the merged ranking *)
let in_num_pred = ios Sys.argv.(2);;
(* name of the training file read by test_loop *)
let in_training = Sys.argv.(3);;
(* factor applied to every value read from the training file *)
let in_size_ratio = fos Sys.argv.(4);;
(* number of rankings to merge *)
let in_num_ranking = ios Sys.argv.(5);;
(* names of the ranking files, separated by commas *)
let in_list_ranking = Sys.argv.(6);;
(* key inserted in the name of the output file *)
let in_external_key = Sys.argv.(7);;


(* ***** general tools ***** *)

(* print_output_list : out_channel List[int] -> () *)
(* print_output_list output_channel lst : writes every integer of lst to output_channel, in order, each one followed by a tabulation *)
let print_output_list output_channel lst =
  List.iter
    (fun x ->
      output_string output_channel (soi x);
      output_string output_channel "\t")
    lst;;

(* print_output_table_plus : out_channel Table[Table[int]] int -> () *)
(* print_output_table_plus output_channel arr lim : writes the rows of arr whose position is lower than lim to output_channel, one row per line; *)
(* every cell is followed by a tabulation and the cells equal to -1 are left out *)
let print_output_table_plus output_channel arr lim =
  (* rows at position lim and beyond are never printed, so the walk stops there *)
  let last_row = (min lim (Array.length arr)) - 1 in
  for i = 0 to last_row do
    let row = arr.(i) in
    for j = 0 to Array.length row - 1 do
      let cell = row.(j) in
      if cell <> (-1) then
        begin
          output_string output_channel (soi cell);
          output_string output_channel "\t"
        end
    done;
    output_string output_channel "\n"
  done;;


(* ***** reading files ***** *)

(* checker : string -> bool *)
(* checker s : yields true for every string s, the empty string included *)
(* max_line uses it as a loop condition, so it must not reject empty lines *)
let checker (_ : string) = true;;

(* max_line : string -> int *)
(* max_line name : yields the number of lines of the file name, empty lines included *)
(* the channel is closed before returning, and also when reading fails *)
let max_line name =
  let data = open_in name in
  (* the try only wraps input_line so that the recursive call stays a tail call *)
  let rec count seen =
    match (try Some (input_line data) with End_of_file -> None) with
    | Some _ -> count (seen + 1)
    | None -> seen
  in
  let total =
    try count 0
    with e -> (close_in_noerr data; raise e)
  in
  close_in data;
  total;;

(* standard_reader : string -> List[int] *)
(* standard_reader s : cuts s at each run of space or tabulation characters and converts every piece to an integer *)
(* int_of_string raises Failure when a piece is not an integer *)
let standard_reader s =
  s
  |> Str.split (Str.regexp "[ \t]+")
  |> List.map int_of_string;;

(* rank_reader : string -> List[string] *)
(* rank_reader s : cuts s at each run of space or tabulation characters and yields the pieces, left as strings *)
let rank_reader s = Str.split (Str.regexp "[ \t]+") s;;

(* list_arg_reader : string -> List[string] *)
(* list_arg_reader s : cuts s at each run of comma characters and yields the pieces *)
let list_arg_reader s = Str.split (Str.regexp "[,]+") s;;

(* ranking_maker : string int -> List[Table[Table[int]]] *)
(* ranking_maker names num_nodes : names holds file names separated by commas; yields one table per file, in the same order. *)
(* Each table has one row of 3 cells per non-blank line of the file : the first two fields, then the third field or -1 when the line has only two fields. *)
(* Blank lines are skipped. A line with a single field raises Failure, and so does a field that is not an integer. *)
(* num_nodes is not used here : the node numbers are stored as read, without any range check. *)
let ranking_maker names num_nodes =
  let read_ranking name =
    let data = open_in name in
    (* the rows are collected in reverse order, then put back in file order *)
    let rec read_rows acc =
      match (try Some (input_line data) with End_of_file -> None) with
      | None -> List.rev acc
      | Some raw ->
        begin
          match rank_reader raw with
          | [] -> read_rows acc
          | [_] -> failwith ("ranking_maker: line with fewer than two fields in " ^ name)
          | node_1 :: node_2 :: rest ->
            let row = Array.make 3 (-1) in
            row.(0) <- ios node_1;
            row.(1) <- ios node_2;
            begin
              match rest with
              | score :: _ -> row.(2) <- ios score
              | [] -> ()
            end;
            read_rows (row :: acc)
        end
    in
    (* the channel is closed on every path, including a parse error *)
    let rows =
      try read_rows []
      with e -> (close_in_noerr data; raise e)
    in
    close_in data;
    Array.of_list rows
  in
  List.map read_ranking (list_arg_reader names);;


(* ***** test phase ***** *)

(* test_loop : string List[Table[Table[int]]] int int int float any -> Table[Table[int]] *)
(* test_loop name_training ranking_list num_pred num_nodes num_ranking size_ratio external_key : *)
(* merges the rankings of ranking_list into one table of rows (node_1, node_2, score). *)
(* Each line of the file name_training gives, for each of the num_ranking rankings, a position (multiplied by size_ratio and rounded down) *)
(* up to which that ranking is read; a row is kept only when node_2 is not yet recorded as a neighbour of node_1. *)
(* The merge stops once num_pred rows are kept or when the training file ends. *)
(* The table returned has 2*num_pred rows; the rows that were never filled hold zeros. external_key is not used. *)
(* Raises Failure when a training line has fewer than num_ranking fields or when ranking_list has fewer than num_ranking tables. *)
let test_loop name_training ranking_list num_pred num_nodes num_ranking size_ratio external_key =

  (* the innermost loop below is not bounded by num_pred, hence the extra room in the result table *)
  let size_mixed_ranking = 2*num_pred in
  let mixed_ranking = Array.make_matrix size_mixed_ranking 3 0 in
  (* number of rows written in mixed_ranking so far *)
  let counter_ranking = ref 0 in
  (* for each node, the neighbours already present in mixed_ranking *)
  let ranked_ael = Array.make num_nodes [] in
  (* position in ranking_list of the ranking being read *)
  let category = ref 0 in
  let line = ref [] in
  let previous_line = ref [] in

  (* *** initialization *** *)

  (* index initialization : every ranking starts at its first row *)
  let index_list = ref [] in
  let maj_index_list = ref [] in
  for i=1 to num_ranking
  do
    maj_index_list := 0 :: !maj_index_list;
  done;

  (* *** merging ranking according to files read *** *)

  let data = open_in name_training in
  begin
    try
      while (!counter_ranking < num_pred)
      do
        (* the training file may hold extra fields, so only the first num_ranking ones are read *)
        (* each of them is multiplied by the size ratio between training set and test set *)
        let raw_line = ref (standard_reader (input_line data)) in
        for i=0 to num_ranking-1
        do
          let stop = List.hd !raw_line in
          line := (iof (floor ((foi stop) *. size_ratio))) :: !line;
          raw_line := List.tl !raw_line;
        done;
        line := List.rev !line;

        (* after scaling, a line may be identical to the previous one : it is then skipped *)
        if (!line <> !previous_line)
        then
          begin
            previous_line := !line;
            index_list := !maj_index_list;
            maj_index_list := [];
            category := 0;

            while (!line <> [] && !counter_ranking < num_pred)
            do
              let stop = List.hd !line in
              let index = ref (List.hd !index_list) in
              let ranking = List.nth ranking_list !category in
              (* read the ranking from where the previous line left it, up to stop; *)
              (* the last test keeps the writes inside mixed_ranking *)
              while (!index < stop && !index < Array.length ranking && !counter_ranking < size_mixed_ranking)
              do
                let node_1 = ranking.(!index).(0) in
                let node_2 = ranking.(!index).(1) in
                let score = ranking.(!index).(2) in
                if not (List.mem node_2 ranked_ael.(node_1))
                then
                  begin
                    (* record the link in both directions, then append it to the result *)
                    ranked_ael.(node_1) <- node_2 :: ranked_ael.(node_1);
                    ranked_ael.(node_2) <- node_1 :: ranked_ael.(node_2);
                    mixed_ranking.(!counter_ranking).(0) <- node_1;
                    mixed_ranking.(!counter_ranking).(1) <- node_2;
                    mixed_ranking.(!counter_ranking).(2) <- score;
                    counter_ranking := !counter_ranking + 1;
                  end;
                index := !index + 1;
              done;
              line := List.tl !line;
              index_list := List.tl !index_list;
              category := !category + 1;
              (* remember where this ranking stopped, for the next training line *)
              maj_index_list := !index :: !maj_index_list;
            done;
            maj_index_list := List.rev !maj_index_list;
          end
        else
          line := []
      done;
      (* num_pred rows were collected before the end of the file *)
      close_in data
    with
    | End_of_file -> close_in data
    | e -> (close_in_noerr data; raise e)
  end;
  mixed_ranking;;


(* ***** execution ***** *)

(* load every ranking file named on the command line *)
let my_list_ranking = ranking_maker in_list_ranking in_num_nodes;;

(* merge the rankings, guided by the training file *)
let my_mixed_ranking =
  test_loop in_training my_list_ranking in_num_pred in_num_nodes in_num_ranking in_size_ratio in_external_key;;

(* write the first in_num_pred rows of the merged ranking in the current directory *)
let () =
  let output_ranking_channel = open_out ("./ranking_"^in_external_key^".txt") in
  print_output_table_plus output_ranking_channel my_mixed_ranking in_num_pred;
  close_out output_ranking_channel;;

(* back to top *)