https://bitbucket.org/tabourier/rankmerging/src/master/
Tip revision: 9693ded52a16a97b89bb3f7f252bf35d2dc26416 authored by Lionel on 23 April 2020, 15:23:55 UTC
ajouts licence
ajouts licence
Tip revision: 9693ded
merge_test.ml
(* ocamlopt -o Merge_test str.cmxa unix.cmxa nums.cmxa merge_test.ml *)
(* ***** author and license ***** *)
(* written by Lionel Tabourier, code under the terms of the Creative Commons 4.0 CC-BY License *)
(* seed the pseudo-random generator once, at program start-up *)
let () = Random.self_init ();;
(* short aliases for the standard conversion functions *)
(* ios : string -> int *)
let ios = int_of_string;;
(* soi : int -> string *)
let soi = string_of_int;;
(* fos : string -> float *)
let fos = float_of_string;;
(* foi : int -> float *)
let foi = float_of_int;;
(* iof : float -> int (truncates toward zero) *)
let iof = int_of_float;;
(* ***** input definitions ***** *)
(* command-line arguments, read positionally; a missing argument raises Invalid_argument, a malformed number raises Failure *)
(* argv 1 : number of nodes, used as the size of the adjacency table built in test_loop *)
let in_num_nodes = int_of_string Sys.argv.(1);;
(* argv 2 : number of predictions to produce, also the number of rows written to the output file *)
let in_num_pred = int_of_string Sys.argv.(2);;
(* argv 3 : name of the training file read by test_loop *)
let in_training = Sys.argv.(3);;
(* argv 4 : factor applied to every threshold read from the training file *)
let in_size_ratio = float_of_string Sys.argv.(4);;
(* argv 5 : number of rankings, i.e. number of thresholds used on each training line *)
let in_num_ranking = int_of_string Sys.argv.(5);;
(* argv 6 : names of the ranking files, separated by ',' *)
let in_list_ranking = Sys.argv.(6);;
(* argv 7 : key inserted in the output file name ./ranking_<key>.txt *)
let in_external_key = Sys.argv.(7);;
(* ***** general tools ***** *)
(* print_output_list : stream List[int] -> () *)
(* print_output_list output_channel lst : writes every integer of lst to output_channel, in order, each one followed by a tab character *)
let print_output_list output_channel lst =
  let write_item x =
    output_string output_channel (soi x);
    output_string output_channel "\t"
  in
  List.iter write_item lst;;
(* print_output_table_plus : stream Table[int][int] int -> () *)
(* print_output_table_plus output_channel arr lim : writes the first lim rows of arr (or all of them if arr is shorter) to output_channel, one row per line; every cell is followed by a tab and cells equal to -1 are omitted *)
let print_output_table_plus output_channel arr lim =
  let write_cell y =
    (* -1 marks an absent value and is not printed *)
    if y <> -1 then begin
      output_string output_channel (soi y);
      output_string output_channel "\t"
    end
  in
  let last_row = (min lim (Array.length arr)) - 1 in
  for i = 0 to last_row do
    Array.iter write_cell arr.(i);
    output_string output_channel "\n"
  done;;
(* ***** reading files ***** *)
(* checker : string -> bool *)
(* checker s : yields true when string s is not "" *)
let checker s = (s <> "");;
(* max_line : string -> int *)
(* max_line name : yields the number of non-empty lines in the file name *)
(* the whole file is scanned: an empty line is skipped, it does not stop the count *)
(* the channel is closed on every exit path; any exception other than End_of_file is re-raised after closing *)
let max_line name =
  let data = open_in name in
  let maximum = ref 0 in
  begin
    try
      while true do
        if checker (input_line data) then incr maximum
      done
    with
    | End_of_file -> close_in data
    | e -> close_in_noerr data; raise e
  end;
  !maximum;;
(* standard_reader : string -> List[int] *)
(* standard_reader s : cuts s at every run of ' ' or '\t' characters and converts each piece to an integer; raises Failure if a piece is not an integer *)
let standard_reader s =
  s
  |> Str.split (Str.regexp "[ \t]+")
  |> List.map int_of_string;;
(* rank_reader : string -> List[string] *)
(* rank_reader s : cuts s at every run of ' ' or '\t' characters and yields the pieces, unconverted *)
let rank_reader s =
  let separator = Str.regexp "[ \t]+" in
  Str.split separator s;;
(* list_arg_reader : string -> List[string] *)
(* list_arg_reader s : cuts s at every run of ',' characters and yields the pieces *)
let list_arg_reader s =
  let separator = Str.regexp "[,]+" in
  Str.split separator s;;
(* ranking_maker : string int -> List[Table[int]] *)
(* ranking_maker names num_nodes : reads every file whose name appears in names (names separated with ',') and yields one table per file, in the same order *)
(* each row of a table is [| node_1; node_2; score |], taken from the first three fields of a line (fields separated by ' ' or '\t'); score is -1 when the line has only two fields *)
(* blank lines are skipped, so a table has exactly one row per data line *)
(* num_nodes is not used here: fields number 1 and 2 are expected to be in the range [0:num_nodes-1] but this is not checked *)
(* raises Failure when a data line has fewer than two fields or a field that is not an integer; the file is closed before any exception is re-raised *)
let ranking_maker names num_nodes =
  (* read_ranking name : builds the table of one ranking file in a single pass *)
  let read_ranking name =
    let data = open_in name in
    (* rows are accumulated in reverse order and put back in file order at the end *)
    let rows = ref [] in
    begin
      try
        while true do
          match rank_reader (input_line data) with
          | [] -> ()
          | [_] ->
            failwith ("ranking_maker: line with fewer than 2 fields in " ^ name)
          | field_1 :: field_2 :: rest ->
            let node_1 = ios field_1 in
            let node_2 = ios field_2 in
            let score =
              (match rest with
               | [] -> -1
               | field_3 :: _ -> ios field_3)
            in
            rows := [| node_1; node_2; score |] :: !rows
        done
      with
      | End_of_file -> close_in data
      | e -> close_in_noerr data; raise e
    end;
    Array.of_list (List.rev !rows)
  in
  List.map read_ranking (list_arg_reader names);;
(* ***** test phase ***** *)
(* test_loop : string List[Table[int]] int int int float string -> Table[int][int] *)
(* test_loop name_training ranking_list num_pred num_nodes num_ranking size_ratio external_key : merges the rankings of ranking_list into a single ranking, following the thresholds read in file name_training *)
(* each line of name_training gives, in its first num_ranking fields, one threshold per ranking; every threshold is multiplied by size_ratio and rounded down *)
(* for each ranking in turn, the entries located between the position reached at the previous line and the threshold are scanned, and every pair of nodes not already merged is appended to the result *)
(* the merge stops when num_pred pairs have been collected or when name_training is exhausted *)
(* result : table of 2*num_pred rows [| node_1; node_2; score |] initialised to 0, filled from row 0; a single threshold step may add pairs beyond row num_pred-1, never beyond the end of the table *)
(* nodes found in the rankings must be in the range [0:num_nodes-1], otherwise Invalid_argument is raised *)
(* raises Failure when a line of name_training has fewer than num_ranking fields or when ranking_list has fewer than num_ranking tables *)
(* external_key is not used *)
let test_loop name_training ranking_list num_pred num_nodes num_ranking size_ratio external_key =
  let size_mixed_ranking = 2 * num_pred in
  let mixed_ranking = Array.make_matrix size_mixed_ranking 3 0 in
  (* number of pairs written in mixed_ranking so far *)
  let counter_ranking = ref 0 in
  (* ranked_ael.(n) : nodes already paired with n in the merged ranking *)
  let ranked_ael = Array.make num_nodes [] in
  let category = ref 0 in
  let line = ref [] in
  let previous_line = ref [] in
  (* *** initialization *** *)
  (* index initialization : every ranking starts at position 0 *)
  let index_list = ref [] in
  let maj_index_list = ref [] in
  for _i = 1 to num_ranking do
    maj_index_list := 0 :: !maj_index_list
  done;
  (* *** merging ranking according to files read *** *)
  let data = open_in name_training in
  begin
    try
      while !counter_ranking < num_pred do
        (* the training file contains extra unused fields, so only the first num_ranking elements are read *)
        (* every threshold is rescaled by size_ratio, the size ratio between training set and test set *)
        let raw_line = ref (standard_reader (input_line data)) in
        for _i = 0 to num_ranking - 1 do
          let stop = List.hd !raw_line in
          line := (iof (floor ((foi stop) *. size_ratio))) :: !line;
          raw_line := List.tl !raw_line
        done;
        line := List.rev !line;
        (* after rescaling, two consecutive lines may be identical : such a line brings nothing new and is skipped *)
        if !line <> !previous_line then begin
          previous_line := !line;
          index_list := !maj_index_list;
          maj_index_list := [];
          category := 0;
          while !line <> [] && !counter_ranking < num_pred do
            let stop = List.hd !line in
            let index = ref (List.hd !index_list) in
            let ranking = List.nth ranking_list !category in
            (* advance in the current ranking up to its threshold, adding every pair not seen yet *)
            (* the last condition keeps the writes inside mixed_ranking when one step brings many new pairs *)
            while !index < stop
                  && !index < Array.length ranking
                  && !counter_ranking < size_mixed_ranking do
              let node_1 = ranking.(!index).(0) in
              let node_2 = ranking.(!index).(1) in
              let score = ranking.(!index).(2) in
              if not (List.mem node_2 ranked_ael.(node_1)) then begin
                (* record the pair in both directions *)
                ranked_ael.(node_1) <- node_2 :: ranked_ael.(node_1);
                ranked_ael.(node_2) <- node_1 :: ranked_ael.(node_2);
                mixed_ranking.(!counter_ranking).(0) <- node_1;
                mixed_ranking.(!counter_ranking).(1) <- node_2;
                mixed_ranking.(!counter_ranking).(2) <- score;
                counter_ranking := !counter_ranking + 1
              end;
              index := !index + 1
            done;
            line := List.tl !line;
            index_list := List.tl !index_list;
            category := !category + 1;
            (* remember where this ranking stopped, for the next training line *)
            maj_index_list := !index :: !maj_index_list
          done;
          maj_index_list := List.rev !maj_index_list
        end else
          line := []
      done;
      (* enough pairs were collected before the end of the file *)
      close_in data
    with
    | End_of_file -> close_in data
    | e -> close_in_noerr data; raise e
  end;
  mixed_ranking;;
(* ***** execution ***** *)
(* load every ranking file given on the command line *)
let my_list_ranking = ranking_maker in_list_ranking in_num_nodes;;
(* merge the rankings according to the training file *)
let my_mixed_ranking =
  test_loop in_training my_list_ranking in_num_pred in_num_nodes
    in_num_ranking in_size_ratio in_external_key;;
(* write the first in_num_pred rows of the merged ranking to ./ranking_<key>.txt *)
let () =
  let output_ranking_channel =
    open_out ("./ranking_" ^ in_external_key ^ ".txt") in
  print_output_table_plus output_ranking_channel my_mixed_ranking in_num_pred;
  close_out output_ranking_channel;;