Revision a6a40d62517655a8f72002ab395e1e63b6251886 authored by Marge Bot on 06 February 2024, 08:47:30 UTC, committed by Marge Bot on 06 February 2024, 08:47:30 UTC
Co-authored-by: Alain Mebsout <alain.mebsout@functori.com>

Approved-by: Sylvain R. <sylvain.ribstein@nomadic-labs.com>
Approved-by: Nic Volanschi <nic.volanschi@nomadic-labs.com>
Approved-by: Killian Delarue <killian.delarue@nomadic-labs.com>
Approved-by: Victor Allombert <victor.allombert@nomadic-labs.com>

See merge request https://gitlab.com/tezos/tezos/-/merge_requests/11829
2 parent s d9ecf96 + 87fd6cf
Raw File
csv.ml
(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2020 Nomadic Labs. <contact@nomadic-labs.com>               *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

type csv = string list list

let all_equal (l : int list) =
  let rec loop l (elt : int) =
    match l with [] -> true | hd :: tl -> hd = elt && loop tl elt
  in
  match l with [] -> true | hd :: tl -> loop tl hd

module String_set = Set.Make (String)

let disjoint_headers (csv1 : csv) (csv2 : csv) =
  match (csv1, csv2) with
  | [], _ | _, [] -> true
  | header1 :: _, header2 :: _ ->
      let header1 = String_set.of_list header1 in
      let header2 = String_set.of_list header2 in
      String_set.disjoint header1 header2

let concat ?(check_disjoint_headers = true) (csv1 : csv) (csv2 : csv) : csv =
  (* Check that both CSVs have the same number of lines. *)
  if Compare.List_lengths.(csv1 <> csv2) then
    Stdlib.failwith "Csv.concat: CSVs have different length"
  else
    (* Check that each line has the same number of columns *)
    let lengths1 = List.map List.length csv1 in
    let lengths2 = List.map List.length csv2 in
    if not (all_equal lengths1) then
      let msg = "Csv.concat: first argument has uneven # of lines" in
      Stdlib.failwith msg
    else if not (all_equal lengths2) then
      let msg = "Csv.concat: second argument has uneven # of lines" in
      Stdlib.failwith msg
    else if check_disjoint_headers && not (disjoint_headers csv1 csv2) then
      let msg = "Csv.concat: headers are not disjoint" in
      Stdlib.failwith msg
    else
      (* see top if condition *)
      WithExceptions.List.map2
        ~loc:__LOC__
        (fun line1 line2 -> line1 @ line2)
        csv1
        csv2

let export ~filename ?(separator = ',') ?(linebreak = '\n') (data : csv) =
  Format.eprintf "Exporting to %s@." filename ;
  let sep_str = String.make 1 separator in
  Out_channel.with_open_text filename @@ fun outfile ->
  let fmtr = Format.formatter_of_out_channel outfile in
  List.iter
    (fun line ->
      match line with
      | [] -> ()
      | _ ->
          let s = String.concat sep_str line in
          Format.fprintf fmtr "%s%c@?" s linebreak)
    data

let read_lines name : string list =
  In_channel.with_open_text name @@ fun ic ->
  let rec loop acc =
    match In_channel.input_line ic with
    | Some s -> loop (s :: acc)
    | None -> List.rev acc
  in
  loop []

exception Empty_csv_file

let import ~filename ?(separator = ',') () : csv =
  Format.eprintf "Importing %s@." filename ;
  let lines = read_lines filename in
  let header, rows =
    match lines with
    | [] -> raise Empty_csv_file
    | header :: tail -> (header, tail)
  in
  let header = String.split_on_char separator header in
  let ncols = List.length header in
  let rows = List.map (String.split_on_char separator) rows in
  if not (List.for_all (fun l -> Compare.List_length_with.(l = ncols)) rows)
  then Stdlib.failwith "Csv.import: mismatch between header width and row width" ;
  header :: rows

let append_columns ~filename ?(separator = ',') ?(linebreak = '\n') (data : csv)
    =
  let file_data =
    try import ~filename ~separator ()
    with Sys_error _ | Empty_csv_file ->
      (* If the target file does not exist or is empty, we create a dummy
         CSV matrix with the expected dimensions. *)
      List.map (fun _ -> []) data
  in
  let csv_data = concat file_data data in
  export ~filename ~separator ~linebreak csv_data

let export_stdout ?(separator = ',') ?(linebreak = '\n') (data : csv) =
  Format.eprintf "Exporting to stdout@." ;
  let sep_str = String.make 1 separator in
  List.iter
    (fun line ->
      let s = String.concat sep_str line in
      Format.printf "%s%c" s linebreak)
    data ;
  flush stdout
back to top