https://github.com/vsiivola/variKN
Revision fc6cd7a70463888ec0d3cc010b7a9ffcaf099193 authored by Sami Virpioja on 28 January 2023, 11:50:28 UTC, committed by Sami Virpioja on 28 January 2023, 11:53:05 UTC
1 parent 24385dc
Raw File
Tip revision: fc6cd7a70463888ec0d3cc010b7a9ffcaf099193 authored by Sami Virpioja on 28 January 2023, 11:50:28 UTC
test macos-12 wheels and test more python versions
Tip revision: fc6cd7a
add_zeroprob_grams.cc
// This program adds n-grams to the model so that it can be written as a
// full prefix tree ARPA model. This is required for compatibility with some
// other tools. The binary format used also requires this.
#include <memory>
#include "HashGram.hh"
#include "conf.hh"
#include "io.hh"

int main(int argc, char *argv[]) {
  conf::Config config;
  config(
      "Usage: add_zeroprob_grams arpain arpaout\nAdds grams for treegram.\n")(
      's', "smallvocab", "", "",
      "Vocabulary is less than 65000 entries. Saves some memory.");
  config.parse(argc, argv, 2);

  const bool smallvocab = config["smallvocab"].specified;

  io::Stream::verbose = true;
  io::Stream in(config.arguments[0], "r");
  io::Stream out(config.arguments[1], "w");

  std::unique_ptr<HashGram> hg(
      smallvocab ? std::unique_ptr<HashGram>(new HashGram_t<unsigned short>())
                 : std::unique_ptr<HashGram>(new HashGram_t<int>()));
  fprintf(stderr, "Reading\n");
  hg->read(in.file);
  in.close();

  fprintf(stderr, "Adding zpgs\n");
  hg->add_zeroprob_grams();

  fprintf(stderr, "Writing\n");
  hg->write(out.file);
  out.close();
}
back to top