Raw File
processPhraseTable.cpp
#include <cstdlib>
#include <iostream>
//#include <fstream>
#include <sstream>
#include <vector>
#include <string>
#include <iterator>
#include <functional>
#include <sys/stat.h>
#include "moses/TypeDef.h"
#include "moses/ConfusionNet.h"
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/InputFileStream.h"
#include "moses/Timer.h"
#include "moses/TranslationModel/PhraseDictionaryTree.h"

using namespace std;
using namespace Moses;

// File-scope Timer instance. It is not referenced anywhere in the visible
// part of this file.
// NOTE(review): presumably kept because other Moses code expects a global
// `timer` symbol -- confirm before removing.
Timer timer;

// Stream a vector as "<size> <elem0> <elem1> ... ": the element count comes
// first, and every field (including the last one) is followed by one space.
// Elements are written with their own operator<<, so nested vectors recurse
// into this overload.
template<typename T>
std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
{
  typedef typename std::vector<T>::const_iterator ElemIter;

  out << x.size() << " ";

  ElemIter cur = x.begin();
  const ElemIter last = x.end();
  while (cur != last) {
    out << *cur << ' ';
    ++cur;
  }
  return out;
}

// Returns true when stat() succeeds on `filename`, i.e. the path can be
// stat'ed by this process; any stat() failure is reported as false.
inline bool existsFile(const char* filename)
{
  struct stat info;
  const int rc = stat(filename, &info);
  return rc == 0;
}
// std::string convenience overload: forwards the underlying C string to the
// const char* version of existsFile.
inline bool existsFile(const std::string& filename)
{
  const char* path = filename.c_str();
  return existsFile(path);
}

// Verifies that the option at argv[i] is followed by at least `needed`
// further command-line arguments. On failure an error naming the option is
// written to stderr and false is returned, so the caller can bail out instead
// of reading argv[argc] (a null pointer) or beyond.
static bool hasOptionArgs(int argc,char **argv,int i,int needed)
{
  if(i+needed<argc) return true;
  std::cerr<<"ERROR: option '"<<argv[i]<<"' expects "<<needed<<" argument(s)\n";
  return false;
}

/**
 * Command-line entry point.
 *
 * Parses the options below and, when exactly one -ttable was given, builds
 * the binary phrase table from that text table (or from stdin when the table
 * name is "-"), using the -out value as the output file name prefix.
 *
 *   -ttable A B FILE      register a translation table (FILE, with A and B
 *                         stored as the raw argument strings)
 *   -nscores N            number of scores (default 5)
 *   -out PREFIX           output file name prefix
 *   -cn / -irst           set the confusion-net mode to 1 / 2
 *   -no-alignment-info    turn the alignment flag off
 *   -v N                  verbosity
 *   -h                    print usage
 *
 * -nscores, -cn, -irst and -v are parsed but only consumed by the code that
 * is currently compiled out with `#if 0`.
 *
 * @return 1 after -h, on an unknown option, or when an option is missing its
 *         argument(s); the process exits with 1 when more than one -ttable is
 *         given (that code path is disabled); 0 otherwise.
 */
int main(int argc,char **argv)
{
  std::string fto;              // value of -out
  size_t noScoreComponent=5;    // value of -nscores
  int cn=0;                     // 0 = default, 1 = -cn, 2 = -irst
  bool aligninfo=true;          // cleared by -no-alignment-info
  // One entry per -ttable: (FILE, (A, B)). A and B point into argv.
  std::vector<std::pair<std::string,std::pair<char*,char*> > > ftts;
  int verb=0;                   // value of -v
  for(int i=1; i<argc; ++i) {
    std::string s(argv[i]);
    if(s=="-ttable") {
      // Needs three values; check before consuming so we never touch argv[argc].
      if(!hasOptionArgs(argc,argv,i,3)) return 1;
      std::pair<char*,char*> p;
      p.first=argv[++i];
      p.second=argv[++i];
      ftts.push_back(std::make_pair(std::string(argv[++i]),p));
    } else if(s=="-nscores") {
      if(!hasOptionArgs(argc,argv,i,1)) return 1;
      noScoreComponent=atoi(argv[++i]);
    } else if(s=="-out") {
      if(!hasOptionArgs(argc,argv,i,1)) return 1;
      fto=std::string(argv[++i]);
    } else if(s=="-cn") cn=1;
    else if(s=="-irst") cn=2;
    else if(s=="-no-alignment-info") aligninfo=false;
    else if(s=="-v") {
      if(!hasOptionArgs(argc,argv,i,1)) return 1;
      verb=atoi(argv[++i]);
    } else if(s=="-h") {
      std::cerr<<"usage "<<argv[0]<<" :\n\n"
               "options:\n"
               "\t-ttable int int string   -- translation table file, use '-' for stdin\n"
               "\t-out string      -- output file name prefix for binary ttable\n"
               "\t-nscores int     -- number of scores in ttable\n"
               "\t-no-alignment-info  -- omit alignment info from the binary ttable \n"
               "\nfunctions:\n"
               "\t - convert ascii ttable in binary format\n"
               "\t - if ttable is not read from stdin:\n"
               "\t     treat each line as source phrase an print tgt candidates\n"
               "\n";
      return 1;
    } else {
      std::cerr<<"ERROR: unknown option '"<<s<<"'\n";
      return 1;
    }
  }


  // Nothing is done (and 0 is returned) when no -ttable option was supplied.
  if(ftts.size()) {

    if(ftts.size()==1) {
      // Single table: convert it to the binary format.
      std::cerr<<"processing ptree for ";
      PhraseDictionaryTree pdt;

      pdt.PrintWordAlignment(aligninfo);

      if (ftts[0].first=="-") {
        // "-" selects standard input as the text table source.
        std::cerr<< "stdin\n";
        pdt.Create(std::cin,fto);
      } else {
        std::cerr<< ftts[0].first << "\n";
        // NOTE(review): the stream is not checked for open failure here --
        // confirm how InputFileStream / Create report a missing file.
        InputFileStream in(ftts[0].first);
        pdt.Create(in,fto);
      }
    } else {
      // Multi-table lookup mode; disabled, see the #else branch below.
#if 0
      std::vector<PhraseDictionaryTree const*> pdicts;
      std::vector<FactorType> factorOrder;
      for(size_t i=0; i<ftts.size(); ++i) {

        PhraseDictionaryTree *pdtptr=new PhraseDictionaryTree(noScoreComponent,
            &factorCollection,
            getFactorType(atoi(ftts[i].second.first)),
            getFactorType(atoi(ftts[i].second.second))
                                                             );
        factorOrder.push_back(pdtptr->GetInputFactorType());
        PhraseDictionaryTree &pdt=*pdtptr;
        pdicts.push_back(pdtptr);

        std::string facStr="."+std::string(ftts[i].second.first)+"-"+std::string(ftts[i].second.second);
        std::string prefix=ftts[i].first+facStr;
        if(!existsFile(prefix+".binphr.idx")) {
          std::cerr<<"bin ttable does not exist -> create it\n";
          InputFileStream in(prefix);
          pdt.Create(in,prefix);
        }
        std::cerr<<"reading bin ttable\n";
        pdt.Read(prefix);

      }

      std::cerr<<"processing stdin\n";
      if(!cn) {
        std::string line;
        while(getline(std::cin,line)) {
          std::istringstream is(line);
#if 0
          std::vector<std::string> f;
          std::copy(std::istream_iterator<std::string>(is),
                    std::istream_iterator<std::string>(),
                    std::back_inserter(f));
#endif
          std::cerr<<"got source phrase '"<<line<<"'\n";

          Phrase F(Input);
          F.CreateFromString(factorOrder,line,factorCollection);

          for(size_t k=0; k<pdicts.size(); ++k) {
            PhraseDictionaryTree const& pdt=*pdicts[k];

            std::vector<std::string> f(F.GetSize());
            for(size_t i=0; i<F.GetSize(); ++i)
              f[i]=F.GetFactor(i,pdt.GetInputFactorType())->ToString();

            std::stringstream iostA,iostB;
            std::cerr<<"full phrase processing "<<f<<"\n";
            pdt.PrintTargetCandidates(f,iostA);

            std::cerr<<"processing with prefix ptr\n";
            PhraseDictionaryTree::PrefixPtr p(pdt.GetRoot());

            for(size_t i=0; i<f.size() && p; ++i) {
              std::cerr<<"pre "<<i<<" "<<(p?"1":"0")<<"\n";
              p=pdt.Extend(p,f[i]);
              std::cerr<<"post "<<i<<" "<<(p?"1":"0")<<"\n";
            }
            if(p) {
              std::cerr<<"retrieving candidates from prefix ptr\n";
              pdt.PrintTargetCandidates(p,iostB);
            } else {
              std::cerr<<"final ptr is invalid\n";
              iostB<<"there are 0 target candidates\n";
            }
            if(iostA.str() != iostB.str())
              std::cerr<<"ERROR: translation candidates mismatch '"<<iostA.str()<<"' and for prefix pointer: '"<<iostB.str()<<"'\n";

            std::cerr<<"translation candidates:\n"<<iostA.str()<<"\n";
            pdt.FreeMemory();

          }

        }
      } else {
        // process confusion net input
        ConfusionNet net(&factorCollection);
        std::vector<std::vector<float> > weights;
        for(size_t i=0; i<pdicts.size(); ++i)
          weights.push_back(std::vector<float>(noScoreComponent,1/(1.0*noScoreComponent)));

        while(net.ReadF(std::cin,factorOrder,cn-1)) {
          net.Print(std::cerr);
          GenerateCandidates(net,pdicts,weights,verb);
        }

      }
#else
      std::cerr<<"ERROR: these functions are currently broken...\n";
      exit(1);
#endif
    }
  }

  return 0;
}
back to top