// https://github.com/wilkeraziz/mosesdecoder
// Raw File
// Tip revision: 5d9d691ad6c322c95d6eb0fb2c762f5616b70eee authored by Hieu Hoang on 15 October 2015, 15:58:51 UTC
// merge
// Tip revision: 5d9d691
// Vocabulary.h
// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $

#pragma once

#include <iostream>
#include <cstdlib>
#include <string>
#include <map>
#include <vector>

// SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM)
//
// Reads one _DELIM-terminated record from the input stream _IS into the
// character buffer _LINE, passing _SIZE as the count to istream::getline.
//
//  * If the stream ends up in a fail() state that is neither bad() nor eof(),
//    the state is cleared so that subsequent reads can proceed.
//  * If gcount() equals _SIZE-1 the record is considered too long for the
//    buffer: a diagnostic is written to std::cerr and the process terminates
//    via std::exit(1).
//
// Every parameter is parenthesised in the expansion so that compound
// arguments (e.g. `*filePtr`, `cond ? a : b`) expand with the intended
// precedence.
//
// Caveats for callers:
//  * _IS and _SIZE are evaluated more than once; do not pass expressions
//    with side effects.
//  * The expansion is a plain brace block (kept for compatibility with
//    existing call sites), so `if (c) SAFE_GETLINE(...); else ...` without
//    braces around the macro call will not compile.
#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
                (_IS).getline((_LINE), (_SIZE), (_DELIM)); \
                if ((_IS).fail() && !(_IS).bad() && !(_IS).eof()) (_IS).clear(); \
                if ((_IS).gcount() == (_SIZE)-1) { \
                  std::cerr << "Line too long! Buffer overflow. Delete lines >=" \
                    << (_SIZE) << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
                       << std::endl; \
                  std::exit(1);  \
                } \
              }

// Textual form of a vocabulary entry.
typedef std::string WORD;
// Numeric identifier of a vocabulary entry; GetWord() uses it as an index
// into Vocabulary::vocab.
typedef unsigned int WORD_ID;

// Table associating words with numeric ids.
//
// Only GetWord() is defined in this header; the remaining member functions
// are declared here and implemented elsewhere, so the notes on them below
// are inferred from their signatures and should be confirmed against the
// implementation.
class Vocabulary
{
public:
  // Word -> id map.
  // NOTE(review): presumably kept consistent with `vocab` by the member
  // functions defined outside this header -- confirm.
  std::map<WORD, WORD_ID> lookup;

  // Words stored by position; GetWord(id) returns vocab[id].
  std::vector< WORD > vocab;

  // Presumably returns the id of `word`, registering it first if it is not
  // yet known -- TODO confirm against the implementation.
  WORD_ID StoreIfNew( const WORD& word );

  // Presumably returns the id of an already stored `word`; behaviour for an
  // unknown word is not visible here -- TODO confirm.
  WORD_ID GetWordID( const WORD& word ) const;

  // Presumably splits `input` into words and returns their ids; non-const,
  // so it may add words to the table -- TODO confirm.
  std::vector<WORD_ID> Tokenize( const char input[] );

  // Returns a reference to the word stored at index `id` in `vocab`.
  // No bounds check is performed: `id` must be less than vocab.size().
  // The reference is deliberately non-const even though the method is
  // const, so callers can modify the stored word through it; doing so on a
  // Vocabulary object that was itself defined const is undefined behaviour.
  WORD &GetWord( WORD_ID id ) const {
    return const_cast<WORD&>( vocab[ id ] );
  }

  // Presumably writes the vocabulary to `fileName` -- TODO confirm format.
  void Save( const std::string& fileName ) const;

  // Presumably reads a vocabulary from `fileName` -- TODO confirm format.
  void Load( const std::string& fileName );
};
// back to top