// https://github.com/wilkeraziz/mosesdecoder
// Raw File
// Tip revision: c55161dff40ef01fea15d6c0a8ed30b8ce1c7d46 authored by jiejiang on 16 January 2014, 01:08:01 UTC
// merge c11 revert with mingw changes
// Tip revision: c55161d
// PrefixTreeMap.cpp
#include "PrefixTreeMap.h"
#include "TypeDef.h"

#ifdef WITH_THREADS
#include <boost/thread.hpp>
#endif

namespace Moses
{
void GenericCandidate::readBin(FILE* f)
{
  m_PhraseList.clear();
  m_ScoreList.clear();
  UINT32 num_phrases;  // on older compilers, <stdint.h> may need to be included
  fRead(f, num_phrases);
  for(unsigned int i = 0; i < num_phrases; ++i) {
    IPhrase phrase;
    fReadVector(f, phrase);
    m_PhraseList.push_back(phrase);
  };
  UINT32 num_scores;
  fRead(f, num_scores);
  for(unsigned int j = 0; j < num_scores; ++j) {
    std::vector<float> score;
    fReadVector(f, score);
    m_ScoreList.push_back(score);
  };
};

// Writes this candidate to f in the layout readBin expects: the number of
// phrases, each phrase, the number of score vectors, each score vector.
void GenericCandidate::writeBin(FILE* f) const
{
  // The counts are narrowed to UINT32 so that files stay compatible between
  // 32- and 64-bit platforms.
  const size_t phraseCount = m_PhraseList.size();
  fWrite(f, static_cast<UINT32>(phraseCount));
  for(size_t idx = 0; idx != phraseCount; ++idx) {
    fWriteVector(f, m_PhraseList[idx]);
  }

  const size_t scoreCount = m_ScoreList.size();
  fWrite(f, static_cast<UINT32>(scoreCount));
  for(size_t idx = 0; idx != scoreCount; ++idx) {
    fWriteVector(f, m_ScoreList[idx]);
  }
}


void Candidates::writeBin(FILE* f) const
{
  UINT32 s = this->size();
  fWrite(f,s);
  for(size_t i = 0; i < s; ++i) {
    MyBase::operator[](i).writeBin(f);
  }
}

void Candidates::readBin(FILE* f)
{
  UINT32 s;
  fRead(f,s);
  this->resize(s);
  for(size_t i = 0; i<s; ++i) {
    MyBase::operator[](i).readBin(f);
  }
}

// Definition of the class constant MagicWord: one less than the largest
// value LabelId can hold. ConvertWord(LabelId, ...) renders it as "|||".
const LabelId PrefixTreeMap::MagicWord = std::numeric_limits<LabelId>::max() - 1;


// Calls free() on every entry of m_Data and resets the pointer pool.
void PrefixTreeMap::FreeMemory()
{
  for(size_t slot = 0; slot < m_Data.size(); ++slot) {
    m_Data[slot].free();
  }
  // The vocabularies in m_Voc are deliberately left alone; the loop that
  // used to delete and null them is disabled:
  //   for(size_t i = 0; i < m_Voc.size(); ++i){ delete m_Voc[i]; m_Voc[i] = 0; }
  m_PtrPool.reset();
}

#ifdef WITH_THREADS
// Guards the vocabulary cache inside ReadVoc. It has to be one mutex shared
// by all callers: a mutex declared as a plain local inside the function
// would be a fresh object on every call and would therefore never make two
// threads exclude each other.
static boost::mutex s_readVocMutex;
#endif

// Returns the vocabulary stored in the file `filename`, loading it on first
// use and returning the cached instance on later calls with the same name.
//
// The returned object is owned by the process-wide cache and is never
// deleted; callers must not delete it.
// When WITH_THREADS is defined, lookup and insertion are serialised through
// s_readVocMutex.
static WordVoc* ReadVoc(const std::string& filename)
{
  static std::map<std::string,WordVoc*> vocs;
#ifdef WITH_THREADS
  boost::mutex::scoped_lock lock(s_readVocMutex);
#endif
  std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
  if (vi == vocs.end()) {
    WordVoc* voc = new WordVoc();
    voc->Read(filename);
    // Keep the iterator returned by insert so no further lookup is needed.
    vi = vocs.insert(std::make_pair(filename, voc)).first;
  }
  return vi->second;
}

// Loads the binary prefix tree stored under fileNameStem.
//
// Files used (all named fileNameStem + suffix):
//   .idx      vector of OFF_T offsets, one per m_Data entry
//   .srctree  opened and kept in m_FileSrc (any previously open file is closed)
//   .tgtdata  opened and kept in m_FileTgt (any previously open file is closed)
//   .voc<N>   vocabulary files, N = 0, 1, 2, ...
//
// numVocs is the number of vocabulary files to load; when it is -1 the
// count is found by probing .voc0, .voc1, ... until a file does not exist.
//
// Always returns 1.
int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs)
{
  std::string ifs(fileNameStem + ".srctree"),
      ift(fileNameStem + ".tgtdata"),
      ifi(fileNameStem + ".idx"),
      ifv(fileNameStem + ".voc");

  std::vector<OFF_T> srcOffsets;
  FILE *ii=fOpen(ifi.c_str(),"rb");
  fReadVector(ii,srcOffsets);
  fClose(ii);

  if (m_FileSrc) {
    fClose(m_FileSrc);
  }
  m_FileSrc = fOpen(ifs.c_str(),"rb");
  if (m_FileTgt) {
    fClose(m_FileTgt);
  }
  m_FileTgt = fOpen(ift.c_str(),"rb");

  m_Data.resize(srcOffsets.size());

  for(size_t i = 0; i < m_Data.size(); ++i) {
    m_Data[i] = CPT(m_FileSrc, srcOffsets[i]);
  }

  // Buffer for the decimal vocabulary index appended to the ".voc" name.
  // Sized for any int printed with "%d": up to digits10 + 1 digits, an
  // optional minus sign and the terminating NUL. The former 5-byte buffer
  // overflowed as soon as the index needed five characters.
  char num[std::numeric_limits<int>::digits10 + 3];

  if(-1 == numVocs) {
    // Count the vocabulary files by probing consecutive indices.
    numVocs = 0;
    sprintf(num, "%d", numVocs);
    while(FileExists(ifv + num)) {
      ++numVocs;
      sprintf(num, "%d", numVocs);
    }
  }
  m_Voc.resize(numVocs);
  for(int i = 0; i < numVocs; ++i) {
    sprintf(num, "%d", i);
    // ReadVoc hands out a cached vocabulary, so the same file is only
    // loaded once even if several maps refer to it.
    m_Voc[i] = ReadVoc(ifv + num);
  }

  TRACE_ERR("binary file loaded, default OFF_T: "<< PTF::getDefault()<<"\n");
  return 1;
}


void PrefixTreeMap::GetCandidates(const IPhrase& key, Candidates* cands)
{
  //check if key is valid
  if(key.empty() || key[0] >= m_Data.size() || !m_Data[key[0]]) {
    return;
  }
  UTIL_THROW_IF2(m_Data[key[0]]->findKey(key[0]) >= m_Data[key[0]]->size(),
		  "Key not found: " << key[0]);

  OFF_T candOffset = m_Data[key[0]]->find(key);
  if(candOffset == InvalidOffT) {
    return;
  }
  fSeek(m_FileTgt,candOffset);
  cands->readBin(m_FileTgt);
}

void PrefixTreeMap::GetCandidates(const PPimp& p, Candidates* cands)
{
  UTIL_THROW_IF2(!p.isValid(), "Not a valid PPimp...");
  if(p.isRoot()) {
    return;
  };
  OFF_T candOffset = p.ptr()->getData(p.idx);
  if(candOffset == InvalidOffT) {
    return;
  }
  fSeek(m_FileTgt,candOffset);
  cands->readBin(m_FileTgt);
}

std::vector< std::string const * > PrefixTreeMap::ConvertPhrase(const IPhrase& p, unsigned int voc) const
{
  UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
		  "Invalid vocab id: " << voc);
  std::vector< std::string const * > result;
  result.reserve(p.size());
  for(IPhrase::const_iterator i = p.begin(); i != p.end(); ++i) {
    result.push_back(&(m_Voc[voc]->symbol(*i)));
  }
  return result;
}

// Maps every word of `p` to the value returned by index() of vocabulary
// number `voc`, preserving order.
//
// Throws (via UTIL_THROW_IF2) when `voc` is out of range or the vocabulary
// slot is empty.
IPhrase PrefixTreeMap::ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const
{
  UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
		  "Invalid vocab id: " << voc);
  IPhrase ids;
  ids.reserve(p.size());
  for(std::vector< std::string >::const_iterator word = p.begin(); word != p.end(); ++word) {
    ids.push_back(m_Voc[voc]->index(*word));
  }
  return ids;
}

// Returns what index() of vocabulary number `voc` yields for the word `w`.
//
// Throws (via UTIL_THROW_IF2) when `voc` is out of range or the vocabulary
// slot is empty.
LabelId PrefixTreeMap::ConvertWord(const std::string& w, unsigned int voc) const
{
  UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
		  "Invalid vocab id: " << voc);
  const LabelId id = m_Voc[voc]->index(w);
  return id;
}

// Returns the string for label `w`.
//
// MagicWord is rendered as "|||" and InvalidLabelId as "<invalid>"; every
// other label is looked up with symbol() in vocabulary number `voc`.
// Throws (via UTIL_THROW_IF2) when `voc` is out of range or the vocabulary
// slot is empty, even for the two special labels.
std::string PrefixTreeMap::ConvertWord(LabelId w, unsigned int voc) const
{
  UTIL_THROW_IF2(voc >= m_Voc.size() || m_Voc[voc] == 0,
		  "Invalid vocab id: " << voc);
  if(w == PrefixTreeMap::MagicWord) {
    return "|||";
  }
  if(w == InvalidLabelId) {
    return "<invalid>";
  }
  return m_Voc[voc]->symbol(w);
}

// Returns a pool-managed PPimp built from (0, 0, 1).
// NOTE(review): presumably the trailing 1 marks the position as the root
// (Extend builds non-root positions with 0 there) - confirm against PPimp.
PPimp* PrefixTreeMap::GetRoot()
{
  const PPimp rootPos(0,0,1);
  return m_PtrPool.get(rootPos);
}

// Advances the tree position `p` by the label `wi` and returns the new
// pool-managed position, or 0 (an invalid pointer) when no extension exists.
//
// 0 is returned when
//   - wi is InvalidLabelId (unknown word),
//   - p is the root and m_Data has no entry for wi, or
//   - p is not the root and has no child pointer at p->idx.
// Throws (via UTIL_THROW_IF2) when p is not valid, or when p is the root and
// the m_Data entry for wi does not yield a key pointer for wi.
PPimp* PrefixTreeMap::Extend(PPimp* p, LabelId wi)
{
  UTIL_THROW_IF2(!p->isValid(), "Not a valid PPimp...");

  // unknown word, return invalid pointer
  if(wi == InvalidLabelId) {
    return 0;
  }

  if(p->isRoot()) {
    // From the root, the label itself selects the m_Data entry.
    if(wi >= m_Data.size() || !m_Data[wi]) {
      return 0;
    }
    const void* ptr = m_Data[wi]->findKeyPtr(wi);
    UTIL_THROW_IF2(ptr == NULL, "Null pointer");
    return m_PtrPool.get(PPimp(m_Data[wi],m_Data[wi]->findKey(wi),0));
  }

  // Below the root, follow the child pointer stored at the current index.
  PTF const* nextP = p->ptr()->getPtr(p->idx);
  if(!nextP) {
    return 0;
  }
  return m_PtrPool.get(PPimp(nextP, nextP->findKey(wi),0));
}

}

// back to top