// $Id$ /* ---------------------------------------------------------------- */ /* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */ /* Richard Zens */ /* ---------------------------------------------------------------- */ #ifndef moses_PrefixTree_h #define moses_PrefixTree_h #include #include #include #include "Util.h" #include "FilePtr.h" #include "File.h" namespace Moses { /** @todo How is this used in the pb binary phrase table? */ template class PrefixTreeSA { public: typedef T Key; typedef D Data; typedef PrefixTreeSA Self; typedef std::vector VT; typedef std::vector VP; typedef std::vector VD; VT keys; VP ptr; VD data; static Data def; public: PrefixTreeSA() {} ~PrefixTreeSA() { for(size_t i=0; i Data& insert(fwiter b,fwiter e) { typename VT::iterator i=std::lower_bound(keys.begin(),keys.end(),*b); typename VT::iterator kb=keys.begin(); size_t pos=std::distance(kb,i); if(i==keys.end() || *i!=*b) { keys.insert(i,*b); data.insert(data.begin()+pos,def); Self *self = NULL; ptr.insert(ptr.begin()+pos, self); } if(++b!=e) { if(!ptr[pos]) ptr[pos]=new Self; return ptr[pos]->insert(b,e); } else return data[pos]; } // insert container template Data& insert(const cont& c) { return insert(c.begin(),c.end()); } size_t size() const { return keys.size(); } const Key& getKey(size_t i) const { return keys[i]; } const Data& getData(size_t i) const { return data[i]; } const Self* getPtr(size_t i) const { return ptr[i]; } size_t findKey(const Key& k) const { typename VT::const_iterator i=std::lower_bound(keys.begin(),keys.end(),k); if(i==keys.end() || *i!=k) return keys.size(); return std::distance(keys.begin(),i); } // find sequence template const Data* findPtr(fwiter b,fwiter e) const { size_t pos=findKey(*b); if(pos==keys.size()) return 0; if(++b==e) return &data[pos]; if(ptr[pos]) return ptr[pos]->findPtr(b,e); else return 0; } // find container template const Data* findPtr(const cont& c) const { return findPtr(c.begin(),c.end()); } // find sequence template const Data& find(fwiter b,fwiter e) const { if(const Data* p=findPtr(b,e)) return *p; else return def; } // find container template const Data& find(const cont& c) const { return find(c.begin(),c.end()); } void shrink() { ShrinkToFit(keys); ShrinkToFit(ptr); ShrinkToFit(data); } }; template D PrefixTreeSA::def; ///////////////////////////////////////////////////////////////////////////// /** @todo How is this used in the pb binary phrase table? */ template class PrefixTreeF { public: typedef T Key; typedef D Data; private: typedef PrefixTreeF Self; public: typedef FilePtr Ptr; private: typedef std::vector VK; typedef std::vector VD; typedef std::vector VP; VK keys; VD data; VP ptr; static Data def; OFF_T startPos; FILE* f; public: PrefixTreeF(FILE* f_=0) : f(f_) { if(f) read(); } ~PrefixTreeF() { free(); } void read() { startPos=fTell(f); fReadVector(f,keys); fReadVector(f,data); ptr.clear(); ptr.resize(keys.size()); std::vector rawOffs(keys.size()); size_t bytes_read = fread(&rawOffs[0], sizeof(OFF_T), keys.size(), f); UTIL_THROW_IF2(bytes_read != keys.size(), "Read error at " << HERE); for(size_t i=0; ifree(); } void reserve(size_t s) { keys.reserve(s); data.reserve(s); ptr.reserve(s); } template void changeData(fwiter b,fwiter e,const Data& d) { typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),*b); if(i==keys.end() || *i!=*b) { TRACE_ERR("ERROR: key not found in changeData!\n"); return; } typename VK::const_iterator kb=keys.begin(); size_t pos=std::distance(kb,i); if(++b==e) { OFF_T p=startPos+keys.size()*sizeof(Key)+2*sizeof(unsigned)+pos*sizeof(Data); TRACE_ERR("elem found at pos "<changeData(b,e,d); else { TRACE_ERR("ERROR: seg not found!in changeData\n"); } } void create(const PrefixTreeSA& psa,const std::string& fname) { FILE* f=fOpen(fname.c_str(),"wb"); create(psa,f); fclose(f); } void create(const PrefixTreeSA& psa,FILE* f,int verbose=0) { setDefault(psa.getDefault()); typedef std::pair*,OFF_T> P; typedef std::deque

Queue; Queue queue; queue.push_back(P(&psa,fTell(f))); bool isFirst=1; size_t ns=1; while(queue.size()) { if(verbose && queue.size()>ns) { TRACE_ERR("stack size in PF create: "<& p=*pp.first; OFF_T pos=pp.second; queue.pop_back(); if(!isFirst) { OFF_T curr=fTell(f); fSeek(f,pos); fWrite(f,curr); fSeek(f,curr); } else isFirst=0; size_t s=0; s+=fWriteVector(f,p.keys); s+=fWriteVector(f,p.data); for(size_t i=0; i const Data* findPtr(fwiter b,fwiter e) const { typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),*b); if(i==keys.end() || *i!=*b) return 0; size_t pos=std::distance(keys.begin(),i); if(++b==e) return &data[pos]; if(ptr[pos]) return ptr[pos]->findPtr(b,e); else return 0; } // find container template const Data* findPtr(const cont& c) const { return findPtr(c.begin(),c.end()); } // find sequence template const Data& find(fwiter b,fwiter e) const { if(const Data* p=findPtr(b,e)) return *p; else return def; } //return (p?*p:def);} // find container template const Data& find(const cont& c) const { return find(c.begin(),c.end()); } static void setDefault(const Data& d) { def=d; } static const Data& getDefault() { return def; } void print(std::ostream& out,const std::string s="") const { out<print(out,s+" "); } }; template D PrefixTreeF::def; } #endif