From ad240a9f5b13c807e3e5dbe3a6d51f4e40e5ad53 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 28 Jun 2016 10:46:51 +0100 Subject: move Moses classes out of OnDiskPt - Word --- OnDiskPt/Word.cpp | 25 ---------------- OnDiskPt/Word.h | 11 ++++---- .../RuleTable/PhraseDictionaryOnDisk.cpp | 33 ++++++++++++++++++++-- .../RuleTable/PhraseDictionaryOnDisk.h | 5 ++++ 4 files changed, 40 insertions(+), 34 deletions(-) diff --git a/OnDiskPt/Word.cpp b/OnDiskPt/Word.cpp index a68accb34..9ad6bea72 100644 --- a/OnDiskPt/Word.cpp +++ b/OnDiskPt/Word.cpp @@ -19,9 +19,7 @@ ***********************************************************************/ #include -#include "moses/FactorCollection.h" #include "moses/Util.h" -#include "moses/Word.h" #include "Word.h" #include "util/tokenize_piece.hh" @@ -98,29 +96,6 @@ size_t Word::ReadFromFile(std::fstream &file) return memAlloc; } -void Word::ConvertToMoses( - const std::vector &outputFactorsVec, - const Vocab &vocab, - Moses::Word &overwrite) const -{ - Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance(); - overwrite = Moses::Word(m_isNonTerminal); - - if (m_isNonTerminal) { - const std::string &tok = vocab.GetString(m_vocabId); - overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal)); - } else { - // TODO: this conversion should have been done at load time. - util::TokenIter tok(vocab.GetString(m_vocabId), '|'); - - for (std::vector::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) { - UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size()); - overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal)); - } - UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size()); - } -} - int Word::Compare(const Word &compare) const { int ret; diff --git a/OnDiskPt/Word.h b/OnDiskPt/Word.h index 39ebf336b..f5cbf39a8 100644 --- a/OnDiskPt/Word.h +++ b/OnDiskPt/Word.h @@ -67,14 +67,13 @@ public: size_t ReadFromMemory(const char *mem); size_t ReadFromFile(std::fstream &file); - void SetVocabId(uint32_t vocabId) { - m_vocabId = vocabId; + uint64_t GetVocabId() const { + return m_vocabId; } - void ConvertToMoses( - const std::vector &outputFactorsVec, - const Vocab &vocab, - Moses::Word &overwrite) const; + void SetVocabId(uint64_t vocabId) { + m_vocabId = vocabId; + } void DebugPrint(std::ostream &out, const Vocab &vocab) const; inline const std::string &GetString(const Vocab &vocab) const { diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp index 06c3bd262..04e401080 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.cpp @@ -30,6 +30,8 @@ #include "OnDiskPt/OnDiskWrapper.h" #include "OnDiskPt/Word.h" +#include "util/tokenize_piece.hh" + using namespace std; @@ -266,7 +268,8 @@ Moses::TargetPhrase *PhraseDictionaryOnDisk::ConvertToMoses(const OnDiskPt::Targ } for (size_t pos = 0; pos < phraseSize; ++pos) { - targetPhraseOnDisk.GetWord(pos).ConvertToMoses(outputFactors, vocab, ret->AddWord()); + const OnDiskPt::Word &wordOnDisk = targetPhraseOnDisk.GetWord(pos); + ConvertToMoses(wordOnDisk, outputFactors, vocab, ret->AddWord()); } // alignments @@ -292,14 +295,15 @@ Moses::TargetPhrase *PhraseDictionaryOnDisk::ConvertToMoses(const OnDiskPt::Targ if (isSyntax) { Moses::Word *lhsTarget = new Moses::Word(true); - targetPhraseOnDisk.GetWord(targetPhraseOnDisk.GetSize() - 1).ConvertToMoses(outputFactors, vocab, *lhsTarget); + const OnDiskPt::Word &lhsOnDisk = targetPhraseOnDisk.GetWord(targetPhraseOnDisk.GetSize() - 1); + ConvertToMoses(lhsOnDisk, outputFactors, vocab, *lhsTarget); ret->SetTargetLHS(lhsTarget); } // set source phrase Moses::Phrase mosesSP(Moses::Input); for (size_t pos = 0; pos < sp->GetSize(); ++pos) { - sp->GetWord(pos).ConvertToMoses(inputFactors, vocab, mosesSP.AddWord()); + ConvertToMoses(sp->GetWord(pos), inputFactors, vocab, mosesSP.AddWord()); } // scores @@ -314,7 +318,30 @@ Moses::TargetPhrase *PhraseDictionaryOnDisk::ConvertToMoses(const OnDiskPt::Targ ret->EvaluateInIsolation(mosesSP, phraseDict.GetFeaturesToApply()); return ret; +} + +void PhraseDictionaryOnDisk::ConvertToMoses( + const OnDiskPt::Word &wordOnDisk, + const std::vector &outputFactorsVec, + const OnDiskPt::Vocab &vocab, + Moses::Word &overwrite) const +{ + Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance(); + overwrite = Moses::Word(wordOnDisk.IsNonTerminal()); + if (wordOnDisk.IsNonTerminal()) { + const std::string &tok = vocab.GetString(wordOnDisk.GetVocabId()); + overwrite.SetFactor(0, factorColl.AddFactor(tok, wordOnDisk.IsNonTerminal())); + } else { + // TODO: this conversion should have been done at load time. + util::TokenIter tok(vocab.GetString(wordOnDisk.GetVocabId()), '|'); + + for (std::vector::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) { + UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(wordOnDisk.GetVocabId()) << "\"; was expecting " << outputFactorsVec.size()); + overwrite.SetFactor(*t, factorColl.AddFactor(*tok, wordOnDisk.IsNonTerminal())); + } + UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(wordOnDisk.GetVocabId()) << "\"; was expecting " << outputFactorsVec.size()); + } } void PhraseDictionaryOnDisk::SetParameter(const std::string& key, const std::string& value) diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h index f3c923b72..dc47936f6 100644 --- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h +++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h @@ -72,6 +72,11 @@ protected: , const std::vector &weightT , bool isSyntax) const; + void ConvertToMoses(const OnDiskPt::Word &wordOnDisk, + const std::vector &outputFactorsVec, + const OnDiskPt::Vocab &vocab, + Moses::Word &overwrite) const; + public: PhraseDictionaryOnDisk(const std::string &line); ~PhraseDictionaryOnDisk(); -- cgit v1.2.3