diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-09-26 15:52:11 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-09-26 15:52:11 +0400 |
commit | ab60d1ad6f93a78e80e665bc6c7d32b61b7c1c52 (patch) | |
tree | 275b97bc4bb9b749bdfa608c5e82ae0ee0b1498a /OnDiskPt | |
parent | a9c890e55e3b0fafd396e679b476f6c448489b6c (diff) |
Slightly less wasteful OnDiskPt word conversion
Ideally, OnDiskPt would convert its entire vocabulary to Word in advance.
Diffstat (limited to 'OnDiskPt')
-rw-r--r-- | OnDiskPt/TargetPhrase.cpp | 8 |
-rw-r--r-- | OnDiskPt/Word.cpp | 29 |
-rw-r--r-- | OnDiskPt/Word.h | 7 |
3 files changed, 21 insertions, 23 deletions
diff --git a/OnDiskPt/TargetPhrase.cpp b/OnDiskPt/TargetPhrase.cpp index 437e9029e..d923df8fd 100644 --- a/OnDiskPt/TargetPhrase.cpp +++ b/OnDiskPt/TargetPhrase.cpp @@ -207,9 +207,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto --phraseSize; for (size_t pos = 0; pos < phraseSize; ++pos) { - Moses::Word *mosesWord = GetWord(pos).ConvertToMoses(Moses::Output, outputFactors, vocab); - ret->AddWord(*mosesWord); - delete mosesWord; + GetWord(pos).ConvertToMoses(outputFactors, vocab, ret->AddWord()); } // scores @@ -223,9 +221,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto } ret->SetAlignmentInfo(alignmentInfo); - Moses::Word *lhs = GetWord(GetSize() - 1).ConvertToMoses(Moses::Output, outputFactors, vocab); - ret->SetTargetLHS(*lhs); - delete lhs; + GetWord(GetSize() - 1).ConvertToMoses(outputFactors, vocab, ret->MutableTargetLHS()); return ret; } diff --git a/OnDiskPt/Word.cpp b/OnDiskPt/Word.cpp index 87d45818f..52e49d8d9 100644 --- a/OnDiskPt/Word.cpp +++ b/OnDiskPt/Word.cpp @@ -23,6 +23,9 @@ #include "../moses/src/Word.h" #include "Word.h" +#include "util/tokenize_piece.hh" +#include "util/exception.hh" + using namespace std; namespace OnDiskPt @@ -94,23 +97,21 @@ size_t Word::ReadFromFile(std::fstream &file) return memUsed; } -Moses::Word *Word::ConvertToMoses(Moses::FactorDirection direction - , const std::vector<Moses::FactorType> &outputFactorsVec - , const Vocab &vocab) const -{ +void Word::ConvertToMoses( + const std::vector<Moses::FactorType> &outputFactorsVec, + const Vocab &vocab, + Moses::Word &overwrite) const { Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance(); - Moses::Word *ret = new Moses::Word(m_isNonTerminal); - - const string &str = vocab.GetString(m_vocabId); - vector<string> toks = Moses::Tokenize(str, "|"); - for (size_t ind = 0; ind < toks.size(); ++ind) { - Moses::FactorType factorType = outputFactorsVec[ind]; - const Moses::Factor *factor = 
factorColl.AddFactor(direction, factorType, toks[ind]); - ret->SetFactor(factorType, factor); - } + overwrite = Moses::Word(m_isNonTerminal); - return ret; + // TODO: this conversion should have been done at load time. + util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|'); + for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) { + UTIL_THROW_IF(!tok, util::Exception, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size()); + overwrite.SetFactor(*t, factorColl.AddFactor(*tok)); + } + UTIL_THROW_IF(tok, util::Exception, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size()); } int Word::Compare(const Word &compare) const diff --git a/OnDiskPt/Word.h b/OnDiskPt/Word.h index ca917f952..497684ecf 100644 --- a/OnDiskPt/Word.h +++ b/OnDiskPt/Word.h @@ -70,9 +70,10 @@ public: m_vocabId = vocabId; } - Moses::Word *ConvertToMoses(Moses::FactorDirection direction - , const std::vector<Moses::FactorType> &outputFactorsVec - , const Vocab &vocab) const; + void ConvertToMoses( + const std::vector<Moses::FactorType> &outputFactorsVec, + const Vocab &vocab, + Moses::Word &overwrite) const; virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const; |