diff options
author | Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> | 2020-10-05 20:08:15 +0300 |
---|---|---|
committer | Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> | 2020-10-05 20:08:15 +0300 |
commit | 0b0d5e84926983330d57120987a480e96867b943 (patch) | |
tree | 93c0d5059de342075435b9087e6c9082f266618a | |
parent | a34623140a2d9a2793ab91337d8f3883382c2b20 (diff) |
MSPT changes for parsing phrase table string
-rw-r--r-- | moses2/TranslationModel/MSPT/MSNode.h | 2 | ||||
-rw-r--r-- | moses2/TranslationModel/MSPT/MSPT.cpp | 206 | ||||
-rw-r--r-- | moses2/TranslationModel/MSPT/MSPT.h | 2 |
3 files changed, 109 insertions, 101 deletions
```diff
diff --git a/moses2/TranslationModel/MSPT/MSNode.h b/moses2/TranslationModel/MSPT/MSNode.h
index ad6d0842d..b02422aa5 100644
--- a/moses2/TranslationModel/MSPT/MSNode.h
+++ b/moses2/TranslationModel/MSPT/MSNode.h
@@ -67,7 +67,7 @@ public:
     return m_targetPhrases;
   }
 
-  void SortAndPrune(size_t tableLimit, MemPool &pool, System &system) {
+  void SortAndPrune(size_t tableLimit, MemPool &pool, const System &system) {
     BOOST_FOREACH(typename Children::value_type &val, m_children) {
       Node &child = val.second;
       child.SortAndPrune(tableLimit, pool, system);
diff --git a/moses2/TranslationModel/MSPT/MSPT.cpp b/moses2/TranslationModel/MSPT/MSPT.cpp
index 90feb3489..665d6dbea 100644
--- a/moses2/TranslationModel/MSPT/MSPT.cpp
+++ b/moses2/TranslationModel/MSPT/MSPT.cpp
@@ -53,106 +53,108 @@ MSPT::~MSPT()
   delete m_rootSCFG;
 }
 
-// void MSPT::CreatePTForInput(string phraseTableString)
-// {
-//   FactorCollection &vocab = system.GetVocab();
-//   MemPool &systemPool = system.GetSystemPool();
-//   MemPool tmpSourcePool;
-
-//   if (system.isPb) {
-//     m_rootPb = new PBNODE();
-//   } else {
-//     m_rootSCFG = new SCFGNODE();
-//     //cerr << "m_rootSCFG=" << m_rootSCFG << endl;
-//   }
-
-//   vector<string> toks;
-//   size_t lineNum = 0;
-//   istringstream strme(phraseTableString);
-//   string line;
-//   while (getline(strme, line)) {
-//     if (++lineNum % 1000000 == 0) {
-//       cerr << lineNum << " ";
-//     }
-//     toks.clear();
-//     TokenizeMultiCharSeparator(toks, line, "|||");
-//     UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
-//     //cerr << "line=" << line << endl;
-//     //cerr << "system.isPb=" << system.isPb << endl;
-
-//     if (system.isPb) {
-//       PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
-//                            toks[0]);
-//       //cerr << "created soure" << endl;
-//       TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system,
-//                                  toks[1]);
-//       //cerr << "created target" << endl;
-//       target->GetScores().CreateFromString(toks[2], *this, system, true);
-//       //cerr << "created scores:" << *target << endl;
-
-//       if (toks.size() >= 4) {
-//         //cerr << "alignstr=" << toks[3] << endl;
-//         target->SetAlignmentInfo(toks[3]);
-//       }
-
-//       // properties
-//       if (toks.size() == 7) {
-//         //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
-//         //strcpy(target->properties, toks[6].c_str());
-//       }
-
-//       system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
-//           *target);
-//       //cerr << "EvaluateInIsolation:" << *target << endl;
-//       m_rootPb->AddRule(m_input, *source, target);
-
-//       //cerr << "target=" << target->Debug(system) << endl;
-//     } else {
-//       SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
-//                                  toks[0]);
-//       //cerr << "created source:" << *source << endl;
-//       SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
-//                                        system, toks[1]);
-
-//       //cerr << "created target " << *target << " source=" << *source << endl;
-
-//       target->GetScores().CreateFromString(toks[2], *this, system, true);
-//       //cerr << "created scores:" << *target << endl;
-
-//       //vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
-//       //target->sortScore = (scores.size() >= 3) ? TransformScore(scores[2]) : 0;
-
-//       target->SetAlignmentInfo(toks[3]);
-
-//       // properties
-//       if (toks.size() == 7) {
-//         //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
-//         //strcpy(target->properties, toks[6].c_str());
-//       }
-
-//       system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
-//           *target);
-//       //cerr << "EvaluateInIsolation:" << *target << endl;
-//       m_rootSCFG->AddRule(m_input, *source, target);
-//     }
-//   }
-
-//   if (system.isPb) {
-//     m_rootPb->SortAndPrune(m_tableLimit, systemPool, system);
-//     //cerr << "root=" << &m_rootPb << endl;
-//   } else {
-//     m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system);
-//     //cerr << "root=" << &m_rootPb << endl;
-//   }
-//   /*
-//   BOOST_FOREACH(const PtMem::Node<Word>::Children::value_type &valPair, m_rootPb.GetChildren()) {
-//     const Word &word = valPair.first;
-//     cerr << word << " ";
-//   }
-//   cerr << endl;
-//   */
-
-// }
+void MSPT::CreatePTForInput(const System &system, string phraseTableString)
+{
+  cerr << "In CreatePTForInput" << endl << flush;
+
+  FactorCollection &vocab = system.GetVocab();
+  MemPool &systemPool = system.GetSystemPool();
+  MemPool tmpSourcePool;
+
+  if (system.isPb) {
+    m_rootPb = new PBNODE();
+  } else {
+    m_rootSCFG = new SCFGNODE();
+    //cerr << "m_rootSCFG=" << m_rootSCFG << endl;
+  }
+
+  vector<string> toks;
+  size_t lineNum = 0;
+  istringstream strme(phraseTableString);
+  string line;
+  while (getline(strme, line)) {
+    if (++lineNum % 1000000 == 0) {
+      cerr << lineNum << " ";
+    }
+    toks.clear();
+    TokenizeMultiCharSeparator(toks, line, "|||");
+    UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
+    //cerr << "line=" << line << endl;
+    //cerr << "system.isPb=" << system.isPb << endl;
+
+    if (system.isPb) {
+      PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
+                           toks[0]);
+      //cerr << "created soure" << endl;
+      TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system,
+                                 toks[1]);
+      //cerr << "created target" << endl;
+      target->GetScores().CreateFromString(toks[2], *this, system, true);
+      //cerr << "created scores:" << *target << endl;
+
+      if (toks.size() >= 4) {
+        //cerr << "alignstr=" << toks[3] << endl;
+        target->SetAlignmentInfo(toks[3]);
+      }
+
+      // properties
+      if (toks.size() == 7) {
+        //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
+        //strcpy(target->properties, toks[6].c_str());
+      }
+
+      system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
+          *target);
+      //cerr << "EvaluateInIsolation:" << *target << endl;
+      m_rootPb->AddRule(m_input, *source, target);
+
+      //cerr << "target=" << target->Debug(system) << endl;
+    } else {
+      SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
+                                 toks[0]);
+      //cerr << "created source:" << *source << endl;
+      SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
+                                       system, toks[1]);
+
+      //cerr << "created target " << *target << " source=" << *source << endl;
+
+      target->GetScores().CreateFromString(toks[2], *this, system, true);
+      //cerr << "created scores:" << *target << endl;
+
+      //vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
+      //target->sortScore = (scores.size() >= 3) ? TransformScore(scores[2]) : 0;
+
+      target->SetAlignmentInfo(toks[3]);
+
+      // properties
+      if (toks.size() == 7) {
+        //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
+        //strcpy(target->properties, toks[6].c_str());
+      }
+
+      system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
+          *target);
+      //cerr << "EvaluateInIsolation:" << *target << endl;
+      m_rootSCFG->AddRule(m_input, *source, target);
+    }
+  }
+
+  if (system.isPb) {
+    m_rootPb->SortAndPrune(m_tableLimit, systemPool, system);
+    //cerr << "root=" << &m_rootPb << endl;
+  } else {
+    m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system);
+    //cerr << "root=" << &m_rootPb << endl;
+  }
+  /*
+  BOOST_FOREACH(const PtMem::Node<Word>::Children::value_type &valPair, m_rootPb.GetChildren()) {
+    const Word &word = valPair.first;
+    cerr << word << " ";
+  }
+  cerr << endl;
+  */
+
+}
 
 void MSPT::InitializeForInput(const System &system, const InputType &input)
 {
@@ -166,6 +168,10 @@ void MSPT::InitializeForInput(const System &system, const InputType &input)
   cerr << "Casting done." << endl << flush;
 
   cerr << "PhraseTableString member: " << inputObj.getPhraseTableString() << endl;
+  cerr << "Hardcoding sample PhraseTableString" << endl << flush;
+  string phraseTableString="a ||| x ||| 0.4 $$$ a ||| y ||| 0.6 $$$ b ||| y ||| 0.1 $$$ b ||| z ||| 0.9";
+  CreatePTForInput(system,phraseTableString);
+
 }
 
 TargetPhrases* MSPT::Lookup(const Manager &mgr, MemPool &pool,
diff --git a/moses2/TranslationModel/MSPT/MSPT.h b/moses2/TranslationModel/MSPT/MSPT.h
index b3ff99c91..744158ea0 100644
--- a/moses2/TranslationModel/MSPT/MSPT.h
+++ b/moses2/TranslationModel/MSPT/MSPT.h
@@ -80,6 +80,8 @@ protected:
                             const Moses2::Range &subPhraseRange,
                             SCFG::InputPath &outPath) const;
 
+  void CreatePTForInput(const System &system, std::string phraseTableString);
+
 };
 
 }
```