Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> 2020-10-05 20:08:15 +0300
committer Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> 2020-10-05 20:08:15 +0300
commit 0b0d5e84926983330d57120987a480e96867b943 (patch)
tree 93c0d5059de342075435b9087e6c9082f266618a
parent a34623140a2d9a2793ab91337d8f3883382c2b20 (diff)
MSPT changes for parsing phrase table string
-rw-r--r-- moses2/TranslationModel/MSPT/MSNode.h 2
-rw-r--r-- moses2/TranslationModel/MSPT/MSPT.cpp 206
-rw-r--r-- moses2/TranslationModel/MSPT/MSPT.h 2
3 files changed, 109 insertions, 101 deletions
diff --git a/moses2/TranslationModel/MSPT/MSNode.h b/moses2/TranslationModel/MSPT/MSNode.h
index ad6d0842d..b02422aa5 100644
--- a/moses2/TranslationModel/MSPT/MSNode.h
+++ b/moses2/TranslationModel/MSPT/MSNode.h
@@ -67,7 +67,7 @@ public:
return m_targetPhrases;
}
- void SortAndPrune(size_t tableLimit, MemPool &pool, System &system) {
+ void SortAndPrune(size_t tableLimit, MemPool &pool, const System &system) {
BOOST_FOREACH(typename Children::value_type &val, m_children) {
Node &child = val.second;
child.SortAndPrune(tableLimit, pool, system);
diff --git a/moses2/TranslationModel/MSPT/MSPT.cpp b/moses2/TranslationModel/MSPT/MSPT.cpp
index 90feb3489..665d6dbea 100644
--- a/moses2/TranslationModel/MSPT/MSPT.cpp
+++ b/moses2/TranslationModel/MSPT/MSPT.cpp
@@ -53,106 +53,108 @@ MSPT::~MSPT()
delete m_rootSCFG;
}
-// void MSPT::CreatePTForInput(string phraseTableString)
-// {
-// FactorCollection &vocab = system.GetVocab();
-// MemPool &systemPool = system.GetSystemPool();
-// MemPool tmpSourcePool;
-
-// if (system.isPb) {
-// m_rootPb = new PBNODE();
-// } else {
-// m_rootSCFG = new SCFGNODE();
-// //cerr << "m_rootSCFG=" << m_rootSCFG << endl;
-// }
-
-// vector<string> toks;
-// size_t lineNum = 0;
-// istringstream strme(phraseTableString);
-// string line;
-// while (getline(strme, line)) {
-// if (++lineNum % 1000000 == 0) {
-// cerr << lineNum << " ";
-// }
-// toks.clear();
-// TokenizeMultiCharSeparator(toks, line, "|||");
-// UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
-// //cerr << "line=" << line << endl;
-// //cerr << "system.isPb=" << system.isPb << endl;
-
-// if (system.isPb) {
-// PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
-// toks[0]);
-// //cerr << "created soure" << endl;
-// TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system,
-// toks[1]);
-// //cerr << "created target" << endl;
-// target->GetScores().CreateFromString(toks[2], *this, system, true);
-// //cerr << "created scores:" << *target << endl;
-
-// if (toks.size() >= 4) {
-// //cerr << "alignstr=" << toks[3] << endl;
-// target->SetAlignmentInfo(toks[3]);
-// }
-
-// // properties
-// if (toks.size() == 7) {
-// //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
-// //strcpy(target->properties, toks[6].c_str());
-// }
-
-// system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
-// *target);
-// //cerr << "EvaluateInIsolation:" << *target << endl;
-// m_rootPb->AddRule(m_input, *source, target);
-
-// //cerr << "target=" << target->Debug(system) << endl;
-// } else {
-// SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
-// toks[0]);
-// //cerr << "created source:" << *source << endl;
-// SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
-// system, toks[1]);
-
-// //cerr << "created target " << *target << " source=" << *source << endl;
-
-// target->GetScores().CreateFromString(toks[2], *this, system, true);
-// //cerr << "created scores:" << *target << endl;
-
-// //vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
-// //target->sortScore = (scores.size() >= 3) ? TransformScore(scores[2]) : 0;
-
-// target->SetAlignmentInfo(toks[3]);
-
-// // properties
-// if (toks.size() == 7) {
-// //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
-// //strcpy(target->properties, toks[6].c_str());
-// }
-
-// system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
-// *target);
-// //cerr << "EvaluateInIsolation:" << *target << endl;
-// m_rootSCFG->AddRule(m_input, *source, target);
-// }
-// }
-
-// if (system.isPb) {
-// m_rootPb->SortAndPrune(m_tableLimit, systemPool, system);
-// //cerr << "root=" << &m_rootPb << endl;
-// } else {
-// m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system);
-// //cerr << "root=" << &m_rootPb << endl;
-// }
-// /*
-// BOOST_FOREACH(const PtMem::Node<Word>::Children::value_type &valPair, m_rootPb.GetChildren()) {
-// const Word &word = valPair.first;
-// cerr << word << " ";
-// }
-// cerr << endl;
-// */
-
-// }
+void MSPT::CreatePTForInput(const System &system, string phraseTableString)
+{
+ cerr << "In CreatePTForInput" << endl << flush;
+
+ FactorCollection &vocab = system.GetVocab();
+ MemPool &systemPool = system.GetSystemPool();
+ MemPool tmpSourcePool;
+
+ if (system.isPb) {
+ m_rootPb = new PBNODE();
+ } else {
+ m_rootSCFG = new SCFGNODE();
+ //cerr << "m_rootSCFG=" << m_rootSCFG << endl;
+ }
+
+ vector<string> toks;
+ size_t lineNum = 0;
+ istringstream strme(phraseTableString);
+ string line;
+ while (getline(strme, line)) {
+ if (++lineNum % 1000000 == 0) {
+ cerr << lineNum << " ";
+ }
+ toks.clear();
+ TokenizeMultiCharSeparator(toks, line, "|||");
+ UTIL_THROW_IF2(toks.size() < 3, "Wrong format");
+ //cerr << "line=" << line << endl;
+ //cerr << "system.isPb=" << system.isPb << endl;
+
+ if (system.isPb) {
+ PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
+ toks[0]);
+ //cerr << "created soure" << endl;
+ TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system,
+ toks[1]);
+ //cerr << "created target" << endl;
+ target->GetScores().CreateFromString(toks[2], *this, system, true);
+ //cerr << "created scores:" << *target << endl;
+
+ if (toks.size() >= 4) {
+ //cerr << "alignstr=" << toks[3] << endl;
+ target->SetAlignmentInfo(toks[3]);
+ }
+
+ // properties
+ if (toks.size() == 7) {
+ //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
+ //strcpy(target->properties, toks[6].c_str());
+ }
+
+ system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
+ *target);
+ //cerr << "EvaluateInIsolation:" << *target << endl;
+ m_rootPb->AddRule(m_input, *source, target);
+
+ //cerr << "target=" << target->Debug(system) << endl;
+ } else {
+ SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system,
+ toks[0]);
+ //cerr << "created source:" << *source << endl;
+ SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this,
+ system, toks[1]);
+
+ //cerr << "created target " << *target << " source=" << *source << endl;
+
+ target->GetScores().CreateFromString(toks[2], *this, system, true);
+ //cerr << "created scores:" << *target << endl;
+
+ //vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
+ //target->sortScore = (scores.size() >= 3) ? TransformScore(scores[2]) : 0;
+
+ target->SetAlignmentInfo(toks[3]);
+
+ // properties
+ if (toks.size() == 7) {
+ //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1);
+ //strcpy(target->properties, toks[6].c_str());
+ }
+
+ system.featureFunctions.EvaluateInIsolation(systemPool, system, *source,
+ *target);
+ //cerr << "EvaluateInIsolation:" << *target << endl;
+ m_rootSCFG->AddRule(m_input, *source, target);
+ }
+ }
+
+ if (system.isPb) {
+ m_rootPb->SortAndPrune(m_tableLimit, systemPool, system);
+ //cerr << "root=" << &m_rootPb << endl;
+ } else {
+ m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system);
+ //cerr << "root=" << &m_rootPb << endl;
+ }
+ /*
+ BOOST_FOREACH(const PtMem::Node<Word>::Children::value_type &valPair, m_rootPb.GetChildren()) {
+ const Word &word = valPair.first;
+ cerr << word << " ";
+ }
+ cerr << endl;
+ */
+
+}
void MSPT::InitializeForInput(const System &system, const InputType &input)
{
@@ -166,6 +168,10 @@ void MSPT::InitializeForInput(const System &system, const InputType &input)
cerr << "Casting done." << endl << flush;
cerr << "PhraseTableString member: " << inputObj.getPhraseTableString() << endl;
+ cerr << "Hardcoding sample PhraseTableString" << endl << flush;
+ string phraseTableString="a ||| x ||| 0.4 $$$ a ||| y ||| 0.6 $$$ b ||| y ||| 0.1 $$$ b ||| z ||| 0.9";
+ CreatePTForInput(system,phraseTableString);
+
}
TargetPhrases* MSPT::Lookup(const Manager &mgr, MemPool &pool,
diff --git a/moses2/TranslationModel/MSPT/MSPT.h b/moses2/TranslationModel/MSPT/MSPT.h
index b3ff99c91..744158ea0 100644
--- a/moses2/TranslationModel/MSPT/MSPT.h
+++ b/moses2/TranslationModel/MSPT/MSPT.h
@@ -80,6 +80,8 @@ protected:
const Moses2::Range &subPhraseRange,
SCFG::InputPath &outPath) const;
+ void CreatePTForInput(const System &system, std::string phraseTableString);
+
};
}