diff options
author | sid jain <sija@microsoft.com> | 2020-10-05 20:07:27 +0300 |
---|---|---|
committer | sid jain <sija@microsoft.com> | 2020-10-05 20:07:27 +0300 |
commit | 55ea76b26897885b819d88348da6ea74ae1338ae (patch) | |
tree | 8845854a7e6a213b1cfe2c2839d493f10750a3d9 | |
parent | 125f33fa68f2d68b256c065a9e6013b6c16dad18 (diff) | |
parent | 6e78aae34fc58b7e98d463ac56d8485f66e0ec77 (diff) |
2Merge branch 'master' of https://machinetranslation.visualstudio.com/DefaultCollection/moses-mstranslator/_git/moses-mstranslator
-rw-r--r-- | moses2/FF/FeatureFunction.h | 4 | ||||
-rw-r--r-- | moses2/FF/FeatureFunctions.cpp | 4 | ||||
-rw-r--r-- | moses2/InputType.cpp | 8 | ||||
-rw-r--r-- | moses2/InputType.h | 2 | ||||
-rw-r--r-- | moses2/ManagerBase.cpp | 2 | ||||
-rw-r--r-- | moses2/PhraseBased/Manager.cpp | 2 | ||||
-rw-r--r-- | moses2/PhraseBased/SentenceWithCandidates.cpp | 21 | ||||
-rw-r--r-- | moses2/PhraseBased/SentenceWithCandidates.h | 11 | ||||
-rw-r--r-- | moses2/TranslationModel/MSPT/MSPT.cpp | 195 | ||||
-rw-r--r-- | moses2/TranslationModel/MSPT/MSPT.h | 2 |
10 files changed, 160 insertions, 91 deletions
diff --git a/moses2/FF/FeatureFunction.h b/moses2/FF/FeatureFunction.h index 34742efb1..828a712b9 100644 --- a/moses2/FF/FeatureFunction.h +++ b/moses2/FF/FeatureFunction.h @@ -96,10 +96,10 @@ public: const SCFG::TargetPhrases &tps, const Phrase<SCFG::Word> &sourcePhrase) const { } - virtual void InitializeForInput(const InputType &input) { }; + virtual void InitializeForInput(const System &system, const InputType &input) { }; // clean up temporary memory, called after processing each sentence - virtual void CleanUpAfterSentenceProcessing(const InputType &input) const { + virtual void CleanUpAfterSentenceProcessing(const System &system, const InputType &input) const { } protected: diff --git a/moses2/FF/FeatureFunctions.cpp b/moses2/FF/FeatureFunctions.cpp index efe018b7b..39e2436b6 100644 --- a/moses2/FF/FeatureFunctions.cpp +++ b/moses2/FF/FeatureFunctions.cpp @@ -232,14 +232,14 @@ void FeatureFunctions::EvaluateWhenAppliedBatch(const Batch &batch) const void FeatureFunctions::InitializeForInput(const InputType &input) { BOOST_FOREACH(FeatureFunction *ff, m_featureFunctions) { - ff->InitializeForInput(input); + ff->InitializeForInput(m_system, input); } } void FeatureFunctions::CleanUpAfterSentenceProcessing(const InputType &input) const { BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) { - ff->CleanUpAfterSentenceProcessing(input); + ff->CleanUpAfterSentenceProcessing(m_system, input); } } diff --git a/moses2/InputType.cpp b/moses2/InputType.cpp index 60664a85b..af5b61ff9 100644 --- a/moses2/InputType.cpp +++ b/moses2/InputType.cpp @@ -7,6 +7,9 @@ #include "InputType.h" #include "System.h" +#include <iostream> + +using namespace std; namespace Moses2 { @@ -89,4 +92,9 @@ bool InputType::XmlOverlap(size_t startPos, size_t endPos) const return false; } +std::string InputType::Debug(const System &system) const +{ + cerr << "InputType::Debug" << endl; +} + } /* namespace Moses2 */ diff --git a/moses2/InputType.h b/moses2/InputType.h index 8813bc484..b4f901ac6 100644 --- a/moses2/InputType.h +++ b/moses2/InputType.h @@ -73,6 +73,8 @@ public: //! Returns true if there were any XML tags parsed that at least partially covered the range passed bool XmlOverlap(size_t startPos, size_t endPos) const; + virtual std::string Debug(const System &system) const; + protected: ReorderingConstraint m_reorderingConstraint; /**< limits on reordering specified either by "-mp" switch or xml tags */ Vector<const XMLOption*> m_xmlOptions; diff --git a/moses2/ManagerBase.cpp b/moses2/ManagerBase.cpp index 0ab60f9f1..41d3a0394 100644 --- a/moses2/ManagerBase.cpp +++ b/moses2/ManagerBase.cpp @@ -29,8 +29,8 @@ ManagerBase::ManagerBase(System &sys, const TranslationTask &task, ,m_pool(NULL) ,m_systemPool(NULL) ,m_hypoRecycle(NULL) + ,m_input(NULL) { - system.featureFunctions.InitializeForInput(*m_input); } ManagerBase::~ManagerBase() diff --git a/moses2/PhraseBased/Manager.cpp b/moses2/PhraseBased/Manager.cpp index bb3c130c5..cce30efb1 100644 --- a/moses2/PhraseBased/Manager.cpp +++ b/moses2/PhraseBased/Manager.cpp @@ -62,6 +62,8 @@ void Manager::Init() FactorCollection &vocab = system.GetVocab(); //TODO: need option to choose Sentence vs SentenceWithCandidates m_input = Moses2::SentenceWithCandidates::CreateFromString(GetPool(), vocab, system, m_inputStr); + //cerr << "Manager::Init: " << m_input->Debug(system) << endl << flush; + system.featureFunctions.InitializeForInput(*m_input); m_bitmaps = new Bitmaps(GetPool()); diff --git a/moses2/PhraseBased/SentenceWithCandidates.cpp b/moses2/PhraseBased/SentenceWithCandidates.cpp index c4115cbe8..6e4190a4e 100644 --- a/moses2/PhraseBased/SentenceWithCandidates.cpp +++ b/moses2/PhraseBased/SentenceWithCandidates.cpp @@ -70,11 +70,28 @@ SentenceWithCandidates *SentenceWithCandidates::CreateFromString(MemPool &pool, //// Parse the phrase table of the input ret->m_phraseTableString = replace_all_copy(input_parts[1],PT_LINE_DELIM,"\n"); - cerr << "Extracted Phrase Table String" << endl; - cerr << ret->m_phraseTableString << endl; + // ret->m_phraseTableString="constant phrase table"; +// cerr << "Extracted Phrase Table String: " << ret->m_phraseTableString << endl; + cerr << "Extracted Phrase Table String: " << ret->getPhraseTableString() << endl; return ret; } +SentenceWithCandidates::SentenceWithCandidates(MemPool &pool, size_t size) +:Sentence(pool, size) +{ + cerr << "SentenceWithCandidates::SentenceWithCandidates" << endl; +} + +SentenceWithCandidates::~SentenceWithCandidates() +{ + cerr << "SentenceWithCandidates::~SentenceWithCandidates" << endl; +} + +std::string SentenceWithCandidates::Debug(const System &system) const +{ + cerr << "SentenceWithCandidates::Debug" << endl; +} + } /* namespace Moses2 */ diff --git a/moses2/PhraseBased/SentenceWithCandidates.h b/moses2/PhraseBased/SentenceWithCandidates.h index 114ff9e67..fb550d577 100644 --- a/moses2/PhraseBased/SentenceWithCandidates.h +++ b/moses2/PhraseBased/SentenceWithCandidates.h @@ -29,12 +29,13 @@ public: static SentenceWithCandidates *CreateFromString(MemPool &pool, FactorCollection &vocab, const System &system, const std::string &str); - SentenceWithCandidates(MemPool &pool, size_t size) - :Sentence(pool, size) - {} + SentenceWithCandidates(MemPool &pool, size_t size); + virtual ~SentenceWithCandidates(); - virtual ~SentenceWithCandidates() - {} + virtual std::string Debug(const System &system) const; + std::string virtual getPhraseTableString() const{ + return m_phraseTableString; + } private: std::string m_phraseTableString; diff --git a/moses2/TranslationModel/MSPT/MSPT.cpp b/moses2/TranslationModel/MSPT/MSPT.cpp index b1f9b6595..ab16c3255 100644 --- a/moses2/TranslationModel/MSPT/MSPT.cpp +++ b/moses2/TranslationModel/MSPT/MSPT.cpp @@ -6,6 +6,7 @@ */ #include <cassert> +#include <sstream> #include <boost/foreach.hpp> #include "MSPT.h" #include "../../PhraseBased/PhraseImpl.h" @@ -19,6 +20,7 @@ #include "../../PhraseBased/InputPath.h" #include "../../PhraseBased/TargetPhraseImpl.h" #include "../../PhraseBased/TargetPhrases.h" +#include "../../PhraseBased/SentenceWithCandidates.h" #include "../../SCFG/PhraseImpl.h" #include "../../SCFG/TargetPhraseImpl.h" @@ -27,6 +29,7 @@ #include "../../SCFG/Stacks.h" #include "../../SCFG/Manager.h" +#include "../../PhraseBased/SentenceWithCandidates.h" using namespace std; @@ -50,9 +53,119 @@ MSPT::~MSPT() delete m_rootSCFG; } -void MSPT::InitializeForInput(const InputType &input) +// void MSPT::CreatePTForInput(string phraseTableString) +// { +// FactorCollection &vocab = system.GetVocab(); +// MemPool &systemPool = system.GetSystemPool(); +// MemPool tmpSourcePool; + +// if (system.isPb) { +// m_rootPb = new PBNODE(); +// } else { +// m_rootSCFG = new SCFGNODE(); +// //cerr << "m_rootSCFG=" << m_rootSCFG << endl; +// } + +// vector<string> toks; +// size_t lineNum = 0; +// istringstream strme(phraseTableString); +// string line; +// while (getline(strme, line)) { +// if (++lineNum % 1000000 == 0) { +// cerr << lineNum << " "; +// } +// toks.clear(); +// TokenizeMultiCharSeparator(toks, line, "|||"); +// UTIL_THROW_IF2(toks.size() < 3, "Wrong format"); +// //cerr << "line=" << line << endl; +// //cerr << "system.isPb=" << system.isPb << endl; + +// if (system.isPb) { +// PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, +// toks[0]); +// //cerr << "created soure" << endl; +// TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system, +// toks[1]); +// //cerr << "created target" << endl; +// target->GetScores().CreateFromString(toks[2], *this, system, true); +// //cerr << "created scores:" << *target << endl; + +// if (toks.size() >= 4) { +// //cerr << "alignstr=" << toks[3] << endl; +// target->SetAlignmentInfo(toks[3]); +// } + +// // properties +// if (toks.size() == 7) { +// //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1); +// //strcpy(target->properties, toks[6].c_str()); +// } + +// system.featureFunctions.EvaluateInIsolation(systemPool, system, *source, +// *target); +// //cerr << "EvaluateInIsolation:" << *target << endl; +// m_rootPb->AddRule(m_input, *source, target); + +// //cerr << "target=" << target->Debug(system) << endl; +// } else { +// SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, +// toks[0]); +// //cerr << "created source:" << *source << endl; +// SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this, +// system, toks[1]); + +// //cerr << "created target " << *target << " source=" << *source << endl; + +// target->GetScores().CreateFromString(toks[2], *this, system, true); +// //cerr << "created scores:" << *target << endl; + +// //vector<SCORE> scores = Tokenize<SCORE>(toks[2]); +// //target->sortScore = (scores.size() >= 3) ? TransformScore(scores[2]) : 0; + +// target->SetAlignmentInfo(toks[3]); + +// // properties +// if (toks.size() == 7) { +// //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1); +// //strcpy(target->properties, toks[6].c_str()); +// } + +// system.featureFunctions.EvaluateInIsolation(systemPool, system, *source, +// *target); +// //cerr << "EvaluateInIsolation:" << *target << endl; +// m_rootSCFG->AddRule(m_input, *source, target); +// } +// } + +// if (system.isPb) { +// m_rootPb->SortAndPrune(m_tableLimit, systemPool, system); +// //cerr << "root=" << &m_rootPb << endl; +// } else { +// m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system); +// //cerr << "root=" << &m_rootPb << endl; +// } +// /* +// BOOST_FOREACH(const PtMem::Node<Word>::Children::value_type &valPair, m_rootPb.GetChildren()) { +// const Word &word = valPair.first; +// cerr << word << " "; +// } +// cerr << endl; +// */ + +// } + +void MSPT::InitializeForInput(const System &system, const InputType &input) { cerr << "InitializeForInput MSPT" << endl; + cerr << input.Debug(system) << endl << flush; + cerr << "HH1" << endl << flush; + + // downcast to SentenceWithCandidates + //const SentenceWithCandidates &inputObj = static_cast<const SentenceWithCandidates&>(input); + const SentenceWithCandidates &inputObj = dynamic_cast<const SentenceWithCandidates&>(input); + cerr << "Casting done." << endl << flush; + cerr << "PhraseTableString member: " << inputObj.getPhraseTableString() << endl; + } TargetPhrases* MSPT::Lookup(const Manager &mgr, MemPool &pool, @@ -68,10 +181,7 @@ void MSPT::InitActiveChart( const SCFG::Manager &mgr, SCFG::InputPath &path) const { - size_t ptInd = GetPtInd(); - ActiveChartEntryMem *chartEntry = new (pool.Allocate<ActiveChartEntryMem>()) ActiveChartEntryMem(pool, *m_rootSCFG); - path.AddActiveChartEntry(ptInd, chartEntry); - //cerr << "InitActiveChart=" << path << endl; + abort(); } void MSPT::Lookup(MemPool &pool, @@ -80,38 +190,7 @@ void MSPT::Lookup(MemPool &pool, const SCFG::Stacks &stacks, SCFG::InputPath &path) const { - if (path.range.GetNumWordsCovered() > maxChartSpan) { - return; - } - - size_t endPos = path.range.GetEndPos(); - - const SCFG::InputPath *prevPath = static_cast<const SCFG::InputPath*>(path.prefixPath); - UTIL_THROW_IF2(prevPath == NULL, "prefixPath == NULL"); - - // TERMINAL - const SCFG::Word &lastWord = path.subPhrase.Back(); - - const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(endPos, 1); - - //cerr << "BEFORE LookupGivenWord=" << *prevPath << endl; - LookupGivenWord(pool, mgr, *prevPath, lastWord, NULL, subPhrasePath.range, path); - //cerr << "AFTER LookupGivenWord=" << *prevPath << endl; - - // NON-TERMINAL - //const SCFG::InputPath *prefixPath = static_cast<const SCFG::InputPath*>(path.prefixPath); - while (prevPath) { - const Range &prevRange = prevPath->range; - //cerr << "prevRange=" << prevRange << endl; - - size_t startPos = prevRange.GetEndPos() + 1; - size_t ntSize = endPos - startPos + 1; - const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(startPos, ntSize); - - LookupNT(pool, mgr, subPhrasePath.range, *prevPath, stacks, path); - - prevPath = static_cast<const SCFG::InputPath*>(prevPath->prefixPath); - } + abort(); } void MSPT::LookupGivenNode( @@ -123,47 +202,7 @@ void MSPT::LookupGivenNode( const Moses2::Range &subPhraseRange, SCFG::InputPath &outPath) const { - const ActiveChartEntryMem &prevEntryCast = static_cast<const ActiveChartEntryMem&>(prevEntry); - - const SCFGNODE &prevNode = prevEntryCast.node; - UTIL_THROW_IF2(&prevNode == NULL, "node == NULL"); - - size_t ptInd = GetPtInd(); - const SCFGNODE *nextNode = prevNode.Find(m_input, wordSought); - - /* - if (outPath.range.GetStartPos() == 1 || outPath.range.GetStartPos() == 2) { - cerr << "range=" << outPath.range - << " prevEntry=" << prevEntry.GetSymbolBind().Debug(mgr.system) - << " wordSought=" << wordSought.Debug(mgr.system) - << " nextNode=" << nextNode - << endl; - } - */ - if (nextNode) { - // new entries - ActiveChartEntryMem *chartEntry = new (pool.Allocate<ActiveChartEntryMem>()) ActiveChartEntryMem(pool, *nextNode, prevEntry); - - chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this); - //cerr << "AFTER Add=" << symbolBind << endl; - - outPath.AddActiveChartEntry(ptInd, chartEntry); - - const SCFG::TargetPhrases *tps = nextNode->GetTargetPhrases(); - if (tps) { - // there are some rules - /* - cerr << "outPath=" << outPath.range - << " bind=" << chartEntry->GetSymbolBind().Debug(mgr.system) - << " pt=" << GetPtInd() - << " tps=" << tps->Debug(mgr.system) << endl; - */ - outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind()); - - } - - //cerr << "AFTER outPath=" << outPath << endl; - } + abort(); } } diff --git a/moses2/TranslationModel/MSPT/MSPT.h b/moses2/TranslationModel/MSPT/MSPT.h index 165565791..b3ff99c91 100644 --- a/moses2/TranslationModel/MSPT/MSPT.h +++ b/moses2/TranslationModel/MSPT/MSPT.h @@ -65,7 +65,7 @@ public: const SCFG::Stacks &stacks, SCFG::InputPath &path) const; - virtual void InitializeForInput(const InputType &input); + virtual void InitializeForInput(const System &system, const InputType &input); protected: PBNODE *m_rootPb; |