diff options
author | Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> | 2020-10-01 06:45:07 +0300 |
---|---|---|
committer | Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> | 2020-10-01 06:45:07 +0300 |
commit | e6f5d6f7970c7dc1eb3a72249d2f96f63944a0e2 (patch) | |
tree | 598d95adc1e3ef2403f4e7395b7f78ca5319d205 | |
parent | d97c54b7290b7aced558f3aae569e6c6dce7ce1f (diff) | |
parent | d74ef345918881919e57dca447d15c9fca4e5082 (diff) |
Merge branch 'master' changes
-rw-r--r-- | moses2/Jamfile | 3 | ||||
-rw-r--r-- | moses2/PhraseBased/Manager.cpp | 1 | ||||
-rw-r--r-- | moses2/PhraseBased/SentenceWithCandidates.cpp | 65 | ||||
-rw-r--r-- | moses2/PhraseBased/SentenceWithCandidates.h | 15 | ||||
-rw-r--r-- | moses2/TranslationModel/MSPT/MSNode.h | 131 | ||||
-rw-r--r-- | moses2/TranslationModel/MSPT/MSPT.cpp | 265 | ||||
-rw-r--r-- | moses2/TranslationModel/MSPT/MSPT.h | 85 |
7 files changed, 489 insertions, 76 deletions
diff --git a/moses2/Jamfile b/moses2/Jamfile index 42676c065..e8a6457fd 100644 --- a/moses2/Jamfile +++ b/moses2/Jamfile @@ -108,7 +108,8 @@ alias deps : ..//z ..//boost_iostreams ..//boost_filesystem : : : $(max-factors PhraseBased/ReorderingConstraint.cpp PhraseBased/TargetPhrases.cpp PhraseBased/Search.cpp - PhraseBased/Sentence.cpp + PhraseBased/Sentence.cpp + PhraseBased/SentenceWithCandidates.cpp PhraseBased/TargetPhraseImpl.cpp PhraseBased/TrellisPath.cpp diff --git a/moses2/PhraseBased/Manager.cpp b/moses2/PhraseBased/Manager.cpp index 37c2ec669..bb3c130c5 100644 --- a/moses2/PhraseBased/Manager.cpp +++ b/moses2/PhraseBased/Manager.cpp @@ -13,6 +13,7 @@ #include "TargetPhraseImpl.h" #include "InputPath.h" #include "Sentence.h" +#include "SentenceWithCandidates.h" #include "Normal/Search.h" #include "CubePruningMiniStack/Search.h" diff --git a/moses2/PhraseBased/SentenceWithCandidates.cpp b/moses2/PhraseBased/SentenceWithCandidates.cpp index 484d94cda..f0da67a76 100644 --- a/moses2/PhraseBased/SentenceWithCandidates.cpp +++ b/moses2/PhraseBased/SentenceWithCandidates.cpp @@ -45,8 +45,8 @@ SentenceWithCandidates *SentenceWithCandidates::CreateFromString(MemPool &pool, exit(1); } - const string partstr = input_parts[0] - parseCandidates(input_parts[1]) + const string partstr = input_parts[0]; + // parseCandidates(input_parts[1]); if (system.options.input.xml_policy) { // xml @@ -135,66 +135,5 @@ SentenceWithCandidates *SentenceWithCandidates::CreateFromStringXML(MemPool &poo return ret; } -void SentenceWithCandidates::XMLParse( - MemPool &pool, - const System &system, - size_t depth, - const pugi::xml_node &parentNode, - std::vector<std::string> &toks, - vector<XMLOption*> &xmlOptions) -{ - // pugixml - for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) { - string nodeName = childNode.name(); - //cerr << depth << " nodeName=" << nodeName << endl; - - int startPos = toks.size(); - - string value = childNode.value(); - if (!value.empty()) { - //cerr << depth << "childNode text=" << value << endl; - std::vector<std::string> subPhraseToks = Tokenize(value); - for (size_t i = 0; i < subPhraseToks.size(); ++i) { - toks.push_back(subPhraseToks[i]); - } - } - - if (!nodeName.empty()) { - XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos); - - pugi::xml_attribute attr; - attr = childNode.attribute("translation"); - if (!attr.empty()) { - xmlOption->SetTranslation(pool, attr.as_string()); - } - - attr = childNode.attribute("entity"); - if (!attr.empty()) { - xmlOption->SetEntity(pool, attr.as_string()); - } - - attr = childNode.attribute("prob"); - if (!attr.empty()) { - xmlOption->prob = attr.as_float(); - } - - xmlOptions.push_back(xmlOption); - - // recursively call this function. For proper recursive trees - XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions); - - size_t endPos = toks.size(); - xmlOption->phraseSize = endPos - startPos; - - /* - cerr << "xmlOptions="; - xmlOption->Debug(cerr, system); - cerr << endl; - */ - } - - } -} - } /* namespace Moses2 */ diff --git a/moses2/PhraseBased/SentenceWithCandidates.h b/moses2/PhraseBased/SentenceWithCandidates.h index 6cfea1a22..4dafca4af 100644 --- a/moses2/PhraseBased/SentenceWithCandidates.h +++ b/moses2/PhraseBased/SentenceWithCandidates.h @@ -9,7 +9,7 @@ #include <boost/property_tree/ptree.hpp> #include <string> #include "PhraseImpl.h" -#include "../InputType.h" +#include "Sentence.h" #include "../MemPool.h" #include "../pugixml.hpp" #include "../legacy/Util2.h" @@ -19,7 +19,7 @@ namespace Moses2 class FactorCollection; class System; -class SentenceWithCandidates: public InputType, public PhraseImpl +class SentenceWithCandidates: public Sentence { public: @@ -27,8 +27,7 @@ public: const System &system, const std::string &str); SentenceWithCandidates(MemPool &pool, size_t size) - :InputType(pool) - ,PhraseImpl(pool, size) + :Sentence(pool, size) {} virtual ~SentenceWithCandidates() @@ -38,14 +37,6 @@ protected: static SentenceWithCandidates *CreateFromStringXML(MemPool &pool, FactorCollection &vocab, const System &system, const std::string &str); - static void XMLParse( - MemPool &pool, - const System &system, - size_t depth, - const pugi::xml_node &parentNode, - std::vector<std::string> &toks, - std::vector<XMLOption*> &xmlOptions); - }; } /* namespace Moses2 */ diff --git a/moses2/TranslationModel/MSPT/MSNode.h b/moses2/TranslationModel/MSPT/MSNode.h new file mode 100644 index 000000000..ad6d0842d --- /dev/null +++ b/moses2/TranslationModel/MSPT/MSNode.h @@ -0,0 +1,131 @@ +/* + * Node.h + * + * Created on: 22 Apr 2016 + * Author: hieu + */ +#pragma once +#include <boost/unordered_map.hpp> +#include <boost/foreach.hpp> +#include "../../PhraseBased/TargetPhrases.h" +#include "../../System.h" +#include "../../Phrase.h" + +namespace Moses2 +{ +class System; + +namespace MSPTNS +{ + +template<class WORD, class SP, class TP, class TPS> +class Node +{ +public: + typedef boost::unordered_map<size_t, Node> Children; + + Node() + :m_targetPhrases(NULL) + ,m_unsortedTPS(NULL) + {} + + ~Node() + {} + + void AddRule(const std::vector<FactorType> &factors, SP &source, TP *target) { + AddRule(factors, source, target, 0); + } + + TPS *Find(const std::vector<FactorType> &factors, const SP &source, size_t pos = 0) const { + assert(source.GetSize()); + if (pos == source.GetSize()) { + return m_targetPhrases; + } else { + const WORD &word = source[pos]; + //cerr << "word=" << word << endl; + typename Children::const_iterator iter = m_children.find(word.hash(factors)); + if (iter == m_children.end()) { + return NULL; + } else { + const Node &child = iter->second; + return child.Find(factors, source, pos + 1); + } + } + } + + const Node *Find(const std::vector<FactorType> &factors, const WORD &word) const { + typename Children::const_iterator iter = m_children.find(word.hash(factors)); + if (iter == m_children.end()) { + return NULL; + } else { + const Node &child = iter->second; + return &child; + } + } + + const TPS *GetTargetPhrases() const { + return m_targetPhrases; + } + + void SortAndPrune(size_t tableLimit, MemPool &pool, System &system) { + BOOST_FOREACH(typename Children::value_type &val, m_children) { + Node &child = val.second; + child.SortAndPrune(tableLimit, pool, system); + } + + // prune target phrases in this node + if (m_unsortedTPS) { + m_targetPhrases = new (pool.Allocate<TPS>()) TPS(pool, m_unsortedTPS->size()); + + for (size_t i = 0; i < m_unsortedTPS->size(); ++i) { + TP *tp = (*m_unsortedTPS)[i]; + m_targetPhrases->AddTargetPhrase(*tp); + } + + m_targetPhrases->SortAndPrune(tableLimit); + system.featureFunctions.EvaluateAfterTablePruning(system.GetSystemPool(), *m_targetPhrases, *m_source); + + delete m_unsortedTPS; + } + } + + const Children &GetChildren() const { + return m_children; + } + + void Debug(std::ostream &out, const System &system) const { + BOOST_FOREACH(const typename Children::value_type &valPair, m_children) { + const WORD &word = valPair.first; + //std::cerr << word << "(" << word.hash() << ") "; + } + } +protected: + Children m_children; + TPS *m_targetPhrases; + Phrase<WORD> *m_source; + std::vector<TP*> *m_unsortedTPS; + + Node &AddRule(const std::vector<FactorType> &factors, SP &source, TP *target, size_t pos) { + if (pos == source.GetSize()) { + if (m_unsortedTPS == NULL) { + m_unsortedTPS = new std::vector<TP*>(); + m_source = &source; + } + + m_unsortedTPS->push_back(target); + return *this; + } else { + const WORD &word = source[pos]; + Node &child = m_children[word.hash(factors)]; + //std::cerr << "added " << word << " " << &child << " from " << this << std::endl; + + return child.AddRule(factors, source, target, pos + 1); + } + } + +}; + + +} +} // namespace + diff --git a/moses2/TranslationModel/MSPT/MSPT.cpp b/moses2/TranslationModel/MSPT/MSPT.cpp new file mode 100644 index 000000000..c905d5240 --- /dev/null +++ b/moses2/TranslationModel/MSPT/MSPT.cpp @@ -0,0 +1,265 @@ +/* + * MSPT.cpp + * + * Created on: 28 Oct 2015 + * Author: hieu + */ + +#include <cassert> +#include <boost/foreach.hpp> +#include "MSPT.h" +#include "../../PhraseBased/PhraseImpl.h" +#include "../../Phrase.h" +#include "../../System.h" +#include "../../Scores.h" +#include "../../InputPathsBase.h" +#include "../../legacy/InputFileStream.h" +#include "util/exception.hh" + +#include "../../PhraseBased/InputPath.h" +#include "../../PhraseBased/TargetPhraseImpl.h" +#include "../../PhraseBased/TargetPhrases.h" + +#include "../../SCFG/PhraseImpl.h" +#include "../../SCFG/TargetPhraseImpl.h" +#include "../../SCFG/InputPath.h" +#include "../../SCFG/Stack.h" +#include "../../SCFG/Stacks.h" +#include "../../SCFG/Manager.h" + + +using namespace std; + +namespace Moses2 +{ + + +//////////////////////////////////////////////////////////////////////// + +MSPT::MSPT(size_t startInd, const std::string &line) + :PhraseTable(startInd, line) + ,m_rootPb(NULL) + ,m_rootSCFG(NULL) +{ + ReadParameters(); +} + +MSPT::~MSPT() +{ + delete m_rootPb; + delete m_rootSCFG; +} + +void MSPT::Load(System &system) +{ + FactorCollection &vocab = system.GetVocab(); + MemPool &systemPool = system.GetSystemPool(); + MemPool tmpSourcePool; + + if (system.isPb) { + m_rootPb = new PBNODE(); + } else { + m_rootSCFG = new SCFGNODE(); + //cerr << "m_rootSCFG=" << m_rootSCFG << endl; + } + + vector<string> toks; + size_t lineNum = 0; + InputFileStream strme(m_path); + string line; + while (getline(strme, line)) { + if (++lineNum % 1000000 == 0) { + cerr << lineNum << " "; + } + toks.clear(); + TokenizeMultiCharSeparator(toks, line, "|||"); + UTIL_THROW_IF2(toks.size() < 3, "Wrong format"); + //cerr << "line=" << line << endl; + //cerr << "system.isPb=" << system.isPb << endl; + + if (system.isPb) { + PhraseImpl *source = PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, + toks[0]); + //cerr << "created soure" << endl; + TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(systemPool, *this, system, + toks[1]); + //cerr << "created target" << endl; + target->GetScores().CreateFromString(toks[2], *this, system, true); + //cerr << "created scores:" << *target << endl; + + if (toks.size() >= 4) { + //cerr << "alignstr=" << toks[3] << endl; + target->SetAlignmentInfo(toks[3]); + } + + // properties + if (toks.size() == 7) { + //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1); + //strcpy(target->properties, toks[6].c_str()); + } + + system.featureFunctions.EvaluateInIsolation(systemPool, system, *source, + *target); + //cerr << "EvaluateInIsolation:" << *target << endl; + m_rootPb->AddRule(m_input, *source, target); + + //cerr << "target=" << target->Debug(system) << endl; + } else { + SCFG::PhraseImpl *source = SCFG::PhraseImpl::CreateFromString(tmpSourcePool, vocab, system, + toks[0]); + //cerr << "created source:" << *source << endl; + SCFG::TargetPhraseImpl *target = SCFG::TargetPhraseImpl::CreateFromString(systemPool, *this, + system, toks[1]); + + //cerr << "created target " << *target << " source=" << *source << endl; + + target->GetScores().CreateFromString(toks[2], *this, system, true); + //cerr << "created scores:" << *target << endl; + + //vector<SCORE> scores = Tokenize<SCORE>(toks[2]); + //target->sortScore = (scores.size() >= 3) ? TransformScore(scores[2]) : 0; + + target->SetAlignmentInfo(toks[3]); + + // properties + if (toks.size() == 7) { + //target->properties = (char*) system.systemPool.Allocate(toks[6].size() + 1); + //strcpy(target->properties, toks[6].c_str()); + } + + system.featureFunctions.EvaluateInIsolation(systemPool, system, *source, + *target); + //cerr << "EvaluateInIsolation:" << *target << endl; + m_rootSCFG->AddRule(m_input, *source, target); + } + } + + if (system.isPb) { + m_rootPb->SortAndPrune(m_tableLimit, systemPool, system); + //cerr << "root=" << &m_rootPb << endl; + } else { + m_rootSCFG->SortAndPrune(m_tableLimit, systemPool, system); + //cerr << "root=" << &m_rootPb << endl; + } + /* + BOOST_FOREACH(const PtMem::Node<Word>::Children::value_type &valPair, m_rootPb.GetChildren()) { + const Word &word = valPair.first; + cerr << word << " "; + } + cerr << endl; + */ +} + +TargetPhrases* MSPT::Lookup(const Manager &mgr, MemPool &pool, + InputPath &inputPath) const +{ + const SubPhrase<Moses2::Word> &phrase = inputPath.subPhrase; + TargetPhrases *tps = m_rootPb->Find(m_input, phrase); + return tps; +} + +void MSPT::InitActiveChart( + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const +{ + size_t ptInd = GetPtInd(); + ActiveChartEntryMem *chartEntry = new (pool.Allocate<ActiveChartEntryMem>()) ActiveChartEntryMem(pool, *m_rootSCFG); + path.AddActiveChartEntry(ptInd, chartEntry); + //cerr << "InitActiveChart=" << path << endl; +} + +void MSPT::Lookup(MemPool &pool, + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const +{ + if (path.range.GetNumWordsCovered() > maxChartSpan) { + return; + } + + size_t endPos = path.range.GetEndPos(); + + const SCFG::InputPath *prevPath = static_cast<const SCFG::InputPath*>(path.prefixPath); + UTIL_THROW_IF2(prevPath == NULL, "prefixPath == NULL"); + + // TERMINAL + const SCFG::Word &lastWord = path.subPhrase.Back(); + + const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(endPos, 1); + + //cerr << "BEFORE LookupGivenWord=" << *prevPath << endl; + LookupGivenWord(pool, mgr, *prevPath, lastWord, NULL, subPhrasePath.range, path); + //cerr << "AFTER LookupGivenWord=" << *prevPath << endl; + + // NON-TERMINAL + //const SCFG::InputPath *prefixPath = static_cast<const SCFG::InputPath*>(path.prefixPath); + while (prevPath) { + const Range &prevRange = prevPath->range; + //cerr << "prevRange=" << prevRange << endl; + + size_t startPos = prevRange.GetEndPos() + 1; + size_t ntSize = endPos - startPos + 1; + const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(startPos, ntSize); + + LookupNT(pool, mgr, subPhrasePath.range, *prevPath, stacks, path); + + prevPath = static_cast<const SCFG::InputPath*>(prevPath->prefixPath); + } +} + +void MSPT::LookupGivenNode( + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const +{ + const ActiveChartEntryMem &prevEntryCast = static_cast<const ActiveChartEntryMem&>(prevEntry); + + const SCFGNODE &prevNode = prevEntryCast.node; + UTIL_THROW_IF2(&prevNode == NULL, "node == NULL"); + + size_t ptInd = GetPtInd(); + const SCFGNODE *nextNode = prevNode.Find(m_input, wordSought); + + /* + if (outPath.range.GetStartPos() == 1 || outPath.range.GetStartPos() == 2) { + cerr << "range=" << outPath.range + << " prevEntry=" << prevEntry.GetSymbolBind().Debug(mgr.system) + << " wordSought=" << wordSought.Debug(mgr.system) + << " nextNode=" << nextNode + << endl; + } + */ + if (nextNode) { + // new entries + ActiveChartEntryMem *chartEntry = new (pool.Allocate<ActiveChartEntryMem>()) ActiveChartEntryMem(pool, *nextNode, prevEntry); + + chartEntry->AddSymbolBindElement(subPhraseRange, wordSought, hypos, *this); + //cerr << "AFTER Add=" << symbolBind << endl; + + outPath.AddActiveChartEntry(ptInd, chartEntry); + + const SCFG::TargetPhrases *tps = nextNode->GetTargetPhrases(); + if (tps) { + // there are some rules + /* + cerr << "outPath=" << outPath.range + << " bind=" << chartEntry->GetSymbolBind().Debug(mgr.system) + << " pt=" << GetPtInd() + << " tps=" << tps->Debug(mgr.system) << endl; + */ + outPath.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind()); + + } + + //cerr << "AFTER outPath=" << outPath << endl; + } +} + +} + diff --git a/moses2/TranslationModel/MSPT/MSPT.h b/moses2/TranslationModel/MSPT/MSPT.h new file mode 100644 index 000000000..d3946d353 --- /dev/null +++ b/moses2/TranslationModel/MSPT/MSPT.h @@ -0,0 +1,85 @@ +/* + * MSPT.h + * + * Created on: 28 Oct 2015 + * Author: hieu + */ +#pragma once + +#include "../PhraseTable.h" +#include "../../legacy/Util2.h" +#include "../../SCFG/InputPath.h" +#include "MSNode.h" +#include "../../PhraseBased/PhraseImpl.h" +#include "../../PhraseBased/TargetPhraseImpl.h" +#include "../../PhraseBased/TargetPhrases.h" +#include "../../SCFG/PhraseImpl.h" +#include "../../SCFG/TargetPhraseImpl.h" +#include "../../SCFG/TargetPhrases.h" + +namespace Moses2 +{ + +class MSPT: public PhraseTable +{ + typedef MSPTNS::Node<Word, Phrase<Word>, TargetPhraseImpl, TargetPhrases> PBNODE; + typedef MSPTNS::Node<SCFG::Word, Phrase<SCFG::Word>, SCFG::TargetPhraseImpl, SCFG::TargetPhrases> SCFGNODE; + +////////////////////////////////////// + class ActiveChartEntryMem : public SCFG::ActiveChartEntry + { + typedef SCFG::ActiveChartEntry Parent; + public: + const MSPT::SCFGNODE &node; + + ActiveChartEntryMem(MemPool &pool, const MSPT::SCFGNODE &vnode) + :Parent(pool) + ,node(vnode) + {} + + ActiveChartEntryMem( + MemPool &pool, + const MSPT::SCFGNODE &vnode, + const ActiveChartEntry &prevEntry) + :Parent(prevEntry) + ,node(vnode) + {} + }; + + ////////////////////////////////////// +public: + MSPT(size_t startInd, const std::string &line); + virtual ~MSPT(); + + virtual void Load(System &system); + virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool, + InputPath &inputPath) const; + + virtual void InitActiveChart( + MemPool &pool, + const SCFG::Manager &mgr, + SCFG::InputPath &path) const; + + void Lookup(MemPool &pool, + const SCFG::Manager &mgr, + size_t maxChartSpan, + const SCFG::Stacks &stacks, + SCFG::InputPath &path) const; + +protected: + PBNODE *m_rootPb; + SCFGNODE *m_rootSCFG; + + void LookupGivenNode( + MemPool &pool, + const SCFG::Manager &mgr, + const SCFG::ActiveChartEntry &prevEntry, + const SCFG::Word &wordSought, + const Moses2::Hypotheses *hypos, + const Moses2::Range &subPhraseRange, + SCFG::InputPath &outPath) const; + +}; + +} + |