diff options
author | Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> | 2020-09-30 19:49:48 +0300 |
---|---|---|
committer | Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> | 2020-09-30 19:49:48 +0300 |
commit | a6c60a64125f61133a7efd51a3c789c7384bc821 (patch) | |
tree | bbf461f7e6559103d6fbce11bc780df2096b2156 | |
parent | c635efaf2385b2fe692501543fe6a8ab26c87ef9 (diff) |
add SentenceWithCandidates class
-rw-r--r-- | moses2/PhraseBased/Manager.cpp | 2 | ||||
-rw-r--r-- | moses2/PhraseBased/SentenceWithCandidates.cpp | 185 | ||||
-rw-r--r-- | moses2/PhraseBased/SentenceWithCandidates.h | 52 |
3 files changed, 238 insertions, 1 deletions
diff --git a/moses2/PhraseBased/Manager.cpp b/moses2/PhraseBased/Manager.cpp index 28073d4f6..670c33894 100644 --- a/moses2/PhraseBased/Manager.cpp +++ b/moses2/PhraseBased/Manager.cpp @@ -59,7 +59,7 @@ void Manager::Init() InitPools(); FactorCollection &vocab = system.GetVocab(); - m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr); + m_input = Moses2::SentenceWithCandidates::CreateFromString(GetPool(), vocab, system, m_inputStr); m_bitmaps = new Bitmaps(GetPool()); diff --git a/moses2/PhraseBased/SentenceWithCandidates.cpp b/moses2/PhraseBased/SentenceWithCandidates.cpp new file mode 100644 index 000000000..4f55c5578 --- /dev/null +++ b/moses2/PhraseBased/SentenceWithCandidates.cpp @@ -0,0 +1,185 @@ +/* + * SentenceWithCandidates.cpp + * + * Created on: 14 Dec 2015 + * Author: hieu + */ +#include <boost/property_tree/ptree.hpp> +#include <boost/property_tree/xml_parser.hpp> +#include <boost/algorithm/string.hpp> + +#include "SentenceWithCandidates.h" +#include "../System.h" +#include "../parameters/AllOptions.h" +#include "../legacy/Util2.h" + + +using namespace std; + +namespace Moses2 +{ + +SentenceWithCandidates *SentenceWithCandidates::CreateFromString(MemPool &pool, FactorCollection &vocab, + const System &system, const std::string &str) +{ + SentenceWithCandidates *ret; + + vector<string> result; + boost::split(result, str, boost::is_any_of("|||")); + + if (result.size()!=2){ + exit(1); + } + + const string partstr = result[0] + + if (system.options.input.xml_policy) { + // xml + ret = CreateFromStringXML(pool, vocab, system, partstr); + } else { + // no xml + //cerr << "PB SentenceWithCandidates" << endl; + std::vector<std::string> toks = Tokenize(partstr); + + size_t size = toks.size(); + ret = new (pool.Allocate<SentenceWithCandidates>()) SentenceWithCandidates(pool, size); + ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false); + } + + //cerr << "REORDERING CONSTRAINTS:" << ret->GetReorderingConstraint() << endl; + //cerr << "ret=" << ret->Debug(system) << endl; + + return ret; +} + +SentenceWithCandidates *SentenceWithCandidates::CreateFromStringXML(MemPool &pool, FactorCollection &vocab, + const System &system, const std::string &str) +{ + SentenceWithCandidates *ret; + + vector<XMLOption*> xmlOptions; + pugi::xml_document doc; + + string str2 = "<xml>" + str + "</xml>"; + pugi::xml_parse_result result = doc.load(str2.c_str(), + pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments); + pugi::xml_node topNode = doc.child("xml"); + + std::vector<std::string> toks; + XMLParse(pool, system, 0, topNode, toks, xmlOptions); + + // debug + /* + cerr << "xmloptions:" << endl; + for (size_t i = 0; i < xmlOptions.size(); ++i) { + cerr << xmlOptions[i]->Debug(system) << endl; + } + */ + + // create words + size_t size = toks.size(); + ret = new (pool.Allocate<SentenceWithCandidates>()) SentenceWithCandidates(pool, size); + ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false); + + // xml + ret->Init(system, size, system.options.reordering.max_distortion); + + ReorderingConstraint &reorderingConstraint = ret->GetReorderingConstraint(); + + // set reordering walls, if "-monotone-at-punction" is set + if (system.options.reordering.monotone_at_punct && ret->GetSize()) { + reorderingConstraint.SetMonotoneAtPunctuation(*ret); + } + + // set walls obtained from xml + for(size_t i=0; i<xmlOptions.size(); i++) { + const XMLOption *xmlOption = xmlOptions[i]; + if(strcmp(xmlOption->GetNodeName(), "wall") == 0) { + if (xmlOption->startPos) { + UTIL_THROW_IF2(xmlOption->startPos > ret->GetSize(), "wall is beyond the SentenceWithCandidates"); // no buggy walls, please + reorderingConstraint.SetWall(xmlOption->startPos - 1, true); + } + } else if (strcmp(xmlOption->GetNodeName(), "zone") == 0) { + reorderingConstraint.SetZone( xmlOption->startPos, xmlOption->startPos + xmlOption->phraseSize -1 ); + } else if (strcmp(xmlOption->GetNodeName(), "ne") == 0) { + FactorType placeholderFactor = system.options.input.placeholder_factor; + UTIL_THROW_IF2(placeholderFactor == NOT_FOUND, + "Placeholder XML in input. Must have argument -placeholder-factor [NUM]"); + UTIL_THROW_IF2(xmlOption->phraseSize != 1, + "Placeholder must only cover 1 word"); + + const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false); + (*ret)[xmlOption->startPos][placeholderFactor] = factor; + } else { + // default - forced translation. Add to class variable + ret->AddXMLOption(system, xmlOption); + } + } + reorderingConstraint.FinalizeWalls(); + + return ret; +} + +void SentenceWithCandidates::XMLParse( + MemPool &pool, + const System &system, + size_t depth, + const pugi::xml_node &parentNode, + std::vector<std::string> &toks, + vector<XMLOption*> &xmlOptions) +{ + // pugixml + for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) { + string nodeName = childNode.name(); + //cerr << depth << " nodeName=" << nodeName << endl; + + int startPos = toks.size(); + + string value = childNode.value(); + if (!value.empty()) { + //cerr << depth << "childNode text=" << value << endl; + std::vector<std::string> subPhraseToks = Tokenize(value); + for (size_t i = 0; i < subPhraseToks.size(); ++i) { + toks.push_back(subPhraseToks[i]); + } + } + + if (!nodeName.empty()) { + XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos); + + pugi::xml_attribute attr; + attr = childNode.attribute("translation"); + if (!attr.empty()) { + xmlOption->SetTranslation(pool, attr.as_string()); + } + + attr = childNode.attribute("entity"); + if (!attr.empty()) { + xmlOption->SetEntity(pool, attr.as_string()); + } + + attr = childNode.attribute("prob"); + if (!attr.empty()) { + xmlOption->prob = attr.as_float(); + } + + xmlOptions.push_back(xmlOption); + + // recursively call this function. For proper recursive trees + XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions); + + size_t endPos = toks.size(); + xmlOption->phraseSize = endPos - startPos; + + /* + cerr << "xmlOptions="; + xmlOption->Debug(cerr, system); + cerr << endl; + */ + } + + } +} + +} /* namespace Moses2 */ + diff --git a/moses2/PhraseBased/SentenceWithCandidates.h b/moses2/PhraseBased/SentenceWithCandidates.h new file mode 100644 index 000000000..6cfea1a22 --- /dev/null +++ b/moses2/PhraseBased/SentenceWithCandidates.h @@ -0,0 +1,52 @@ +/* + * SentenceWithCandidates.h + * + * Created on: 14 Dec 2015 + * Author: hieu + */ +#pragma once + +#include <boost/property_tree/ptree.hpp> +#include <string> +#include "PhraseImpl.h" +#include "../InputType.h" +#include "../MemPool.h" +#include "../pugixml.hpp" +#include "../legacy/Util2.h" + +namespace Moses2 +{ +class FactorCollection; +class System; + +class SentenceWithCandidates: public InputType, public PhraseImpl +{ +public: + + static SentenceWithCandidates *CreateFromString(MemPool &pool, FactorCollection &vocab, + const System &system, const std::string &str); + + SentenceWithCandidates(MemPool &pool, size_t size) + :InputType(pool) + ,PhraseImpl(pool, size) + {} + + virtual ~SentenceWithCandidates() + {} + +protected: + static SentenceWithCandidates *CreateFromStringXML(MemPool &pool, FactorCollection &vocab, + const System &system, const std::string &str); + + static void XMLParse( + MemPool &pool, + const System &system, + size_t depth, + const pugi::xml_node &parentNode, + std::vector<std::string> &toks, + std::vector<XMLOption*> &xmlOptions); + +}; + +} /* namespace Moses2 */ + |