Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> 2020-09-30 19:49:48 +0300
committer Anoop Kunchukuttan <anoop.kunchukuttan@gmail.com> 2020-09-30 19:49:48 +0300
commit a6c60a64125f61133a7efd51a3c789c7384bc821 (patch)
tree bbf461f7e6559103d6fbce11bc780df2096b2156
parent c635efaf2385b2fe692501543fe6a8ab26c87ef9 (diff)
add SentenceWithCandidates class
-rw-r--r-- moses2/PhraseBased/Manager.cpp 2
-rw-r--r-- moses2/PhraseBased/SentenceWithCandidates.cpp 185
-rw-r--r-- moses2/PhraseBased/SentenceWithCandidates.h 52
3 files changed, 238 insertions, 1 deletions
diff --git a/moses2/PhraseBased/Manager.cpp b/moses2/PhraseBased/Manager.cpp
index 28073d4f6..670c33894 100644
--- a/moses2/PhraseBased/Manager.cpp
+++ b/moses2/PhraseBased/Manager.cpp
@@ -59,7 +59,7 @@ void Manager::Init()
InitPools();
FactorCollection &vocab = system.GetVocab();
- m_input = Moses2::Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr);
+ m_input = Moses2::SentenceWithCandidates::CreateFromString(GetPool(), vocab, system, m_inputStr);
m_bitmaps = new Bitmaps(GetPool());
diff --git a/moses2/PhraseBased/SentenceWithCandidates.cpp b/moses2/PhraseBased/SentenceWithCandidates.cpp
new file mode 100644
index 000000000..4f55c5578
--- /dev/null
+++ b/moses2/PhraseBased/SentenceWithCandidates.cpp
@@ -0,0 +1,185 @@
+/*
+ * SentenceWithCandidates.cpp
+ *
+ * Created on: 14 Dec 2015
+ * Author: hieu
+ */
+#include <boost/property_tree/ptree.hpp>
+#include <boost/property_tree/xml_parser.hpp>
+#include <boost/algorithm/string.hpp>
+
+#include "SentenceWithCandidates.h"
+#include "../System.h"
+#include "../parameters/AllOptions.h"
+#include "../legacy/Util2.h"
+
+
+using namespace std;
+
+namespace Moses2
+{
+
+/**
+ * Creates a SentenceWithCandidates from one raw input line.
+ *
+ * The line must consist of exactly two parts separated by the literal
+ * three-character delimiter "|||". Only the first part is turned into the
+ * sentence here; the second part has to be present but is not otherwise
+ * read by this function.
+ *
+ * If system.options.input.xml_policy is set, the first part is handed to
+ * CreateFromStringXML(); otherwise it is tokenized and loaded directly.
+ *
+ * \param pool   memory pool the returned object is placement-allocated from
+ * \param vocab  factor collection passed on when the words are created
+ * \param system decoder configuration (the input options are read from it)
+ * \param str    raw input line, "<first part>|||<second part>"
+ * \return the newly created sentence
+ *
+ * Malformed input (no delimiter, or more than one) is reported through
+ * UTIL_THROW_IF2, the same mechanism CreateFromStringXML() uses.
+ */
+SentenceWithCandidates *SentenceWithCandidates::CreateFromString(MemPool &pool, FactorCollection &vocab,
+    const System &system, const std::string &str)
+{
+  // The delimiter is searched for as a whole substring. boost::is_any_of("|||")
+  // is a character-set predicate: it would split on every single '|' and turn
+  // "a ||| b" into four pieces instead of two.
+  const std::string delim = "|||";
+
+  const size_t delimPos = str.find(delim);
+  UTIL_THROW_IF2(delimPos == std::string::npos,
+                 "Input must have two parts separated by |||");
+  UTIL_THROW_IF2(str.find(delim, delimPos + delim.size()) != std::string::npos,
+                 "Input must have exactly two parts separated by |||");
+
+  // everything in front of the delimiter
+  const std::string partstr = str.substr(0, delimPos);
+
+  SentenceWithCandidates *ret;
+
+  if (system.options.input.xml_policy) {
+    // xml
+    ret = CreateFromStringXML(pool, vocab, system, partstr);
+  } else {
+    // no xml
+    //cerr << "PB SentenceWithCandidates" << endl;
+    std::vector<std::string> toks = Tokenize(partstr);
+
+    size_t size = toks.size();
+    ret = new (pool.Allocate<SentenceWithCandidates>()) SentenceWithCandidates(pool, size);
+    ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
+  }
+
+  //cerr << "REORDERING CONSTRAINTS:" << ret->GetReorderingConstraint() << endl;
+  //cerr << "ret=" << ret->Debug(system) << endl;
+
+  return ret;
+}
+
+/**
+ * Creates a SentenceWithCandidates from text that may contain XML markup.
+ *
+ * The text is wrapped in an <xml> root element, parsed with pugixml and
+ * flattened by XMLParse() into plain tokens plus one XMLOption per element.
+ * The tokens become the words of the sentence; the options are then applied:
+ *   - "wall": sets a reordering wall (ignored when its start position is 0)
+ *   - "zone": sets a reordering zone over the covered span
+ *   - "ne":   stores the entity as the placeholder factor of the covered word
+ *   - any other element is added to the sentence as an XML option
+ *
+ * \param pool   memory pool used for the sentence and the XML options
+ * \param vocab  factor collection used to create words and entity factors
+ * \param system decoder configuration (reordering and input options are read)
+ * \param str    text to parse, without an enclosing root element
+ * \return the newly created sentence
+ */
+SentenceWithCandidates *SentenceWithCandidates::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
+    const System &system, const std::string &str)
+{
+  // give pugixml a single root element to hang everything on
+  string wrapped = "<xml>" + str + "</xml>";
+
+  pugi::xml_document doc;
+  // NOTE(review): the parse result is never inspected, so malformed markup
+  // is not reported here - confirm that this is intended.
+  pugi::xml_parse_result result = doc.load(wrapped.c_str(),
+                                  pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
+  pugi::xml_node topNode = doc.child("xml");
+
+  // flatten the tree
+  vector<XMLOption*> xmlOptions;
+  std::vector<std::string> toks;
+  XMLParse(pool, system, 0, topNode, toks, xmlOptions);
+
+  // debug
+  /*
+  cerr << "xmloptions:" << endl;
+  for (size_t i = 0; i < xmlOptions.size(); ++i) {
+    cerr << xmlOptions[i]->Debug(system) << endl;
+  }
+  */
+
+  // build the words from the collected tokens
+  size_t numWords = toks.size();
+  SentenceWithCandidates *ret =
+    new (pool.Allocate<SentenceWithCandidates>()) SentenceWithCandidates(pool, numWords);
+  ret->PhraseImplTemplate<Word>::CreateFromString(vocab, system, toks, false);
+
+  // prepare the XML-related state of the input
+  ret->Init(system, numWords, system.options.reordering.max_distortion);
+
+  ReorderingConstraint &constraint = ret->GetReorderingConstraint();
+
+  // walls at punctuation, if "-monotone-at-punctuation" is set
+  if (system.options.reordering.monotone_at_punct && ret->GetSize()) {
+    constraint.SetMonotoneAtPunctuation(*ret);
+  }
+
+  // apply every option that came out of the markup
+  for (size_t idx = 0; idx < xmlOptions.size(); ++idx) {
+    const XMLOption *opt = xmlOptions[idx];
+
+    if (strcmp(opt->GetNodeName(), "wall") == 0) {
+      // a wall in front of the first word is skipped
+      if (!opt->startPos) {
+        continue;
+      }
+      UTIL_THROW_IF2(opt->startPos > ret->GetSize(), "wall is beyond the SentenceWithCandidates");
+      constraint.SetWall(opt->startPos - 1, true);
+      continue;
+    }
+
+    if (strcmp(opt->GetNodeName(), "zone") == 0) {
+      constraint.SetZone( opt->startPos, opt->startPos + opt->phraseSize -1 );
+      continue;
+    }
+
+    if (strcmp(opt->GetNodeName(), "ne") == 0) {
+      FactorType placeholderFactor = system.options.input.placeholder_factor;
+      UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
+                     "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
+      UTIL_THROW_IF2(opt->phraseSize != 1,
+                     "Placeholder must only cover 1 word");
+
+      const Factor *entityFactor = vocab.AddFactor(opt->GetEntity(), system, false);
+      (*ret)[opt->startPos][placeholderFactor] = entityFactor;
+      continue;
+    }
+
+    // default - forced translation. Add to class variable
+    ret->AddXMLOption(system, opt);
+  }
+  constraint.FinalizeWalls();
+
+  return ret;
+}
+
+/**
+ * Walks the children of parentNode depth-first and flattens them.
+ *
+ * The tokenized value of every child is appended to toks. For every child
+ * that has a node name an XMLOption is created, filled from the
+ * "translation", "entity" and "prob" attributes when they are present, and
+ * appended to xmlOptions before its own children are visited. After the
+ * recursion the option's phraseSize is set to the number of tokens that
+ * were appended since the option's start position.
+ *
+ * \param pool       memory pool the XMLOption objects are allocated from
+ * \param system     decoder configuration, only passed on to the recursion
+ * \param depth      nesting level, 0 for the first call
+ * \param parentNode node whose children are visited
+ * \param toks       in/out: tokens collected so far
+ * \param xmlOptions in/out: options collected so far
+ */
+void SentenceWithCandidates::XMLParse(
+  MemPool &pool,
+  const System &system,
+  size_t depth,
+  const pugi::xml_node &parentNode,
+  std::vector<std::string> &toks,
+  vector<XMLOption*> &xmlOptions)
+{
+  // pugixml
+  pugi::xml_node childNode = parentNode.first_child();
+  for (; childNode; childNode = childNode.next_sibling()) {
+    string nodeName = childNode.name();
+    //cerr << depth << " nodeName=" << nodeName << endl;
+
+    // token index at which this child begins
+    int startPos = toks.size();
+
+    string text = childNode.value();
+    if (!text.empty()) {
+      //cerr << depth << "childNode text=" << text << endl;
+      std::vector<std::string> textToks = Tokenize(text);
+      toks.insert(toks.end(), textToks.begin(), textToks.end());
+    }
+
+    // a child without a node name contributes tokens only
+    if (nodeName.empty()) {
+      continue;
+    }
+
+    XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
+
+    pugi::xml_attribute translationAttr = childNode.attribute("translation");
+    if (!translationAttr.empty()) {
+      xmlOption->SetTranslation(pool, translationAttr.as_string());
+    }
+
+    pugi::xml_attribute entityAttr = childNode.attribute("entity");
+    if (!entityAttr.empty()) {
+      xmlOption->SetEntity(pool, entityAttr.as_string());
+    }
+
+    pugi::xml_attribute probAttr = childNode.attribute("prob");
+    if (!probAttr.empty()) {
+      xmlOption->prob = probAttr.as_float();
+    }
+
+    // the option is registered before its children are visited
+    xmlOptions.push_back(xmlOption);
+
+    // recursively call this function. For proper recursive trees
+    XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);
+
+    // the span covers everything appended since startPos, nested children included
+    size_t endPos = toks.size();
+    xmlOption->phraseSize = endPos - startPos;
+
+    /*
+    cerr << "xmlOptions=";
+    xmlOption->Debug(cerr, system);
+    cerr << endl;
+    */
+  }
+}
+
+} /* namespace Moses2 */
+
diff --git a/moses2/PhraseBased/SentenceWithCandidates.h b/moses2/PhraseBased/SentenceWithCandidates.h
new file mode 100644
index 000000000..6cfea1a22
--- /dev/null
+++ b/moses2/PhraseBased/SentenceWithCandidates.h
@@ -0,0 +1,52 @@
+/*
+ * SentenceWithCandidates.h
+ *
+ * Created on: 14 Dec 2015
+ * Author: hieu
+ */
+#pragma once
+
+#include <boost/property_tree/ptree.hpp>
+#include <string>
+#include "PhraseImpl.h"
+#include "../InputType.h"
+#include "../MemPool.h"
+#include "../pugixml.hpp"
+#include "../legacy/Util2.h"
+
+namespace Moses2
+{
+class FactorCollection;
+class System;
+
+/**
+ * Input sentence type that derives from both InputType and PhraseImpl.
+ *
+ * Instances are obtained through the static CreateFromString() factory;
+ * the XML-specific helpers are only reachable from inside the class
+ * hierarchy.
+ */
+class SentenceWithCandidates: public InputType, public PhraseImpl
+{
+public:
+  /**
+   * Builds a sentence from the raw input string.
+   *
+   * \param pool   memory pool used for the allocation
+   * \param vocab  factor collection used when the words are created
+   * \param system decoder configuration
+   * \param str    raw input string
+   */
+  static SentenceWithCandidates *CreateFromString(
+    MemPool &pool,
+    FactorCollection &vocab,
+    const System &system,
+    const std::string &str);
+
+  /** Forwards pool to InputType and pool/size to PhraseImpl. */
+  SentenceWithCandidates(MemPool &pool, size_t size)
+    : InputType(pool),
+      PhraseImpl(pool, size)
+  {
+  }
+
+  virtual ~SentenceWithCandidates()
+  {
+  }
+
+protected:
+  /** Variant of CreateFromString() for input that may contain XML markup. */
+  static SentenceWithCandidates *CreateFromStringXML(
+    MemPool &pool,
+    FactorCollection &vocab,
+    const System &system,
+    const std::string &str);
+
+  /**
+   * Recursive helper that collects the tokens and the XML options found
+   * below parentNode into toks and xmlOptions.
+   */
+  static void XMLParse(
+    MemPool &pool,
+    const System &system,
+    size_t depth,
+    const pugi::xml_node &parentNode,
+    std::vector<std::string> &toks,
+    std::vector<XMLOption*> &xmlOptions);
+
+};
+
+} /* namespace Moses2 */
+