Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/moses2/SCFG/Sentence.cpp')
-rw-r--r--contrib/moses2/SCFG/Sentence.cpp155
1 files changed, 155 insertions, 0 deletions
diff --git a/contrib/moses2/SCFG/Sentence.cpp b/contrib/moses2/SCFG/Sentence.cpp
new file mode 100644
index 000000000..5e69a7e23
--- /dev/null
+++ b/contrib/moses2/SCFG/Sentence.cpp
@@ -0,0 +1,155 @@
+/*
+ * Sentence.cpp
+ *
+ * Created on: 14 Dec 2015
+ * Author: hieu
+ */
+
+#include "Sentence.h"
+#include "../System.h"
+
+using namespace std;
+
+namespace Moses2
+{
+namespace SCFG
+{
+// Builds an SCFG sentence from one line of input, allocating it from `pool`.
+// The line is parsed as XML markup when system.options.input.xml_policy is
+// non-zero and tokenized as plain text otherwise. `translationId` is not read.
+Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
+    const System &system, const std::string &str, long translationId)
+{
+  // Marked-up input is delegated to the XML-aware factory.
+  if (system.options.input.xml_policy) {
+    return CreateFromStringXML(pool, vocab, system, str);
+  }
+
+  // Plain input: split the line into tokens first.
+  std::vector<std::string> words = Tokenize(str);
+
+  // Two slots beyond the token count are requested; the trailing `true` below
+  // presumably adds sentence-boundary markers -- confirm in PhraseImplTemplate.
+  const size_t capacity = words.size() + 2;
+  Sentence *sentence = new (pool.Allocate<SCFG::Sentence>()) Sentence(pool, capacity);
+  sentence->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, words,
+      true);
+
+  return sentence;
+}
+
+// Builds an SCFG sentence from a line that may carry inline XML markup. The
+// line is wrapped in a synthetic <xml> root, parsed with pugixml and tokenized.
+// An <ne> element sets the placeholder factor of the one word it covers; every
+// other element is registered through AddXMLOption(). Raises via UTIL_THROW_IF2
+// on unparsable markup, on <ne> without a configured placeholder factor, and
+// on an <ne> that does not cover exactly one word.
+Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
+    const System &system, const std::string &str)
+{
+  // Give the fragment a single root element so it can be parsed as a document.
+  const string wrapped = "<xml>" + str + "</xml>";
+  pugi::xml_document doc;
+  const pugi::xml_parse_result result = doc.load(wrapped.c_str(),
+      pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);
+  // Reject malformed markup instead of continuing as if the parse had succeeded.
+  UTIL_THROW_IF2(!result,
+      "Unable to parse XML in line: " << str << " (" << result.description() << ")");
+
+  // Gather the plain tokens plus one XMLOption per element.
+  std::vector<std::string> toks;
+  vector<XMLOption*> xmlOptions;
+  XMLParse(pool, system, 0, doc.child("xml"), toks, xmlOptions);
+
+  // create words
+  const size_t size = toks.size() + 2;
+  Sentence *ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
+  ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);
+
+  // apply the collected markup
+  for (size_t i = 0; i < xmlOptions.size(); ++i) {
+    const XMLOption *xmlOption = xmlOptions[i];
+    // std::string comparison keeps this block independent of <cstring>.
+    if (string(xmlOption->GetNodeName()) != "ne") {
+      // default - forced translation. Add to class variable
+      ret->AddXMLOption(system, xmlOption);
+      continue;
+    }
+
+    const FactorType placeholderFactor = system.options.input.placeholder_factor;
+    UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
+        "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
+    UTIL_THROW_IF2(xmlOption->phraseSize != 1,
+        "Placeholder must only cover 1 word");
+
+    // The +1 presumably skips a leading boundary word -- confirm with caller.
+    const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);
+    (*ret)[xmlOption->startPos + 1][placeholderFactor] = factor;
+  }
+
+  return ret;
+}
+
+// Walks the children of `parentNode` in document order. Node values are
+// tokenized and appended to `toks`. Every named node yields an XMLOption that
+// is allocated from `pool`, appended to `xmlOptions`, and records its start
+// offset in `toks`, its optional "translation", "entity" and "prob" attributes
+// and the number of tokens appended while the node and its subtree were
+// processed. `depth` is the recursion level; it is only passed along.
+void Sentence::XMLParse(
+    MemPool &pool,
+    const System &system,
+    size_t depth,
+    const pugi::xml_node &parentNode,
+    std::vector<std::string> &toks,
+    vector<XMLOption*> &xmlOptions)
+{
+  for (pugi::xml_node node = parentNode.first_child(); node; node = node.next_sibling()) {
+    // Token offset at which this node's content begins.
+    int startPos = toks.size();
+
+    // A node with a non-empty value contributes its words to the sentence.
+    const string text = node.value();
+    if (!text.empty()) {
+      const std::vector<std::string> words = Tokenize(text);
+      toks.insert(toks.end(), words.begin(), words.end());
+    }
+
+    // Nodes without a name get no XMLOption.
+    string nodeName = node.name();
+    if (nodeName.empty()) {
+      continue;
+    }
+
+    XMLOption *xmlOption = new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);
+
+    const pugi::xml_attribute translationAttr = node.attribute("translation");
+    if (!translationAttr.empty()) {
+      xmlOption->SetTranslation(pool, translationAttr.as_string());
+    }
+
+    const pugi::xml_attribute entityAttr = node.attribute("entity");
+    if (!entityAttr.empty()) {
+      xmlOption->SetEntity(pool, entityAttr.as_string());
+    }
+
+    const pugi::xml_attribute probAttr = node.attribute("prob");
+    if (!probAttr.empty()) {
+      xmlOption->prob = probAttr.as_float();
+    }
+
+    // Stored before recursing, so an element precedes its descendants.
+    xmlOptions.push_back(xmlOption);
+
+    // recursively call this function. For proper recursive trees
+    XMLParse(pool, system, depth + 1, node, toks, xmlOptions);
+
+    // Everything appended since startPos belongs to this element.
+    const size_t endPos = toks.size();
+    xmlOption->phraseSize = endPos - startPos;
+  }
+}
+
+}
+} /* namespace Moses2 */
+