Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Christian Buck <cbuck@lantis.de>, 2013-04-12 22:43:53 +0400
committer: Christian Buck <cbuck@lantis.de>, 2013-04-12 22:43:53 +0400
commit: bb5d70fc7cdf864f1594e70260633242fb49af26 (patch)
tree: f7d31dff6852c1960a81cefec2e98a7374ede8bf /moses/Util.cpp
parent: 9c2c6c603b9f4a37d0e6a4004b11067dc8d58b7f (diff)
integrated xml passthrough handling (by Nicola Bertoldi)
Diffstat (limited to 'moses/Util.cpp')
-rw-r--r-- moses/Util.cpp | 37
1 files changed, 37 insertions, 0 deletions
diff --git a/moses/Util.cpp b/moses/Util.cpp
index 98de1241e..d82774b07 100644
--- a/moses/Util.cpp
+++ b/moses/Util.cpp
@@ -166,6 +166,43 @@ std::map<std::string, std::string> ProcessAndStripSGML(std::string &line)
return meta;
}
+/**
+ * Extract the first passthrough tag from an input line and strip it from the line.
+ *
+ * The tag is located in a lower-cased copy of the line, so the match ignores the
+ * case used in the input. tagName and lbrackStr are compared against that
+ * lower-cased copy as given, so they are expected in lower case.
+ *
+ * \param line       input line; on success the tag (from the opening bracket up to
+ *                   and including the closing bracket) is removed from it in place
+ * \param tagName    name of the tag, searched for directly after lbrackStr
+ * \param lbrackStr  opening bracket string, e.g. "<"
+ * \param rbrackStr  closing bracket string, e.g. ">"; may be longer than one character
+ * \return the extracted tag with its original casing, or the empty string if the tag
+ *         is absent or has no closing bracket (an error is traced in the latter case)
+ */
+std::string PassthroughSGML(std::string &line, const std::string tagName, const std::string& lbrackStr, const std::string& rbrackStr)
+{
+  const std::string tagStart = lbrackStr + tagName;
+
+  std::string lline = ToLower(line);
+  const size_t open = lline.find(tagStart);
+  // check whether the tag exists; if not return the empty string
+  if (open == std::string::npos) return std::string();
+
+  const size_t close = lline.find(rbrackStr, open);
+  // check whether the tag is terminated by the closing bracket; if not return the empty string
+  if (close == std::string::npos)
+  {
+    TRACE_ERR("PassthroughSGML error: the <passthrough info/> tag does not end properly\n");
+    return std::string();
+  }
+
+  // The tag spans the whole closing bracket, not just its first character;
+  // otherwise a multi-character bracket would be left behind in the line.
+  const size_t tagLength = close - open + rbrackStr.size();
+
+  // extract the tag (from the original line, to keep its casing)
+  std::string meta = line.substr(open, tagLength);
+
+  // strip the tag from the line
+  line.erase(open, tagLength);
+
+  TRACE_ERR("The input contains a <passthrough info/> tag:" << meta << std::endl);
+
+  // Only the first tag is extracted; a second occurrence is reported but left in the line.
+  lline = ToLower(line);
+  if (lline.find(tagStart) != std::string::npos)
+  {
+    TRACE_ERR("PassthroughSGML error: there are two <passthrough> tags\n");
+  }
+  return meta;
+}
+
}