Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieu@hoang.co.uk> 2013-07-18 20:00:07 +0400
committer Hieu Hoang <hieu@hoang.co.uk> 2013-07-18 20:00:07 +0400
commit f81d7a446f1947fdd4f4ecc8874264555dfb66f9 (patch)
tree 5667d0f17ab13603ee0e09b847d6e63e5cfe67c8 /moses/XmlOption.cpp
parent 415c5ab6938536b34ea4bfca8f4a2d91a75bc54e (diff)
starting placeholder
Diffstat (limited to 'moses/XmlOption.cpp')
-rw-r--r-- moses/XmlOption.cpp 19
1 file changed, 16 insertions, 3 deletions
diff --git a/moses/XmlOption.cpp b/moses/XmlOption.cpp
index 4b703b247..069166e1f 100644
--- a/moses/XmlOption.cpp
+++ b/moses/XmlOption.cpp
@@ -150,10 +150,13 @@ vector<string> TokenizeXml(const string& str, const std::string& lbrackStr, cons
* \param rbrackStr xml tag's right bracket string, typically ">"
*/
bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingConstraint &reorderingConstraint, vector< size_t > &walls,
+ std::vector< std::pair<size_t, std::string> > &placeholders,
const std::string& lbrackStr, const std::string& rbrackStr)
{
//parse XML markup in translation line
+ const StaticData &staticData = StaticData::Instance();
+
// no xml tag? we're done.
//if (line.find_first_of('<') == string::npos) {
if (line.find(lbrackStr) == string::npos) {
@@ -172,8 +175,8 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
string cleanLine; // return string (text without xml)
size_t wordPos = 0; // position in sentence (in terms of number of words)
- const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
- const string &factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+ const vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+ const string &factorDelimiter = staticData.GetFactorDelimiter();
// loop through the tokens
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) {
@@ -290,6 +293,16 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
reorderingConstraint.SetZone( startPos, endPos-1 );
}
+ // name-entity placeholder
+ else if (tagName == "ne") {
+ if (startPos != (endPos - 1)) {
+ TRACE_ERR("ERROR: Placeholder must only span 1 word: " << line << endl);
+ return false;
+ }
+ string entity = ParseXmlTagAttribute(tagContent,"entity");
+ placeholders.push_back(std::pair<size_t, std::string>(startPos, entity));
+ }
+
// default: opening tag that specifies translation options
else {
if (startPos >= endPos) {
@@ -329,7 +342,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
}
// store translation options into members
- if (StaticData::Instance().GetXmlInputType() != XmlIgnore) {
+ if (staticData.GetXmlInputType() != XmlIgnore) {
// only store options if we aren't ignoring them
for (size_t i=0; i<altTexts.size(); ++i) {
Phrase sourcePhrase; // TODO don't know what the source phrase is