From 2f091ce8f7eed08095b59b5746b8a42c21200458 Mon Sep 17 00:00:00 2001 From: hieuhoang1972 Date: Sat, 9 Feb 2008 11:37:41 +0000 Subject: roll forwards james smith's changes. seg faults seen might have been caused by unknown compiler problem, or FC5/FC6 lib incompatibility on DICE machines. what a joke! git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1554 1f5c12ca-751b-0410-a591-d2e778427230 --- moses/src/XmlOption.cpp | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) (limited to 'moses/src/XmlOption.cpp') diff --git a/moses/src/XmlOption.cpp b/moses/src/XmlOption.cpp index e71dfc97f..6c18aac12 100644 --- a/moses/src/XmlOption.cpp +++ b/moses/src/XmlOption.cpp @@ -26,6 +26,7 @@ #include #include "Util.h" #include "StaticData.h" +#include "TranslationOption.h" namespace { @@ -91,10 +92,11 @@ inline std::vector TokenizeXml(const std::string& str) } -std::vector ProcessAndStripXMLTags(std::string& line) { +std::vector ProcessAndStripXMLTags(std::string& line, const InputType &source) { //parse XML markup in translation line - std::vector res; + std::vector res; std::string rstr; + std::string linkedStr; if (line.find_first_of('<') == std::string::npos) { return res; } std::vector xmlTokens = TokenizeXml(line); std::string tagName = ""; @@ -106,6 +108,7 @@ std::vector ProcessAndStripXMLTags(std::string& line) { size_t curWord=0; int numUnary = 0; bool doClose = false; + bool isLinked = false; for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) { if(!isXmlTag(xmlTokens[xmlTokenPos])) @@ -127,7 +130,30 @@ std::vector ProcessAndStripXMLTags(std::string& line) { nextTagName = tag.substr(0,endOfName); tagContents = tag.substr(endOfName+1); } - if (isOpen) + if (nextTagName == "linked") { + isLinked = true; + linkedStr = ""; + } + else if (nextTagName == "/linked") { + isLinked = false; + // recurse to process linked tags + std::vector tOptions = ProcessAndStripXMLTags(linkedStr, source); + // link them together + std::vector::const_iterator iterTransOpts1; + std::vector::const_iterator iterTransOpts2; + for (iterTransOpts1 = tOptions.begin(); iterTransOpts1 != tOptions.end(); iterTransOpts1++) { + for (iterTransOpts2 = tOptions.begin(); iterTransOpts2 != tOptions.end(); iterTransOpts2++) { + if (iterTransOpts1 != iterTransOpts2) { + (**iterTransOpts1).AddLinkedTransOpt(*iterTransOpts2); + } + } + res.push_back(*iterTransOpts1); + } + } + else if (isLinked) { + linkedStr += xmlTokens[xmlTokenPos]; + } + else if (isOpen) { //this is an open tag tagName = nextTagName; @@ -182,6 +208,7 @@ std::vector ProcessAndStripXMLTags(std::string& line) { //TODO: deal with multiple XML options here if (StaticData::Instance().GetXmlInputType() != XmlIgnore) { + const std::vector &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); for (size_t i=0; i ProcessAndStripXMLTags(std::string& line) { if (altProbs[i] != "") probValue = Scan(altProbs[i]); //Convert from prob to log-prob float scoreValue = FloorScore(TransformScore(probValue)); - XmlOption option(tagStart,tagEnd,altTexts[i],scoreValue); + + TargetPhrase targetPhrase(Output); + targetPhrase.CreateFromString(outputFactorOrder,altTexts[i],StaticData::Instance().GetFactorDelimiter()); + targetPhrase.SetScore(scoreValue); + WordsRange range(tagStart,tagEnd); + + TranslationOption *option = new TranslationOption(range,targetPhrase,source); + assert(option); + res.push_back(option); } } @@ -205,3 +240,4 @@ std::vector ProcessAndStripXMLTags(std::string& line) { return res; } + -- cgit v1.2.3