From 2f091ce8f7eed08095b59b5746b8a42c21200458 Mon Sep 17 00:00:00 2001 From: hieuhoang1972 Date: Sat, 9 Feb 2008 11:37:41 +0000 Subject: roll forwards james smith's changes. seg faults seen might have been caused by unknown compiler problem, or FC5/FC6 lib incompatibility on DICE machines. what a joke! git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1554 1f5c12ca-751b-0410-a591-d2e778427230 --- moses/src/Manager.cpp | 22 ++++++++++++++++++---- moses/src/Sentence.cpp | 27 ++++++++------------------ moses/src/Sentence.h | 3 ++- moses/src/TranslationOption.h | 14 ++++++++++++++ moses/src/XmlOption.cpp | 44 +++++++++++++++++++++++++++++++++++++++---- moses/src/XmlOption.h | 4 +++- 6 files changed, 85 insertions(+), 29 deletions(-) (limited to 'moses') diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp index 8b9ef579e..f077f5173 100755 --- a/moses/src/Manager.cpp +++ b/moses/src/Manager.cpp @@ -147,10 +147,10 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis) for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) { - size_t maxSize = sourceSize - startPos; - size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); - maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; - + size_t maxSize = sourceSize - startPos; + size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); + maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; + for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) { if (!hypoBitmap.Overlap(WordsRange(startPos, endPos))) @@ -296,6 +296,20 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp if (debug2) { std::cerr << "::EXT: " << transOpt << "\n"; } #endif Hypothesis *newHypo = hypothesis.CreateNext(transOpt); + // expand hypothesis further if transOpt was linked + for (std::vector::const_iterator iterLinked = transOpt.GetLinkedTransOpts().begin(); + iterLinked != transOpt.GetLinkedTransOpts().end(); iterLinked++) { + const WordsBitmap hypoBitmap = newHypo->GetWordsBitmap(); + if (hypoBitmap.Overlap((**iterLinked).GetSourceWordsRange())) { + // don't want to add a hypothesis that has some but not all of a linked TO set, so return + return; + } + else + { + newHypo->CalcScore(m_transOptColl->GetFutureScore()); + newHypo = newHypo->CreateNext(**iterLinked); + } + } newHypo->CalcScore(m_transOptColl->GetFutureScore()); // logging for the curious diff --git a/moses/src/Sentence.cpp b/moses/src/Sentence.cpp index a8a720ebc..5a2f7b2be 100755 --- a/moses/src/Sentence.cpp +++ b/moses/src/Sentence.cpp @@ -42,7 +42,7 @@ int Sentence::Read(std::istream& in,const std::vector& factorOrder) //parse XML markup in translation line const StaticData &staticData = StaticData::Instance(); if (staticData.GetXmlInputType() != XmlPassThrough) - m_xmlOptionsList = ProcessAndStripXMLTags(line); + m_xmlOptionsList = ProcessAndStripXMLTags(line, *this); Phrase::CreateFromString(factorOrder, line, factorDelimiter); //only fill the vector if we are parsing XML @@ -50,9 +50,10 @@ int Sentence::Read(std::istream& in,const std::vector& factorOrder) for (size_t i=0; i::const_iterator iterXMLOpts = m_xmlOptionsList.begin(); + iterXMLOpts != m_xmlOptionsList.end(); iterXMLOpts++) { //m_xmlOptionsList will be empty for XmlIgnore - for(size_t j=m_xmlOptionsList[i].startPos;j<=m_xmlOptionsList[i].endPos;j++) { + for(size_t j=(**iterXMLOpts).GetSourceWordsRange().GetStartPos();j<=(**iterXMLOpts).GetSourceWordsRange().GetEndPos();j++) { m_xmlCoverageMap[j]=true; } @@ -88,24 +89,12 @@ bool Sentence::XmlOverlap(size_t startPos, size_t endPos) const { void Sentence::GetXmlTranslationOptions(std::vector &list, size_t startPos, size_t endPos) const { //iterate over XmlOptions list, find exact source/target matches - //we don't worry about creating the objects ahead of time because this should only be called once for each unique start/end when a given sentence is processed const std::vector &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - for(size_t i=0;i::const_iterator iterXMLOpts = m_xmlOptionsList.begin(); + iterXMLOpts != m_xmlOptionsList.end(); iterXMLOpts++) { + if (startPos == (**iterXMLOpts).GetSourceWordsRange().GetStartPos() && endPos == (**iterXMLOpts).GetSourceWordsRange().GetEndPos()) { + list.push_back(*iterXMLOpts); } } } diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h index 8a05787ee..0f0b4aa7a 100755 --- a/moses/src/Sentence.h +++ b/moses/src/Sentence.h @@ -48,7 +48,7 @@ class Sentence : public Phrase, public InputType * and returns the value of that tag if present, empty string otherwise */ static std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName); - std::vector m_xmlOptionsList; + std::vector m_xmlOptionsList; std::vector m_xmlCoverageMap; public: @@ -90,3 +90,4 @@ class Sentence : public Phrase, public InputType TranslationOptionCollection* CreateTranslationOptionCollection() const; }; + diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h index 441cd403c..440d4461d 100755 --- a/moses/src/TranslationOption.h +++ b/moses/src/TranslationOption.h @@ -60,6 +60,7 @@ protected: Phrase *m_sourcePhrase; /*< input phrase translated by this */ const WordsRange m_sourceWordsRange; /*< word position in the input that are covered by this translation option */ float m_futureScore; /*< estimate of total cost when using this translation option, includes language model probabilities */ + std::vector m_linkedTransOpts; /* list of linked TOs which must be included with this in any hypothesis */ //! in TranslationOption, m_scoreBreakdown is not complete. It cannot, //! for example, know the full n-gram score since the length of the @@ -112,6 +113,18 @@ public: { return m_sourcePhrase; } + + /** returns linked TOs */ + inline const std::vector &GetLinkedTransOpts() const + { + return m_linkedTransOpts; + } + + /** add link to another TO */ + inline void AddLinkedTransOpt(TranslationOption* to) + { + m_linkedTransOpts.push_back(to); + } /** whether source span overlaps with those of a hypothesis */ bool Overlap(const Hypothesis &hypothesis) const; @@ -167,3 +180,4 @@ public: }; + diff --git a/moses/src/XmlOption.cpp b/moses/src/XmlOption.cpp index e71dfc97f..6c18aac12 100644 --- a/moses/src/XmlOption.cpp +++ b/moses/src/XmlOption.cpp @@ -26,6 +26,7 @@ #include #include "Util.h" #include "StaticData.h" +#include "TranslationOption.h" namespace { @@ -91,10 +92,11 @@ inline std::vector TokenizeXml(const std::string& str) } -std::vector ProcessAndStripXMLTags(std::string& line) { +std::vector ProcessAndStripXMLTags(std::string& line, const InputType &source) { //parse XML markup in translation line - std::vector res; + std::vector res; std::string rstr; + std::string linkedStr; if (line.find_first_of('<') == std::string::npos) { return res; } std::vector xmlTokens = TokenizeXml(line); std::string tagName = ""; @@ -106,6 +108,7 @@ std::vector ProcessAndStripXMLTags(std::string& line) { size_t curWord=0; int numUnary = 0; bool doClose = false; + bool isLinked = false; for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) { if(!isXmlTag(xmlTokens[xmlTokenPos])) @@ -127,7 +130,30 @@ std::vector ProcessAndStripXMLTags(std::string& line) { nextTagName = tag.substr(0,endOfName); tagContents = tag.substr(endOfName+1); } - if (isOpen) + if (nextTagName == "linked") { + isLinked = true; + linkedStr = ""; + } + else if (nextTagName == "/linked") { + isLinked = false; + // recurse to process linked tags + std::vector tOptions = ProcessAndStripXMLTags(linkedStr, source); + // link them together + std::vector::const_iterator iterTransOpts1; + std::vector::const_iterator iterTransOpts2; + for (iterTransOpts1 = tOptions.begin(); iterTransOpts1 != tOptions.end(); iterTransOpts1++) { + for (iterTransOpts2 = tOptions.begin(); iterTransOpts2 != tOptions.end(); iterTransOpts2++) { + if (iterTransOpts1 != iterTransOpts2) { + (**iterTransOpts1).AddLinkedTransOpt(*iterTransOpts2); + } + } + res.push_back(*iterTransOpts1); + } + } + else if (isLinked) { + linkedStr += xmlTokens[xmlTokenPos]; + } + else if (isOpen) { //this is an open tag tagName = nextTagName; @@ -182,6 +208,7 @@ std::vector ProcessAndStripXMLTags(std::string& line) { //TODO: deal with multiple XML options here if (StaticData::Instance().GetXmlInputType() != XmlIgnore) { + const std::vector &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); for (size_t i=0; i ProcessAndStripXMLTags(std::string& line) { if (altProbs[i] != "") probValue = Scan(altProbs[i]); //Convert from prob to log-prob float scoreValue = FloorScore(TransformScore(probValue)); - XmlOption option(tagStart,tagEnd,altTexts[i],scoreValue); + + TargetPhrase targetPhrase(Output); + targetPhrase.CreateFromString(outputFactorOrder,altTexts[i],StaticData::Instance().GetFactorDelimiter()); + targetPhrase.SetScore(scoreValue); + WordsRange range(tagStart,tagEnd); + + TranslationOption *option = new TranslationOption(range,targetPhrase,source); + assert(option); + res.push_back(option); } } @@ -205,3 +240,4 @@ std::vector ProcessAndStripXMLTags(std::string& line) { return res; } + diff --git a/moses/src/XmlOption.h b/moses/src/XmlOption.h index e211a6398..dc9efe650 100644 --- a/moses/src/XmlOption.h +++ b/moses/src/XmlOption.h @@ -2,6 +2,7 @@ #include #include +#include "InputType.h" /** This struct is used for storing XML force translation data for a given range in the sentence */ @@ -18,5 +19,6 @@ struct XmlOption { }; -std::vector ProcessAndStripXMLTags(std::string& line); +std::vector ProcessAndStripXMLTags(std::string& line, const InputType &source); + -- cgit v1.2.3