diff options
author | hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> | 2008-02-09 14:37:41 +0300 |
---|---|---|
committer | hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> | 2008-02-09 14:37:41 +0300 |
commit | 2f091ce8f7eed08095b59b5746b8a42c21200458 (patch) | |
tree | 6c0da57b2f163c0e2088c3b7ba8dc01e5883b64c /moses | |
parent | fd60fe93b9e1bb2f210fbea861d78d7a84b37624 (diff) |
Roll forward James Smith's changes.
The seg faults seen might have been caused by an unknown compiler problem,
or by an FC5/FC6 library incompatibility on the DICE machines. What a joke!
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1554 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rwxr-xr-x | moses/src/Manager.cpp | 22 | ||||
-rwxr-xr-x | moses/src/Sentence.cpp | 27 | ||||
-rwxr-xr-x | moses/src/Sentence.h | 3 | ||||
-rwxr-xr-x | moses/src/TranslationOption.h | 14 | ||||
-rw-r--r-- | moses/src/XmlOption.cpp | 44 | ||||
-rw-r--r-- | moses/src/XmlOption.h | 4 |
6 files changed, 85 insertions, 29 deletions
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp index 8b9ef579e..f077f5173 100755 --- a/moses/src/Manager.cpp +++ b/moses/src/Manager.cpp @@ -147,10 +147,10 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis) for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) { - size_t maxSize = sourceSize - startPos; - size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); - maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; - + size_t maxSize = sourceSize - startPos; + size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); + maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase; + for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos) { if (!hypoBitmap.Overlap(WordsRange(startPos, endPos))) @@ -296,6 +296,20 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp if (debug2) { std::cerr << "::EXT: " << transOpt << "\n"; } #endif Hypothesis *newHypo = hypothesis.CreateNext(transOpt); + // expand hypothesis further if transOpt was linked + for (std::vector<TranslationOption*>::const_iterator iterLinked = transOpt.GetLinkedTransOpts().begin(); + iterLinked != transOpt.GetLinkedTransOpts().end(); iterLinked++) { + const WordsBitmap hypoBitmap = newHypo->GetWordsBitmap(); + if (hypoBitmap.Overlap((**iterLinked).GetSourceWordsRange())) { + // don't want to add a hypothesis that has some but not all of a linked TO set, so return + return; + } + else + { + newHypo->CalcScore(m_transOptColl->GetFutureScore()); + newHypo = newHypo->CreateNext(**iterLinked); + } + } newHypo->CalcScore(m_transOptColl->GetFutureScore()); // logging for the curious diff --git a/moses/src/Sentence.cpp b/moses/src/Sentence.cpp index a8a720ebc..5a2f7b2be 100755 --- a/moses/src/Sentence.cpp +++ b/moses/src/Sentence.cpp @@ -42,7 +42,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder) //parse XML markup in translation line const StaticData 
&staticData = StaticData::Instance(); if (staticData.GetXmlInputType() != XmlPassThrough) - m_xmlOptionsList = ProcessAndStripXMLTags(line); + m_xmlOptionsList = ProcessAndStripXMLTags(line, *this); Phrase::CreateFromString(factorOrder, line, factorDelimiter); //only fill the vector if we are parsing XML @@ -50,9 +50,10 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder) for (size_t i=0; i<GetSize();i++) { m_xmlCoverageMap.push_back(false); } - for (size_t i=0; i< m_xmlOptionsList.size();i++) { + for (std::vector<TranslationOption*>::const_iterator iterXMLOpts = m_xmlOptionsList.begin(); + iterXMLOpts != m_xmlOptionsList.end(); iterXMLOpts++) { //m_xmlOptionsList will be empty for XmlIgnore - for(size_t j=m_xmlOptionsList[i].startPos;j<=m_xmlOptionsList[i].endPos;j++) { + for(size_t j=(**iterXMLOpts).GetSourceWordsRange().GetStartPos();j<=(**iterXMLOpts).GetSourceWordsRange().GetEndPos();j++) { m_xmlCoverageMap[j]=true; } @@ -88,24 +89,12 @@ bool Sentence::XmlOverlap(size_t startPos, size_t endPos) const { void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list, size_t startPos, size_t endPos) const { //iterate over XmlOptions list, find exact source/target matches - //we don't worry about creating the objects ahead of time because this should only be called once for each unique start/end when a given sentence is processed const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - for(size_t i=0;i<m_xmlOptionsList.size();i++) { - if (startPos == m_xmlOptionsList[i].startPos && endPos == m_xmlOptionsList[i].endPos) { - //create TranslationOptions - - for (size_t j=0;j<m_xmlOptionsList[i].targetPhrases.size();j++) { - TargetPhrase targetPhrase(Output); - targetPhrase.CreateFromString(outputFactorOrder,m_xmlOptionsList[i].targetPhrases[j],StaticData::Instance().GetFactorDelimiter()); - targetPhrase.SetScore(m_xmlOptionsList[i].targetScores[j]); - WordsRange 
range(m_xmlOptionsList[i].startPos,m_xmlOptionsList[i].endPos); - - TranslationOption *option = new TranslationOption(range,targetPhrase,*this); - assert(option); - list.push_back(option); - - } + for (std::vector<TranslationOption*>::const_iterator iterXMLOpts = m_xmlOptionsList.begin(); + iterXMLOpts != m_xmlOptionsList.end(); iterXMLOpts++) { + if (startPos == (**iterXMLOpts).GetSourceWordsRange().GetStartPos() && endPos == (**iterXMLOpts).GetSourceWordsRange().GetEndPos()) { + list.push_back(*iterXMLOpts); } } } diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h index 8a05787ee..0f0b4aa7a 100755 --- a/moses/src/Sentence.h +++ b/moses/src/Sentence.h @@ -48,7 +48,7 @@ class Sentence : public Phrase, public InputType * and returns the value of that tag if present, empty string otherwise */ static std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName); - std::vector <XmlOption> m_xmlOptionsList; + std::vector <TranslationOption*> m_xmlOptionsList; std::vector <bool> m_xmlCoverageMap; public: @@ -90,3 +90,4 @@ class Sentence : public Phrase, public InputType TranslationOptionCollection* CreateTranslationOptionCollection() const; }; + diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h index 441cd403c..440d4461d 100755 --- a/moses/src/TranslationOption.h +++ b/moses/src/TranslationOption.h @@ -60,6 +60,7 @@ protected: Phrase *m_sourcePhrase; /*< input phrase translated by this */ const WordsRange m_sourceWordsRange; /*< word position in the input that are covered by this translation option */ float m_futureScore; /*< estimate of total cost when using this translation option, includes language model probabilities */ + std::vector<TranslationOption*> m_linkedTransOpts; /* list of linked TOs which must be included with this in any hypothesis */ //! in TranslationOption, m_scoreBreakdown is not complete. It cannot, //! 
for example, know the full n-gram score since the length of the @@ -112,6 +113,18 @@ public: { return m_sourcePhrase; } + + /** returns linked TOs */ + inline const std::vector<TranslationOption*> &GetLinkedTransOpts() const + { + return m_linkedTransOpts; + } + + /** add link to another TO */ + inline void AddLinkedTransOpt(TranslationOption* to) + { + m_linkedTransOpts.push_back(to); + } /** whether source span overlaps with those of a hypothesis */ bool Overlap(const Hypothesis &hypothesis) const; @@ -167,3 +180,4 @@ public: }; + diff --git a/moses/src/XmlOption.cpp b/moses/src/XmlOption.cpp index e71dfc97f..6c18aac12 100644 --- a/moses/src/XmlOption.cpp +++ b/moses/src/XmlOption.cpp @@ -26,6 +26,7 @@ #include <iostream> #include "Util.h" #include "StaticData.h" +#include "TranslationOption.h" namespace { @@ -91,10 +92,11 @@ inline std::vector<std::string> TokenizeXml(const std::string& str) } -std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) { +std::vector<TranslationOption*> ProcessAndStripXMLTags(std::string& line, const InputType &source) { //parse XML markup in translation line - std::vector<XmlOption> res; + std::vector<TranslationOption*> res; std::string rstr; + std::string linkedStr; if (line.find_first_of('<') == std::string::npos) { return res; } std::vector<std::string> xmlTokens = TokenizeXml(line); std::string tagName = ""; @@ -106,6 +108,7 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) { size_t curWord=0; int numUnary = 0; bool doClose = false; + bool isLinked = false; for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) { if(!isXmlTag(xmlTokens[xmlTokenPos])) @@ -127,7 +130,30 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) { nextTagName = tag.substr(0,endOfName); tagContents = tag.substr(endOfName+1); } - if (isOpen) + if (nextTagName == "linked") { + isLinked = true; + linkedStr = ""; + } + else if (nextTagName == "/linked") { + isLinked = false; + // recurse to 
process linked tags + std::vector<TranslationOption*> tOptions = ProcessAndStripXMLTags(linkedStr, source); + // link them together + std::vector<TranslationOption*>::const_iterator iterTransOpts1; + std::vector<TranslationOption*>::const_iterator iterTransOpts2; + for (iterTransOpts1 = tOptions.begin(); iterTransOpts1 != tOptions.end(); iterTransOpts1++) { + for (iterTransOpts2 = tOptions.begin(); iterTransOpts2 != tOptions.end(); iterTransOpts2++) { + if (iterTransOpts1 != iterTransOpts2) { + (**iterTransOpts1).AddLinkedTransOpt(*iterTransOpts2); + } + } + res.push_back(*iterTransOpts1); + } + } + else if (isLinked) { + linkedStr += xmlTokens[xmlTokenPos]; + } + else if (isOpen) { //this is an open tag tagName = nextTagName; @@ -182,6 +208,7 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) { //TODO: deal with multiple XML options here if (StaticData::Instance().GetXmlInputType() != XmlIgnore) { + const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); for (size_t i=0; i<altTexts.size(); ++i) { //only store options if we aren't ignoring them //set default probability @@ -189,7 +216,15 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) { if (altProbs[i] != "") probValue = Scan<float>(altProbs[i]); //Convert from prob to log-prob float scoreValue = FloorScore(TransformScore(probValue)); - XmlOption option(tagStart,tagEnd,altTexts[i],scoreValue); + + TargetPhrase targetPhrase(Output); + targetPhrase.CreateFromString(outputFactorOrder,altTexts[i],StaticData::Instance().GetFactorDelimiter()); + targetPhrase.SetScore(scoreValue); + WordsRange range(tagStart,tagEnd); + + TranslationOption *option = new TranslationOption(range,targetPhrase,source); + assert(option); + res.push_back(option); } } @@ -205,3 +240,4 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) { return res; } + diff --git a/moses/src/XmlOption.h b/moses/src/XmlOption.h index e211a6398..dc9efe650 100644 --- 
a/moses/src/XmlOption.h +++ b/moses/src/XmlOption.h @@ -2,6 +2,7 @@ #include <vector> #include <string> +#include "InputType.h" /** This struct is used for storing XML force translation data for a given range in the sentence */ @@ -18,5 +19,6 @@ struct XmlOption { }; -std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line); +std::vector<TranslationOption*> ProcessAndStripXMLTags(std::string& line, const InputType &source); + |