Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2008-02-09 14:37:41 +0300
committer hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2008-02-09 14:37:41 +0300
commit 2f091ce8f7eed08095b59b5746b8a42c21200458 (patch)
tree 6c0da57b2f163c0e2088c3b7ba8dc01e5883b64c /moses
parent fd60fe93b9e1bb2f210fbea861d78d7a84b37624 (diff)
Roll forward James Smith's changes.
The seg faults seen might have been caused by an unknown compiler problem, or by an FC5/FC6 library incompatibility on the DICE machines. What a joke!
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1554 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rwxr-xr-x moses/src/Manager.cpp 22
-rwxr-xr-x moses/src/Sentence.cpp 27
-rwxr-xr-x moses/src/Sentence.h 3
-rwxr-xr-x moses/src/TranslationOption.h 14
-rw-r--r-- moses/src/XmlOption.cpp 44
-rw-r--r-- moses/src/XmlOption.h 4
6 files changed, 85 insertions, 29 deletions
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index 8b9ef579e..f077f5173 100755
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -147,10 +147,10 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos)
{
- size_t maxSize = sourceSize - startPos;
- size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
- maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
-
+ size_t maxSize = sourceSize - startPos;
+ size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
+ maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+
for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
if (!hypoBitmap.Overlap(WordsRange(startPos, endPos)))
@@ -296,6 +296,20 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp
if (debug2) { std::cerr << "::EXT: " << transOpt << "\n"; }
#endif
Hypothesis *newHypo = hypothesis.CreateNext(transOpt);
+ // expand hypothesis further if transOpt was linked
+ for (std::vector<TranslationOption*>::const_iterator iterLinked = transOpt.GetLinkedTransOpts().begin();
+ iterLinked != transOpt.GetLinkedTransOpts().end(); iterLinked++) {
+ const WordsBitmap hypoBitmap = newHypo->GetWordsBitmap();
+ if (hypoBitmap.Overlap((**iterLinked).GetSourceWordsRange())) {
+ // don't want to add a hypothesis that has some but not all of a linked TO set, so return
+ return;
+ }
+ else
+ {
+ newHypo->CalcScore(m_transOptColl->GetFutureScore());
+ newHypo = newHypo->CreateNext(**iterLinked);
+ }
+ }
newHypo->CalcScore(m_transOptColl->GetFutureScore());
// logging for the curious
diff --git a/moses/src/Sentence.cpp b/moses/src/Sentence.cpp
index a8a720ebc..5a2f7b2be 100755
--- a/moses/src/Sentence.cpp
+++ b/moses/src/Sentence.cpp
@@ -42,7 +42,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
//parse XML markup in translation line
const StaticData &staticData = StaticData::Instance();
if (staticData.GetXmlInputType() != XmlPassThrough)
- m_xmlOptionsList = ProcessAndStripXMLTags(line);
+ m_xmlOptionsList = ProcessAndStripXMLTags(line, *this);
Phrase::CreateFromString(factorOrder, line, factorDelimiter);
//only fill the vector if we are parsing XML
@@ -50,9 +50,10 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
for (size_t i=0; i<GetSize();i++) {
m_xmlCoverageMap.push_back(false);
}
- for (size_t i=0; i< m_xmlOptionsList.size();i++) {
+ for (std::vector<TranslationOption*>::const_iterator iterXMLOpts = m_xmlOptionsList.begin();
+ iterXMLOpts != m_xmlOptionsList.end(); iterXMLOpts++) {
//m_xmlOptionsList will be empty for XmlIgnore
- for(size_t j=m_xmlOptionsList[i].startPos;j<=m_xmlOptionsList[i].endPos;j++) {
+ for(size_t j=(**iterXMLOpts).GetSourceWordsRange().GetStartPos();j<=(**iterXMLOpts).GetSourceWordsRange().GetEndPos();j++) {
m_xmlCoverageMap[j]=true;
}
@@ -88,24 +89,12 @@ bool Sentence::XmlOverlap(size_t startPos, size_t endPos) const {
void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list, size_t startPos, size_t endPos) const {
//iterate over XmlOptions list, find exact source/target matches
- //we don't worry about creating the objects ahead of time because this should only be called once for each unique start/end when a given sentence is processed
const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
- for(size_t i=0;i<m_xmlOptionsList.size();i++) {
- if (startPos == m_xmlOptionsList[i].startPos && endPos == m_xmlOptionsList[i].endPos) {
- //create TranslationOptions
-
- for (size_t j=0;j<m_xmlOptionsList[i].targetPhrases.size();j++) {
- TargetPhrase targetPhrase(Output);
- targetPhrase.CreateFromString(outputFactorOrder,m_xmlOptionsList[i].targetPhrases[j],StaticData::Instance().GetFactorDelimiter());
- targetPhrase.SetScore(m_xmlOptionsList[i].targetScores[j]);
- WordsRange range(m_xmlOptionsList[i].startPos,m_xmlOptionsList[i].endPos);
-
- TranslationOption *option = new TranslationOption(range,targetPhrase,*this);
- assert(option);
- list.push_back(option);
-
- }
+ for (std::vector<TranslationOption*>::const_iterator iterXMLOpts = m_xmlOptionsList.begin();
+ iterXMLOpts != m_xmlOptionsList.end(); iterXMLOpts++) {
+ if (startPos == (**iterXMLOpts).GetSourceWordsRange().GetStartPos() && endPos == (**iterXMLOpts).GetSourceWordsRange().GetEndPos()) {
+ list.push_back(*iterXMLOpts);
}
}
}
diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h
index 8a05787ee..0f0b4aa7a 100755
--- a/moses/src/Sentence.h
+++ b/moses/src/Sentence.h
@@ -48,7 +48,7 @@ class Sentence : public Phrase, public InputType
* and returns the value of that tag if present, empty string otherwise
*/
static std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
- std::vector <XmlOption> m_xmlOptionsList;
+ std::vector <TranslationOption*> m_xmlOptionsList;
std::vector <bool> m_xmlCoverageMap;
public:
@@ -90,3 +90,4 @@ class Sentence : public Phrase, public InputType
TranslationOptionCollection* CreateTranslationOptionCollection() const;
};
+
diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h
index 441cd403c..440d4461d 100755
--- a/moses/src/TranslationOption.h
+++ b/moses/src/TranslationOption.h
@@ -60,6 +60,7 @@ protected:
Phrase *m_sourcePhrase; /*< input phrase translated by this */
const WordsRange m_sourceWordsRange; /*< word position in the input that are covered by this translation option */
float m_futureScore; /*< estimate of total cost when using this translation option, includes language model probabilities */
+ std::vector<TranslationOption*> m_linkedTransOpts; /* list of linked TOs which must be included with this in any hypothesis */
//! in TranslationOption, m_scoreBreakdown is not complete. It cannot,
//! for example, know the full n-gram score since the length of the
@@ -112,6 +113,18 @@ public:
{
return m_sourcePhrase;
}
+
+ /** returns linked TOs */
+ inline const std::vector<TranslationOption*> &GetLinkedTransOpts() const
+ {
+ return m_linkedTransOpts;
+ }
+
+ /** add link to another TO */
+ inline void AddLinkedTransOpt(TranslationOption* to)
+ {
+ m_linkedTransOpts.push_back(to);
+ }
/** whether source span overlaps with those of a hypothesis */
bool Overlap(const Hypothesis &hypothesis) const;
@@ -167,3 +180,4 @@ public:
};
+
diff --git a/moses/src/XmlOption.cpp b/moses/src/XmlOption.cpp
index e71dfc97f..6c18aac12 100644
--- a/moses/src/XmlOption.cpp
+++ b/moses/src/XmlOption.cpp
@@ -26,6 +26,7 @@
#include <iostream>
#include "Util.h"
#include "StaticData.h"
+#include "TranslationOption.h"
namespace {
@@ -91,10 +92,11 @@ inline std::vector<std::string> TokenizeXml(const std::string& str)
}
-std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) {
+std::vector<TranslationOption*> ProcessAndStripXMLTags(std::string& line, const InputType &source) {
//parse XML markup in translation line
- std::vector<XmlOption> res;
+ std::vector<TranslationOption*> res;
std::string rstr;
+ std::string linkedStr;
if (line.find_first_of('<') == std::string::npos) { return res; }
std::vector<std::string> xmlTokens = TokenizeXml(line);
std::string tagName = "";
@@ -106,6 +108,7 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) {
size_t curWord=0;
int numUnary = 0;
bool doClose = false;
+ bool isLinked = false;
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++)
{
if(!isXmlTag(xmlTokens[xmlTokenPos]))
@@ -127,7 +130,30 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) {
nextTagName = tag.substr(0,endOfName);
tagContents = tag.substr(endOfName+1);
}
- if (isOpen)
+ if (nextTagName == "linked") {
+ isLinked = true;
+ linkedStr = "";
+ }
+ else if (nextTagName == "/linked") {
+ isLinked = false;
+ // recurse to process linked tags
+ std::vector<TranslationOption*> tOptions = ProcessAndStripXMLTags(linkedStr, source);
+ // link them together
+ std::vector<TranslationOption*>::const_iterator iterTransOpts1;
+ std::vector<TranslationOption*>::const_iterator iterTransOpts2;
+ for (iterTransOpts1 = tOptions.begin(); iterTransOpts1 != tOptions.end(); iterTransOpts1++) {
+ for (iterTransOpts2 = tOptions.begin(); iterTransOpts2 != tOptions.end(); iterTransOpts2++) {
+ if (iterTransOpts1 != iterTransOpts2) {
+ (**iterTransOpts1).AddLinkedTransOpt(*iterTransOpts2);
+ }
+ }
+ res.push_back(*iterTransOpts1);
+ }
+ }
+ else if (isLinked) {
+ linkedStr += xmlTokens[xmlTokenPos];
+ }
+ else if (isOpen)
{
//this is an open tag
tagName = nextTagName;
@@ -182,6 +208,7 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) {
//TODO: deal with multiple XML options here
if (StaticData::Instance().GetXmlInputType() != XmlIgnore) {
+ const std::vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
for (size_t i=0; i<altTexts.size(); ++i) {
//only store options if we aren't ignoring them
//set default probability
@@ -189,7 +216,15 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) {
if (altProbs[i] != "") probValue = Scan<float>(altProbs[i]);
//Convert from prob to log-prob
float scoreValue = FloorScore(TransformScore(probValue));
- XmlOption option(tagStart,tagEnd,altTexts[i],scoreValue);
+
+ TargetPhrase targetPhrase(Output);
+ targetPhrase.CreateFromString(outputFactorOrder,altTexts[i],StaticData::Instance().GetFactorDelimiter());
+ targetPhrase.SetScore(scoreValue);
+ WordsRange range(tagStart,tagEnd);
+
+ TranslationOption *option = new TranslationOption(range,targetPhrase,source);
+ assert(option);
+
res.push_back(option);
}
}
@@ -205,3 +240,4 @@ std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line) {
return res;
}
+
diff --git a/moses/src/XmlOption.h b/moses/src/XmlOption.h
index e211a6398..dc9efe650 100644
--- a/moses/src/XmlOption.h
+++ b/moses/src/XmlOption.h
@@ -2,6 +2,7 @@
#include <vector>
#include <string>
+#include "InputType.h"
/** This struct is used for storing XML force translation data for a given range in the sentence
*/
@@ -18,5 +19,6 @@ struct XmlOption {
};
-std::vector<XmlOption> ProcessAndStripXMLTags(std::string& line);
+std::vector<TranslationOption*> ProcessAndStripXMLTags(std::string& line, const InputType &source);
+