Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author: redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2007-09-27 09:03:06 +0400
committer: redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2007-09-27 09:03:06 +0400
commit: 251aa600fa1d7f05d834e0e694a34d4e37f2a175 (patch)
tree: 62f4208edc5b578d2d9bdffc2d75c2639d7111b3 /moses
parent: 797c1eb98f7705694f96d80f138ef45841d8973a (diff)
Fixed a subtle bug in the word lattice decoder; it now behaves sanely. This experience also gave me more evidence that numbering the words in a phrase is not a good idea -- we should be numbering the spaces between them. That would fix oh so many of the nasty things we've got (like the fact that we represent word drops with [n,n-1], etc.).
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1463 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r-- moses/src/ConfusionNet.cpp 2
-rw-r--r-- moses/src/FloydWarshall.cpp 2
-rwxr-xr-x moses/src/Hypothesis.cpp 1
-rw-r--r-- moses/src/InputType.cpp 10
-rw-r--r-- moses/src/InputType.h 7
-rwxr-xr-x moses/src/Manager.cpp 13
-rw-r--r-- moses/src/WordLattice.cpp 21
-rw-r--r-- moses/src/WordLattice.h 5
8 files changed, 51 insertions, 10 deletions
diff --git a/moses/src/ConfusionNet.cpp b/moses/src/ConfusionNet.cpp
index 4d0bf1421..449e088ab 100644
--- a/moses/src/ConfusionNet.cpp
+++ b/moses/src/ConfusionNet.cpp
@@ -178,13 +178,11 @@ void ConfusionNet::Print(std::ostream& out) const {
Phrase ConfusionNet::GetSubString(const WordsRange&) const {
TRACE_ERR("ERROR: call to ConfusionNet::GetSubString\n");
- abort();
return Phrase(Input);
}
std::string ConfusionNet::GetStringRep(const vector<FactorType> factorsToPrint) const{ //not well defined yet
TRACE_ERR("ERROR: call to ConfusionNet::GeStringRep\n");
- abort();
return "";
}
#pragma warning(disable:4716)
diff --git a/moses/src/FloydWarshall.cpp b/moses/src/FloydWarshall.cpp
index 3a2d16c8e..b1b8c0bce 100644
--- a/moses/src/FloydWarshall.cpp
+++ b/moses/src/FloydWarshall.cpp
@@ -21,7 +21,7 @@ void floyd_warshall(const std::vector<std::vector<bool> >& edges, std::vector<st
dist[i][j] = 1;
else
dist[i][j] = MAX_DIST;
- if (i == j) dist[i][j] = 0;
+ if (i == j) dist[i][j] = MAX_DIST;
}
}
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index 0e7a84924..d8d909d26 100755
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -386,6 +386,7 @@ const Hypothesis* Hypothesis::GetPrevHypo()const{
*/
void Hypothesis::PrintHypothesis(const InputType &source, float /*weightDistortion*/, float /*weightWordPenalty*/) const
{
+ if (!m_prevHypo) { TRACE_ERR(endl << "NULL hypo" << endl); return; }
TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
int end = (int)(m_prevHypo->m_targetPhrase.GetSize()-1);
int start = end-1;
diff --git a/moses/src/InputType.cpp b/moses/src/InputType.cpp
index 81eea0dbe..e2533505b 100644
--- a/moses/src/InputType.cpp
+++ b/moses/src/InputType.cpp
@@ -46,3 +46,13 @@ int InputType::ComputeDistortionDistance(const WordsRange& prev, const WordsRang
return abs(dist);
}
+bool InputType::IsCoveragePossible(const WordsRange&) const
+{
+ return true; // always possible for CNs and text
+}
+
+bool InputType::IsExtensionPossible(const WordsRange&, const WordsRange&) const
+{
+ return true; // always possible for CNs and text
+}
+
diff --git a/moses/src/InputType.h b/moses/src/InputType.h
index 772623109..ba3f01c0c 100644
--- a/moses/src/InputType.h
+++ b/moses/src/InputType.h
@@ -57,6 +57,13 @@ public:
}
//! returns the number of words moved
virtual int ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const;
+
+ //! is there a path covering [range] (lattice only, otherwise true)
+ virtual bool InputType::IsCoveragePossible(const WordsRange& range) const;
+
+ //! In a word lattice, you can't always get from node A to node B
+ virtual bool IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const;
+
//! number of words in this sentence/confusion network
virtual size_t GetSize() const =0;
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index 3b88ddcb3..6f413e672 100755
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -106,7 +106,6 @@ void Manager::ProcessSentence()
sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
VERBOSE(3,std::endl);
sourceHypoColl.CleanupArcList();
-
// go through each hypothesis on the stack and try to expand it
HypothesisStack::const_iterator iterHypo;
for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
@@ -131,6 +130,7 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
{
// since we check for reordering limits, its good to have that limit handy
int maxDistortion = StaticData::Instance().GetMaxDistortion();
+ bool isWordLattice = StaticData::Instance().GetInputType() == WordLatticeInput;
// no limit of reordering: only check for overlap
if (maxDistortion < 0)
@@ -175,7 +175,15 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
{
// check for overlap
WordsRange extRange(startPos, endPos);
- if (hypoBitmap.Overlap(extRange)) { continue; }
+ if (hypoBitmap.Overlap(extRange) ||
+ (isWordLattice && (!m_source.IsCoveragePossible(extRange) ||
+ !m_source.IsExtensionPossible(hypothesis.GetCurrSourceWordsRange(), extRange))
+ )
+ )
+ {
+ continue;
+ }
+
bool leftMostEdge = (hypoFirstGapPos == startPos);
@@ -199,7 +207,6 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
int required_distortion =
m_source.ComputeDistortionDistance(extRange, bestNextExtension);
-// std::cerr << "CD[" << startPos << "-" << endPos << "]: next distortion required = " << required_distortion << std::endl;
if (required_distortion <= maxDistortion) {
ExpandAllHypotheses(hypothesis
,m_possibleTranslations->GetTranslationOptionList(extRange));
diff --git a/moses/src/WordLattice.cpp b/moses/src/WordLattice.cpp
index 35fb7c20b..bb5297f90 100644
--- a/moses/src/WordLattice.cpp
+++ b/moses/src/WordLattice.cpp
@@ -68,7 +68,7 @@ int WordLattice::Read(std::istream& in,const std::vector<FactorType>& factorOrde
void WordLattice::GetAsEdgeMatrix(std::vector<std::vector<bool> >& edges) const
{
- edges.resize(data.size(),std::vector<bool>(data.size(), false));
+ edges.resize(data.size()+1,std::vector<bool>(data.size()+1, false));
for (size_t i=0;i<data.size();++i) {
for (size_t j=0;j<data[i].size(); ++j) {
edges[i][i+next_nodes[i][j]] = true;
@@ -79,11 +79,24 @@ void WordLattice::GetAsEdgeMatrix(std::vector<std::vector<bool> >& edges) const
int WordLattice::ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const
{
if (prev.GetStartPos() == NOT_FOUND) {
- return distances[0][current.GetStartPos()];
+ return distances[0][current.GetStartPos()+1] - 1;
} else if (prev.GetEndPos() > current.GetStartPos()) {
- return distances[current.GetStartPos()][prev.GetEndPos()] + 1;
+ return distances[current.GetStartPos()][prev.GetEndPos()+1];
} else {
- return distances[prev.GetEndPos()][current.GetStartPos()] - 1;
+ return distances[prev.GetEndPos()+1][current.GetStartPos()+1] - 1;
}
}
+bool WordLattice::IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const
+{
+ //std::cerr << "CD: IsExtPossible(" << prev << "," << current << ")= " << ComputeDistortionDistance(prev, current) << "\n";
+ return ComputeDistortionDistance(prev, current) < 100000;
+}
+
+bool WordLattice::IsCoveragePossible(const WordsRange& range) const
+{
+ if (range.GetStartPos() == NOT_FOUND) { return true; }
+ //std::cerr << "IsCovPossibe(" << range << ")= " << distances[range.GetStartPos()][range.GetEndPos()+1] << "\n";
+ return distances[range.GetStartPos()][range.GetEndPos()+1] < 100000;
+}
+
diff --git a/moses/src/WordLattice.h b/moses/src/WordLattice.h
index 57260d27c..a114a64c2 100644
--- a/moses/src/WordLattice.h
+++ b/moses/src/WordLattice.h
@@ -17,6 +17,11 @@ public:
/** Get shortest path between two nodes
*/
virtual int ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const;
+ // is it possible to get from the edge of the previous word range to the current word range
+ virtual bool IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const;
+ virtual bool IsCoveragePossible(const WordsRange& range) const;
+
+
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);