Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2007-09-27 16:14:42 +0400
committer redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2007-09-27 16:14:42 +0400
commit 541a486c0c29446033895f63124dbb76f282c0fc (patch)
tree abf44929cd14976ac8d4f53b0ddfd728f6c3f17b /moses
parent 251aa600fa1d7f05d834e0e694a34d4e37f2a175 (diff)
stricter checks on word lattice distortion
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1464 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r-- moses/src/InputType.cpp 9
-rw-r--r-- moses/src/InputType.h 20
-rwxr-xr-x moses/src/Manager.cpp 16
-rw-r--r-- moses/src/WordLattice.cpp 13
-rw-r--r-- moses/src/WordLattice.h 5
-rwxr-xr-x moses/src/WordsBitmap.h 18
6 files changed, 55 insertions, 26 deletions
diff --git a/moses/src/InputType.cpp b/moses/src/InputType.cpp
index e2533505b..e7f721ec8 100644
--- a/moses/src/InputType.cpp
+++ b/moses/src/InputType.cpp
@@ -46,13 +46,8 @@ int InputType::ComputeDistortionDistance(const WordsRange& prev, const WordsRang
return abs(dist);
}
-bool InputType::IsCoveragePossible(const WordsRange&) const
+bool InputType::CanIGetFromAToB(size_t start, size_t end) const
{
- return true; // always possible for CNs and text
-}
-
-bool InputType::IsExtensionPossible(const WordsRange&, const WordsRange&) const
-{
- return true; // always possible for CNs and text
+ return true; // base-class default: ignores start/end and reports every pair as reachable
}
diff --git a/moses/src/InputType.h b/moses/src/InputType.h
index ba3f01c0c..3888d3e09 100644
--- a/moses/src/InputType.h
+++ b/moses/src/InputType.h
@@ -58,12 +58,26 @@ public:
//! returns the number of words moved
virtual int ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const;
+ //! In a word lattice, tells you if there's a path from node start to node end
+ virtual bool CanIGetFromAToB(size_t start, size_t end) const;
+
//! is there a path covering [range] (lattice only, otherwise true)
- virtual bool InputType::IsCoveragePossible(const WordsRange& range) const;
+ inline bool IsCoveragePossible(const WordsRange& range) const
+ {
+ return range.GetStartPos() == NOT_FOUND || CanIGetFromAToB(range.GetStartPos(), range.GetEndPos() + 1); // a NOT_FOUND start is not a valid node index: report such a range as coverable instead of passing it on
+ }
//! In a word lattice, you can't always get from node A to node B
- virtual bool IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const;
-
+ inline bool IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const
+ {
+ // return ComputeDistortionDistance(prev, current) < 100000;
+ size_t t = prev.GetEndPos()+1; // end position of prev, plus one
+ size_t l = current.GetEndPos()+1; // end position of current, plus one
+ size_t r = l;
+ if (l<t) { r = t; } else { l = t; } // order the two positions so that l <= r
+ return CanIGetFromAToB(l,r); // query reachability from the smaller position to the larger one
+ }
+
//! number of words in this sentence/confusion network
virtual size_t GetSize() const =0;
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index 6f413e672..83d932660 100755
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -170,6 +170,14 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
size_t maxSize = sourceSize - startPos;
size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength();
maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
+ if (isWordLattice) {
+ // first question: is there a path from the closest translated word to the left
+ // of the hypothesized extension to the start of the hypothesized extension?
+ size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
+ if (closestLeft != startPos && closestLeft != 0 && !m_source.CanIGetFromAToB(closestLeft+1, startPos+1)) {
+ continue;
+ }
+ }
for (size_t endPos = startPos ; endPos < startPos + maxSize ; ++endPos)
{
@@ -183,8 +191,14 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
{
continue;
}
+ // second question: is there a path from the end of the hypothesized extension to the closest translated word to its right?
+ if (isWordLattice) {
+ size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
+ if (closestRight != endPos && closestRight != sourceSize && !m_source.CanIGetFromAToB(endPos, closestRight)) {
+ continue;
+ }
+ }
-
bool leftMostEdge = (hypoFirstGapPos == startPos);
// any length extension is okay if starting at left-most edge
diff --git a/moses/src/WordLattice.cpp b/moses/src/WordLattice.cpp
index bb5297f90..aa838e405 100644
--- a/moses/src/WordLattice.cpp
+++ b/moses/src/WordLattice.cpp
@@ -87,16 +87,9 @@ int WordLattice::ComputeDistortionDistance(const WordsRange& prev, const WordsRa
}
}
-bool WordLattice::IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const
+bool WordLattice::CanIGetFromAToB(size_t start, size_t end) const
{
- //std::cerr << "CD: IsExtPossible(" << prev << "," << current << ")= " << ComputeDistortionDistance(prev, current) << "\n";
- return ComputeDistortionDistance(prev, current) < 100000;
+// std::cerr << "CanIgetFromAToB(" << start << "," << end << ")=" << distances[start][end] << std::endl;
+ return distances[start][end] < 100000; // true when the stored distance is below the 100000 cutoff; NOTE(review): start/end are not range-checked before indexing
}
-bool WordLattice::IsCoveragePossible(const WordsRange& range) const
-{
- if (range.GetStartPos() == NOT_FOUND) { return true; }
- //std::cerr << "IsCovPossibe(" << range << ")= " << distances[range.GetStartPos()][range.GetEndPos()+1] << "\n";
- return distances[range.GetStartPos()][range.GetEndPos()+1] < 100000;
-}
-
diff --git a/moses/src/WordLattice.h b/moses/src/WordLattice.h
index a114a64c2..63dcb4596 100644
--- a/moses/src/WordLattice.h
+++ b/moses/src/WordLattice.h
@@ -18,10 +18,7 @@ public:
*/
virtual int ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) const;
// is it possible to get from the edge of the previous word range to the current word range
- virtual bool IsExtensionPossible(const WordsRange& prev, const WordsRange& current) const;
- virtual bool IsCoveragePossible(const WordsRange& range) const;
-
-
+ virtual bool CanIGetFromAToB(size_t start, size_t end) const;
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
diff --git a/moses/src/WordsBitmap.h b/moses/src/WordsBitmap.h
index 23efbaee8..266d7fea7 100755
--- a/moses/src/WordsBitmap.h
+++ b/moses/src/WordsBitmap.h
@@ -168,10 +168,26 @@ public:
{
return (thisSize < compareSize) ? -1 : 1;
}
+ return std::memcmp(m_bitmap, compare.m_bitmap, thisSize);
+ }
+
+ inline size_t GetEdgeToTheLeftOf(size_t l) const // scans m_bitmap downward from l-1 and returns the first index whose entry is set
+ {
+ if (l == 0) return l; // nothing lies to the left of position 0
+ --l;
+ while (!m_bitmap[l] && l) { --l; } // stop at a set entry or on reaching index 0
+ return l; // NOTE: 0 is returned both when entry 0 is set and when no set entry was found
+ }
- return std::memcmp(m_bitmap, compare.m_bitmap, thisSize);
+ inline size_t GetEdgeToTheRightOf(size_t r) const // scans m_bitmap upward from r+1; returns the first index whose entry is set, or m_size if none is
+ {
+ if (r+1 == m_size) return r; // r is already the last position
+ ++r;
+ while (r < m_size && !m_bitmap[r]) { ++r; } // bounds check first, so m_bitmap[m_size] is never read
+ return r;
}
+
//! TODO - ??? no idea
int GetFutureCosts(int lastPos) const ;