Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Phil Williams <philip.williams@mac.com> 2015-04-13 18:31:58 +0300
committer Phil Williams <philip.williams@mac.com> 2015-04-13 18:31:58 +0300
commit 05b31b53f22abfabb8141b6cd7b0246890d521a1 (patch)
tree abe5bfc773f0d0f581a798c1fdff9d453e81c665
parent 2f7c328db9d17be2076ccc5b6ed59a7c612575ac (diff)
Implement -output-unknowns for search algorithms 7 and 9 (T2S/F2S)
-rw-r--r-- moses/Syntax/F2S/HyperTreeLoader.cpp 24
-rw-r--r-- moses/Syntax/F2S/HyperTreeLoader.h 10
-rw-r--r-- moses/Syntax/F2S/Manager-inl.h 24
-rw-r--r-- moses/Syntax/F2S/Manager.h 3
-rw-r--r-- moses/Syntax/RuleTableFF.cpp 3
-rw-r--r-- moses/Syntax/RuleTableFF.h 7
6 files changed, 65 insertions, 6 deletions
diff --git a/moses/Syntax/F2S/HyperTreeLoader.cpp b/moses/Syntax/F2S/HyperTreeLoader.cpp
index f3caa2cec..bd19cbace 100644
--- a/moses/Syntax/F2S/HyperTreeLoader.cpp
+++ b/moses/Syntax/F2S/HyperTreeLoader.cpp
@@ -40,12 +40,12 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &ff,
- HyperTree &trie)
+ HyperTree &trie,
+ boost::unordered_set<std::size_t> &sourceTermSet)
{
PrintUserTime(std::string("Start loading HyperTree"));
- // const StaticData &staticData = StaticData::Instance();
- // const std::string &factorDelimiter = staticData.GetFactorDelimiter();
+ sourceTermSet.clear();
std::size_t count = 0;
@@ -106,6 +106,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
// Source-side
HyperPath sourceFragment;
hyperPathLoader.Load(sourceString, sourceFragment);
+ ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet);
// Target-side
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
@@ -144,6 +145,23 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
return true;
}
+void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(  // Inserts into sourceTerminalSet every ID in hp that passes the filter below; the set is only added to, never cleared here.
+ const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
+{
+ for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();  // outer loop: each node sequence of the hyperpath
+ p != hp.nodeSeqs.end(); ++p) {
+ for (std::vector<std::size_t>::const_iterator q = p->begin();  // inner loop: each ID within that sequence
+ q != p->end(); ++q) {
+ const std::size_t factorId = *q;
+ if (factorId >= moses_MaxNumNonterminals &&  // presumably IDs below this bound denote non-terminals -- TODO confirm
+ factorId != HyperPath::kComma &&  // the kComma and kEpsilon values are excluded as well
+ factorId != HyperPath::kEpsilon) {
+ sourceTerminalSet.insert(factorId);
+ }
+ }
+ }
+}
+
} // namespace F2S
} // namespace Syntax
} // namespace Moses
diff --git a/moses/Syntax/F2S/HyperTreeLoader.h b/moses/Syntax/F2S/HyperTreeLoader.h
index ea009022d..088c7eaf5 100644
--- a/moses/Syntax/F2S/HyperTreeLoader.h
+++ b/moses/Syntax/F2S/HyperTreeLoader.h
@@ -3,9 +3,12 @@
#include <istream>
#include <vector>
+#include <boost/unordered_set.hpp>
+
#include "moses/TypeDef.h"
#include "moses/Syntax/RuleTableFF.h"
+#include "HyperPath.h"
#include "HyperTree.h"
#include "HyperTreeCreator.h"
@@ -23,7 +26,12 @@ public:
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &,
- HyperTree &);
+ HyperTree &,
+ boost::unordered_set<std::size_t> &);
+
+private:
+ void ExtractSourceTerminalSetFromHyperPath(
+ const HyperPath &, boost::unordered_set<std::size_t> &);
};
} // namespace F2S
diff --git a/moses/Syntax/F2S/Manager-inl.h b/moses/Syntax/F2S/Manager-inl.h
index a422e8085..f7f8f0ae9 100644
--- a/moses/Syntax/F2S/Manager-inl.h
+++ b/moses/Syntax/F2S/Manager-inl.h
@@ -38,6 +38,7 @@ Manager<RuleMatcher>::Manager(const InputType &source)
if (const ForestInput *p = dynamic_cast<const ForestInput*>(&source)) {
m_forest = p->GetForest();
m_rootVertex = p->GetRootVertex();
+ m_sentenceLength = p->GetSize();
} else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&source)) {
T2S::InputTreeBuilder builder;
T2S::InputTree tmpTree;
@@ -45,6 +46,7 @@ Manager<RuleMatcher>::Manager(const InputType &source)
boost::shared_ptr<Forest> forest = boost::make_shared<Forest>();
m_rootVertex = T2S::InputTreeToForest(tmpTree, *forest);
m_forest = forest;
+ m_sentenceLength = p->GetSize();
} else {
UTIL_THROW2("ERROR: F2S::Manager requires input to be a tree or forest");
}
@@ -82,8 +84,13 @@ void Manager<RuleMatcher>::Decode()
p = sortedVertices.begin(); p != sortedVertices.end(); ++p) {
const Forest::Vertex &vertex = **p;
- // Skip terminal vertices.
+ // Skip terminal vertices (after checking if they are OOVs).
if (vertex.incoming.empty()) {
+ if (vertex.pvertex.span.GetStartPos() > 0 &&
+ vertex.pvertex.span.GetEndPos() < m_sentenceLength-1 &&
+ IsUnknownSourceWord(vertex.pvertex.symbol)) {
+ m_oovs.insert(vertex.pvertex.symbol);
+ }
continue;
}
@@ -189,6 +196,21 @@ void Manager<RuleMatcher>::InitializeStacks()
}
}
+template<typename RuleMatcher>
+bool Manager<RuleMatcher>::IsUnknownSourceWord(const Word &w) const  // Returns true when the ID of w[0] is absent from the source terminal set of every RuleTableFF instance.
+{
+ const std::size_t factorId = w[0]->GetId();  // only element 0 of the word is consulted
+ const std::vector<RuleTableFF*> &ffs = RuleTableFF::Instances();
+ for (std::size_t i = 0; i < ffs.size(); ++i) {
+ RuleTableFF *ff = ffs[i];
+ const boost::unordered_set<std::size_t> &sourceTerms =
+ ff->GetSourceTerminalSet();
+ if (sourceTerms.find(factorId) != sourceTerms.end()) {
+ return false;  // found in at least one table's set, so not unknown
+ }
+ }
+ return true;  // not found in any table's set (also the result when there are no instances)
+}
template<typename RuleMatcher>
const SHyperedge *Manager<RuleMatcher>::GetBestSHyperedge() const
diff --git a/moses/Syntax/F2S/Manager.h b/moses/Syntax/F2S/Manager.h
index 3c7ff8da1..90f34c04b 100644
--- a/moses/Syntax/F2S/Manager.h
+++ b/moses/Syntax/F2S/Manager.h
@@ -51,10 +51,13 @@ private:
void InitializeStacks();
+ bool IsUnknownSourceWord(const Word &) const;
+
void RecombineAndSort(const std::vector<SHyperedge*> &, SVertexStack &);
boost::shared_ptr<const Forest> m_forest;
const Forest::Vertex *m_rootVertex;
+ std::size_t m_sentenceLength; // Includes <s> and </s>
PVertexToStackMap m_stackMap;
boost::shared_ptr<HyperTree> m_glueRuleTrie;
std::vector<boost::shared_ptr<RuleMatcher> > m_mainRuleMatchers;
diff --git a/moses/Syntax/RuleTableFF.cpp b/moses/Syntax/RuleTableFF.cpp
index f4e06f489..37063e048 100644
--- a/moses/Syntax/RuleTableFF.cpp
+++ b/moses/Syntax/RuleTableFF.cpp
@@ -35,7 +35,8 @@ void RuleTableFF::Load()
staticData.GetSearchAlgorithm() == SyntaxT2S) {
F2S::HyperTree *trie = new F2S::HyperTree(this);
F2S::HyperTreeLoader loader;
- loader.Load(m_input, m_output, m_filePath, *this, *trie);
+ loader.Load(m_input, m_output, m_filePath, *this, *trie,
+ m_sourceTerminalSet);
m_table = trie;
} else if (staticData.GetSearchAlgorithm() == SyntaxS2T) {
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
diff --git a/moses/Syntax/RuleTableFF.h b/moses/Syntax/RuleTableFF.h
index 4d6132e86..25e7d8428 100644
--- a/moses/Syntax/RuleTableFF.h
+++ b/moses/Syntax/RuleTableFF.h
@@ -43,10 +43,17 @@ public:
return 0;
}
+ // Get the source terminal vocabulary for this table's grammar (as a set of
+ // factor IDs)
+ const boost::unordered_set<std::size_t> &GetSourceTerminalSet() const {
+ return m_sourceTerminalSet;  // handed out by const reference; no copy is made
+ }
+
private:
static std::vector<RuleTableFF*> s_instances;
const RuleTable *m_table;
+ boost::unordered_set<std::size_t> m_sourceTerminalSet;
};
} // Syntax