Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com> 2014-03-03 01:17:09 +0400
committer: Hieu Hoang <hieuhoang@gmail.com> 2014-03-03 01:17:09 +0400
commit: fcb07173dadf29378bc8e112dbdc9d258e4dbdab (patch)
tree: 47433a883ce8be86e10b8d61e84946cf2cd2ee91 /contrib/other-builds/extract-mixed-syntax
parent: b521bad865267b9c2370b5bf9c7f153f17d685e2 (diff)
add MaxNonTerm and MaxHieroNonTerm
Diffstat (limited to 'contrib/other-builds/extract-mixed-syntax')
-rw-r--r-- contrib/other-builds/extract-mixed-syntax/Main.cpp | 4
-rw-r--r-- contrib/other-builds/extract-mixed-syntax/NonTerm.cpp | 5
-rw-r--r-- contrib/other-builds/extract-mixed-syntax/NonTerm.h | 1
-rw-r--r-- contrib/other-builds/extract-mixed-syntax/Parameter.cpp | 1
-rw-r--r-- contrib/other-builds/extract-mixed-syntax/Parameter.h | 1
-rw-r--r-- contrib/other-builds/extract-mixed-syntax/Rule.cpp | 14
6 files changed, 26 insertions, 0 deletions
diff --git a/contrib/other-builds/extract-mixed-syntax/Main.cpp b/contrib/other-builds/extract-mixed-syntax/Main.cpp
index 48bd7aab4..105224c3d 100644
--- a/contrib/other-builds/extract-mixed-syntax/Main.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Main.cpp
@@ -28,6 +28,8 @@ int main(int argc, char** argv)
("GlueGrammar", po::value<string>()->default_value(params.gluePath), "Output glue grammar to here")
("SentenceOffset", po::value<long>()->default_value(params.sentenceOffset), "Starting sentence id. Not used")
("GZOutput", "Compress extract files")
+ ("MaxNonTerm", po::value<int>()->default_value(params.maxNonTerm), "Maximum number of non-terms allowed per rule")
+ ("MaxHieroNonTerm", po::value<int>()->default_value(params.maxHieroNonTerm), "Maximum number of Hiero non-term. Usually, --MaxNonTerm is the normal constraint")
("SourceSyntax", "Source sentence is a parse tree")
("TargetSyntax", "Target sentence is a parse tree")
@@ -62,6 +64,8 @@ int main(int argc, char** argv)
if (vm.count("GZOutput")) params.gzOutput = true;
if (vm.count("GlueGrammar")) params.gluePath = vm["GlueGrammar"].as<string>();
if (vm.count("SentenceOffset")) params.sentenceOffset = vm["SentenceOffset"].as<long>();
+ if (vm.count("MaxNonTerm")) params.maxNonTerm = vm["MaxNonTerm"].as<int>();
+ if (vm.count("MaxHieroNonTerm")) params.maxHieroNonTerm = vm["MaxHieroNonTerm"].as<int>();
if (vm.count("SourceSyntax")) params.sourceSyntax = true;
if (vm.count("TargetSyntax")) params.targetSyntax = true;
diff --git a/contrib/other-builds/extract-mixed-syntax/NonTerm.cpp b/contrib/other-builds/extract-mixed-syntax/NonTerm.cpp
index 0118709f9..e9c6d74d0 100644
--- a/contrib/other-builds/extract-mixed-syntax/NonTerm.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/NonTerm.cpp
@@ -56,3 +56,8 @@ bool NonTerm::IsHiero(Moses::FactorDirection direction, const Parameter &params)
const std::string &label = NonTerm::GetLabel(direction);
return label == params.hieroNonTerm;
}
+
+bool NonTerm::IsHiero(const Parameter &params) const
+{
+ return IsHiero(Moses::Input, params) && IsHiero(Moses::Output, params);
+}
diff --git a/contrib/other-builds/extract-mixed-syntax/NonTerm.h b/contrib/other-builds/extract-mixed-syntax/NonTerm.h
index 452014da5..3565c5750 100644
--- a/contrib/other-builds/extract-mixed-syntax/NonTerm.h
+++ b/contrib/other-builds/extract-mixed-syntax/NonTerm.h
@@ -36,6 +36,7 @@ public:
const std::string &GetLabel(Moses::FactorDirection direction) const;
bool IsHiero(Moses::FactorDirection direction, const Parameter &params) const;
+ bool IsHiero(const Parameter &params) const;
protected:
const ConsistentPhrase *m_consistentPhrase;
diff --git a/contrib/other-builds/extract-mixed-syntax/Parameter.cpp b/contrib/other-builds/extract-mixed-syntax/Parameter.cpp
index 39901ea17..77aa8436b 100644
--- a/contrib/other-builds/extract-mixed-syntax/Parameter.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Parameter.cpp
@@ -9,6 +9,7 @@
Parameter::Parameter()
:maxSpan(10)
,maxNonTerm(2)
+,maxHieroNonTerm(999)
,maxSymbolsTarget(999)
,maxSymbolsSource(5)
,minHoleSource(2)
diff --git a/contrib/other-builds/extract-mixed-syntax/Parameter.h b/contrib/other-builds/extract-mixed-syntax/Parameter.h
index 491565744..ead94b7f9 100644
--- a/contrib/other-builds/extract-mixed-syntax/Parameter.h
+++ b/contrib/other-builds/extract-mixed-syntax/Parameter.h
@@ -16,6 +16,7 @@ public:
int maxSpan;
int maxNonTerm;
+ int maxHieroNonTerm;
int maxSymbolsTarget;
int maxSymbolsSource;
int minHoleSource;
diff --git a/contrib/other-builds/extract-mixed-syntax/Rule.cpp b/contrib/other-builds/extract-mixed-syntax/Rule.cpp
index 1fd21fefe..b8fa7e385 100644
--- a/contrib/other-builds/extract-mixed-syntax/Rule.cpp
+++ b/contrib/other-builds/extract-mixed-syntax/Rule.cpp
@@ -191,10 +191,16 @@ void Rule::Prevalidate(const Parameter &params)
// check number of non-terms
int numNonTerms = 0;
+ int numHieroNonTerms = 0;
for (size_t i = 0; i < m_source.GetSize(); ++i) {
const RuleSymbol *arc = m_source[i];
if (arc->IsNonTerm()) {
++numNonTerms;
+ const NonTerm &nonTerm = *static_cast<const NonTerm*>(arc);
+ bool isHiero = nonTerm.IsHiero(params);
+ if (isHiero) {
+ ++numHieroNonTerms;
+ }
}
}
@@ -206,6 +212,14 @@ void Rule::Prevalidate(const Parameter &params)
}
}
+ if (numHieroNonTerms >= params.maxHieroNonTerm) {
+ m_canRecurse = false;
+ if (numHieroNonTerms > params.maxHieroNonTerm) {
+ m_isValid = false;
+ return;
+ }
+ }
+
// check if 2 consecutive non-terms in source
if (!params.nonTermConsecSource && m_nonterms.size() >= 2) {
const NonTerm &lastNonTerm = *m_nonterms.back();