Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses/FF
diff options
context:
space:
mode:
authorHieu Hoang <hieuhoang@gmail.com>2015-02-19 15:27:23 +0300
committerHieu Hoang <hieuhoang@gmail.com>2015-02-19 15:27:23 +0300
commit32de07502217879f0d354ec810c8e669f4d45d3c (patch)
treefafdf8f63d9902e4fd2b45f05902faa5d65df996 /moses/FF
parent6d4bad0f78ede5bcf7eaaea1de584f42bce8e91f (diff)
beautify
Diffstat (limited to 'moses/FF')
-rw-r--r--moses/FF/FeatureFunction.cpp4
-rw-r--r--moses/FF/InputFeature.cpp15
-rw-r--r--moses/FF/LexicalReordering/LexicalReordering.cpp217
-rw-r--r--moses/FF/LexicalReordering/LexicalReordering.h176
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.cpp861
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.h546
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingTable.cpp1088
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingTable.h333
-rw-r--r--moses/FF/LexicalReordering/SparseReordering.cpp2
-rw-r--r--moses/FF/PhraseOrientationFeature.cpp66
-rw-r--r--moses/FF/PhraseOrientationFeature.h14
-rw-r--r--moses/FF/SoftSourceSyntacticConstraintsFeature.cpp36
-rw-r--r--moses/FF/SoftSourceSyntacticConstraintsFeature.h2
-rw-r--r--moses/FF/StatefulFeatureFunction.h5
-rw-r--r--moses/FF/UnalignedWordCountFeature.cpp3
-rw-r--r--moses/FF/VW/VW.h10
16 files changed, 1708 insertions, 1670 deletions
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp
index fa898857d..abe220161 100644
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@@ -124,7 +124,7 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
} else if (key == "tuneable-components") {
- UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription()
+ UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription()
<< ": tuneable-components cannot be set if tuneable=false");
SetTuneableComponents(value);
} else if (key == "require-sorting-after-source-context") {
@@ -158,7 +158,7 @@ void FeatureFunction::SetTuneableComponents(const std::string& value)
UTIL_THROW_IF2(toks.empty(), GetScoreProducerDescription()
<< ": Empty tuneable-components");
UTIL_THROW_IF2(toks.size()!=m_numScoreComponents, GetScoreProducerDescription()
- << ": tuneable-components value has to be a comma-separated list of "
+ << ": tuneable-components value has to be a comma-separated list of "
<< m_numScoreComponents << " boolean values");
m_tuneableComponents.resize(m_numScoreComponents);
diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp
index 10e5347e4..7cbb428a6 100644
--- a/moses/FF/InputFeature.cpp
+++ b/moses/FF/InputFeature.cpp
@@ -52,14 +52,13 @@ void InputFeature::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedFutureScore) const
{
if (m_legacy) {
- //binary phrase-table does input feature itself
- return;
- }
- else if (input.GetType() == WordLatticeInput){
- const ScorePair *scores = inputPath.GetInputScore();
- if (scores) {
- scoreBreakdown.PlusEquals(this, *scores);
- }
+ //binary phrase-table does input feature itself
+ return;
+ } else if (input.GetType() == WordLatticeInput) {
+ const ScorePair *scores = inputPath.GetInputScore();
+ if (scores) {
+ scoreBreakdown.PlusEquals(this, *scores);
+ }
}
}
diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index 2f0f334dd..9f75ac4bd 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -13,121 +13,116 @@ using namespace boost::algorithm;
namespace Moses
{
- LexicalReordering::
- LexicalReordering(const std::string &line)
- : StatefulFeatureFunction(line)
- {
- VERBOSE(1, "Initializing Lexical Reordering Feature.." << std::endl);
-
- map<string,string> sparseArgs;
- m_haveDefaultScores = false;
- for (size_t i = 0; i < m_args.size(); ++i)
- {
- const vector<string> &args = m_args[i];
-
- if (args[0] == "type")
- {
- m_configuration.reset(new LRModel(args[1]));
- m_configuration->SetScoreProducer(this);
- m_modelTypeString = m_configuration->GetModelString();
- }
- else if (args[0] == "input-factor")
- m_factorsF =Tokenize<FactorType>(args[1]);
- else if (args[0] == "output-factor")
- m_factorsE =Tokenize<FactorType>(args[1]);
- else if (args[0] == "path")
- m_filePath = args[1];
- else if (starts_with(args[0], "sparse-"))
- sparseArgs[args[0].substr(7)] = args[1];
- else if (args[0] == "default-scores")
- {
- vector<string> tokens = Tokenize(args[1],",");
- for(size_t i=0; i<tokens.size(); i++)
- m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i])));
- m_haveDefaultScores = true;
- }
- else UTIL_THROW2("Unknown argument " + args[0]);
- }
-
- switch(m_configuration->GetCondition())
- {
- case LRModel::FE:
- case LRModel::E:
- UTIL_THROW_IF2(m_factorsE.empty(),
- "TL factor mask for lexical reordering is "
- << "unexpectedly empty");
-
- if(m_configuration->GetCondition() == LRModel::E)
- break; // else fall through
- case LRModel::F:
- UTIL_THROW_IF2(m_factorsF.empty(),
- "SL factor mask for lexical reordering is "
- << "unexpectedly empty");
- break;
- default:
- UTIL_THROW2("Unknown conditioning option!");
- }
-
- // sanity check: number of default scores
- size_t numScores = m_configuration->GetNumScoreComponents();
- UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores,
- "wrong number of default scores (" << m_defaultScores.size()
- << ") for lexicalized reordering model (expected "
- << m_configuration->GetNumScoreComponents() << ")");
-
- m_configuration->ConfigureSparse(sparseArgs, this);
- }
-
- LexicalReordering::
- ~LexicalReordering()
- { }
-
- void
- LexicalReordering::
- Load()
- {
- typedef LexicalReorderingTable LRTable;
- m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF,
- m_factorsE, std::vector<FactorType>()));
- }
-
- Scores
- LexicalReordering::
- GetProb(const Phrase& f, const Phrase& e) const
- {
- return m_table->GetScore(f, e, Phrase(ARRAY_SIZE_INCR));
- }
-
- FFState*
- LexicalReordering::
- EvaluateWhenApplied(const Hypothesis& hypo,
- const FFState* prev_state,
- ScoreComponentCollection* out) const
- {
- VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
- Scores score(GetNumScoreComponents(), 0);
- const LRState *prev = dynamic_cast<const LRState *>(prev_state);
- LRState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);
-
- out->PlusEquals(this, score);
- VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) END" << std::endl);
-
- return next_state;
+LexicalReordering::
+LexicalReordering(const std::string &line)
+ : StatefulFeatureFunction(line)
+{
+ VERBOSE(1, "Initializing Lexical Reordering Feature.." << std::endl);
+
+ map<string,string> sparseArgs;
+ m_haveDefaultScores = false;
+ for (size_t i = 0; i < m_args.size(); ++i) {
+ const vector<string> &args = m_args[i];
+
+ if (args[0] == "type") {
+ m_configuration.reset(new LRModel(args[1]));
+ m_configuration->SetScoreProducer(this);
+ m_modelTypeString = m_configuration->GetModelString();
+ } else if (args[0] == "input-factor")
+ m_factorsF =Tokenize<FactorType>(args[1]);
+ else if (args[0] == "output-factor")
+ m_factorsE =Tokenize<FactorType>(args[1]);
+ else if (args[0] == "path")
+ m_filePath = args[1];
+ else if (starts_with(args[0], "sparse-"))
+ sparseArgs[args[0].substr(7)] = args[1];
+ else if (args[0] == "default-scores") {
+ vector<string> tokens = Tokenize(args[1],",");
+ for(size_t i=0; i<tokens.size(); i++)
+ m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i])));
+ m_haveDefaultScores = true;
+ } else UTIL_THROW2("Unknown argument " + args[0]);
}
- FFState const*
- LexicalReordering::EmptyHypothesisState(const InputType &input) const
- {
- return m_configuration->CreateLRState(input);
+ switch(m_configuration->GetCondition()) {
+ case LRModel::FE:
+ case LRModel::E:
+ UTIL_THROW_IF2(m_factorsE.empty(),
+ "TL factor mask for lexical reordering is "
+ << "unexpectedly empty");
+
+ if(m_configuration->GetCondition() == LRModel::E)
+ break; // else fall through
+ case LRModel::F:
+ UTIL_THROW_IF2(m_factorsF.empty(),
+ "SL factor mask for lexical reordering is "
+ << "unexpectedly empty");
+ break;
+ default:
+ UTIL_THROW2("Unknown conditioning option!");
}
- bool
- LexicalReordering::
- IsUseable(const FactorMask &mask) const
- {
- BOOST_FOREACH(FactorType const& f, m_factorsE)
- { if (!mask[f]) return false; }
- return true;
+ // sanity check: number of default scores
+ size_t numScores = m_configuration->GetNumScoreComponents();
+ UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores,
+ "wrong number of default scores (" << m_defaultScores.size()
+ << ") for lexicalized reordering model (expected "
+ << m_configuration->GetNumScoreComponents() << ")");
+
+ m_configuration->ConfigureSparse(sparseArgs, this);
+}
+
+LexicalReordering::
+~LexicalReordering()
+{ }
+
+void
+LexicalReordering::
+Load()
+{
+ typedef LexicalReorderingTable LRTable;
+ m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF,
+ m_factorsE, std::vector<FactorType>()));
+}
+
+Scores
+LexicalReordering::
+GetProb(const Phrase& f, const Phrase& e) const
+{
+ return m_table->GetScore(f, e, Phrase(ARRAY_SIZE_INCR));
+}
+
+FFState*
+LexicalReordering::
+EvaluateWhenApplied(const Hypothesis& hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* out) const
+{
+ VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl);
+ Scores score(GetNumScoreComponents(), 0);
+ const LRState *prev = dynamic_cast<const LRState *>(prev_state);
+ LRState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);
+
+ out->PlusEquals(this, score);
+ VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) END" << std::endl);
+
+ return next_state;
+}
+
+FFState const*
+LexicalReordering::EmptyHypothesisState(const InputType &input) const
+{
+ return m_configuration->CreateLRState(input);
+}
+
+bool
+LexicalReordering::
+IsUseable(const FactorMask &mask) const
+{
+ BOOST_FOREACH(FactorType const& f, m_factorsE) {
+ if (!mask[f]) return false;
}
+ return true;
+}
}
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index 071f90751..882d31e54 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -20,91 +20,97 @@
namespace Moses
{
- class Factor;
- class Phrase;
- class Hypothesis;
- class InputType;
-
- // implementation of lexical reordering (Tilman ...) for phrase-based
- // decoding
- class LexicalReordering : public StatefulFeatureFunction
- {
- public:
- LexicalReordering(const std::string &line);
- virtual ~LexicalReordering();
- void Load();
-
- virtual
- bool
- IsUseable(const FactorMask &mask) const;
-
- virtual
- FFState const*
- EmptyHypothesisState(const InputType &input) const;
-
- void
- InitializeForInput(const InputType& i)
- { m_table->InitializeForInput(i); }
-
- Scores
- GetProb(const Phrase& f, const Phrase& e) const;
-
- virtual
- FFState*
- EvaluateWhenApplied(const Hypothesis& cur_hypo,
- const FFState* prev_state,
- ScoreComponentCollection* accumulator) const;
-
- virtual
- FFState*
- EvaluateWhenApplied(const ChartHypothesis&, int featureID,
- ScoreComponentCollection*) const
- { UTIL_THROW2("LexicalReordering is not valid for chart decoder"); }
-
- void
- EvaluateWithSourceContext
- (const InputType &input,
- const InputPath &inputPath,
- const TargetPhrase &targetPhrase,
- const StackVec *stackVec,
- ScoreComponentCollection& scoreBreakdown,
- ScoreComponentCollection* estimatedFutureScore = NULL) const
- { }
-
- void
- EvaluateTranslationOptionListWithSourceContext
- (const InputType &input, const TranslationOptionList &transOptList) const
- { }
-
- void
- EvaluateInIsolation(const Phrase &source,
- const TargetPhrase &targetPhrase,
- ScoreComponentCollection &scoreBreakdown,
- ScoreComponentCollection &estimatedFutureScore) const
- { }
-
- bool
- GetHaveDefaultScores() { return m_haveDefaultScores; }
-
- float
- GetDefaultScore( size_t i ) { return m_defaultScores[i]; }
-
- private:
- bool DecodeCondition(std::string s);
- bool DecodeDirection(std::string s);
- bool DecodeNumFeatureFunctions(std::string s);
-
- boost::scoped_ptr<LRModel> m_configuration;
- std::string m_modelTypeString;
- std::vector<std::string> m_modelType;
- boost::scoped_ptr<LexicalReorderingTable> m_table;
- std::vector<LRModel::Condition> m_condition;
- std::vector<FactorType> m_factorsE, m_factorsF;
- std::string m_filePath;
- bool m_haveDefaultScores;
- Scores m_defaultScores;
- };
-
+class Factor;
+class Phrase;
+class Hypothesis;
+class InputType;
+
+// implementation of lexical reordering (Tilman ...) for phrase-based
+// decoding
+class LexicalReordering : public StatefulFeatureFunction
+{
+public:
+ LexicalReordering(const std::string &line);
+ virtual ~LexicalReordering();
+ void Load();
+
+ virtual
+ bool
+ IsUseable(const FactorMask &mask) const;
+
+ virtual
+ FFState const*
+ EmptyHypothesisState(const InputType &input) const;
+
+ void
+ InitializeForInput(const InputType& i) {
+ m_table->InitializeForInput(i);
+ }
+
+ Scores
+ GetProb(const Phrase& f, const Phrase& e) const;
+
+ virtual
+ FFState*
+ EvaluateWhenApplied(const Hypothesis& cur_hypo,
+ const FFState* prev_state,
+ ScoreComponentCollection* accumulator) const;
+
+ virtual
+ FFState*
+ EvaluateWhenApplied(const ChartHypothesis&, int featureID,
+ ScoreComponentCollection*) const {
+ UTIL_THROW2("LexicalReordering is not valid for chart decoder");
+ }
+
+ void
+ EvaluateWithSourceContext
+ (const InputType &input,
+ const InputPath &inputPath,
+ const TargetPhrase &targetPhrase,
+ const StackVec *stackVec,
+ ScoreComponentCollection& scoreBreakdown,
+ ScoreComponentCollection* estimatedFutureScore = NULL) const
+ { }
+
+ void
+ EvaluateTranslationOptionListWithSourceContext
+ (const InputType &input, const TranslationOptionList &transOptList) const
+ { }
+
+ void
+ EvaluateInIsolation(const Phrase &source,
+ const TargetPhrase &targetPhrase,
+ ScoreComponentCollection &scoreBreakdown,
+ ScoreComponentCollection &estimatedFutureScore) const
+ { }
+
+ bool
+ GetHaveDefaultScores() {
+ return m_haveDefaultScores;
+ }
+
+ float
+ GetDefaultScore( size_t i ) {
+ return m_defaultScores[i];
+ }
+
+private:
+ bool DecodeCondition(std::string s);
+ bool DecodeDirection(std::string s);
+ bool DecodeNumFeatureFunctions(std::string s);
+
+ boost::scoped_ptr<LRModel> m_configuration;
+ std::string m_modelTypeString;
+ std::vector<std::string> m_modelType;
+ boost::scoped_ptr<LexicalReorderingTable> m_table;
+ std::vector<LRModel::Condition> m_condition;
+ std::vector<FactorType> m_factorsE, m_factorsF;
+ std::string m_filePath;
+ bool m_haveDefaultScores;
+ Scores m_defaultScores;
+};
+
}
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index c35f0cd65..cbc04eddb 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -1,4 +1,4 @@
-// -*- c++ -*-
+// -*- c++ -*-
#include <vector>
#include <string>
@@ -14,460 +14,471 @@
namespace Moses
{
- bool
- IsMonotonicStep(WordsRange const& prev, // words range of last source phrase
- WordsRange const& cur, // words range of current source phrase
- WordsBitmap const& cov) // coverage bitmap
- {
- size_t e = prev.GetEndPos() + 1;
- size_t s = cur.GetStartPos();
- return (s == e || (s >= e && !cov.GetValue(e)));
- }
-
- bool
- IsSwap(WordsRange const& prev, WordsRange const& cur, WordsBitmap const& cov)
- {
- size_t s = prev.GetStartPos();
- size_t e = cur.GetEndPos();
- return (e+1 == s || (e < s && !cov.GetValue(s-1)));
- }
+bool
+IsMonotonicStep(WordsRange const& prev, // words range of last source phrase
+ WordsRange const& cur, // words range of current source phrase
+ WordsBitmap const& cov) // coverage bitmap
+{
+ size_t e = prev.GetEndPos() + 1;
+ size_t s = cur.GetStartPos();
+ return (s == e || (s >= e && !cov.GetValue(e)));
+}
- size_t
- LRModel::
- GetNumberOfTypes() const
- {
- return ((m_modelType == MSD) ? 3 :
- (m_modelType == MSLR) ? 4 : 2);
- }
-
- size_t
- LRModel::
- GetNumScoreComponents() const
- {
- size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
- return ((m_direction == Bidirectional)
- ? 2 * score_per_dir + m_additionalScoreComponents
- : score_per_dir + m_additionalScoreComponents);
+bool
+IsSwap(WordsRange const& prev, WordsRange const& cur, WordsBitmap const& cov)
+{
+ size_t s = prev.GetStartPos();
+ size_t e = cur.GetEndPos();
+ return (e+1 == s || (e < s && !cov.GetValue(s-1)));
+}
+
+size_t
+LRModel::
+GetNumberOfTypes() const
+{
+ return ((m_modelType == MSD) ? 3 :
+ (m_modelType == MSLR) ? 4 : 2);
+}
+
+size_t
+LRModel::
+GetNumScoreComponents() const
+{
+ size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
+ return ((m_direction == Bidirectional)
+ ? 2 * score_per_dir + m_additionalScoreComponents
+ : score_per_dir + m_additionalScoreComponents);
+}
+
+void
+LRModel::
+ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
+ const LexicalReordering* producer)
+{
+ if (sparseArgs.size()) {
+ m_sparse.reset(new SparseReordering(sparseArgs, producer));
}
+}
+
+void
+LRModel::
+SetAdditionalScoreComponents(size_t number)
+{
+ m_additionalScoreComponents = number;
+}
+
+/// return orientation for the first phrase
+LRModel::ReorderingType
+LRModel::
+GetOrientation(WordsRange const& cur) const
+{
+ UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
+ return ((m_modelType == LeftRight) ? R :
+ (cur.GetStartPos() == 0) ? M :
+ (m_modelType == MSD) ? D :
+ (m_modelType == MSLR) ? DR : NM);
+}
+
+LRModel::ReorderingType
+LRModel::
+GetOrientation(WordsRange const& prev, WordsRange const& cur) const
+{
+ UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
+ return ((m_modelType == LeftRight)
+ ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
+ : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M
+ : (m_modelType == Monotonic) ? NM
+ : (prev.GetStartPos() == cur.GetEndPos() + 1) ? S
+ : (m_modelType == MSD) ? D
+ : (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
+}
- void
- LRModel::
- ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
- const LexicalReordering* producer)
- {
- if (sparseArgs.size()) {
- m_sparse.reset(new SparseReordering(sparseArgs, producer));
+LRModel::ReorderingType
+LRModel::
+GetOrientation(int const reoDistance) const
+{
+ // this one is for HierarchicalReorderingBackwardState
+ return ((m_modelType == LeftRight)
+ ? (reoDistance >= 1) ? R : L
+ : (reoDistance == 1) ? M
+ : (m_modelType == Monotonic) ? NM
+ : (reoDistance == -1) ? S
+ : (m_modelType == MSD) ? D
+ : (reoDistance > 1) ? DR : DL);
+}
+
+LRModel::ReorderingType
+LRModel::
+GetOrientation(WordsRange const& prev, WordsRange const& cur,
+ WordsBitmap const& cov) const
+{
+ return ((m_modelType == LeftRight)
+ ? cur.GetStartPos() > prev.GetEndPos() ? R : L
+ : IsMonotonicStep(prev,cur,cov) ? M
+ : (m_modelType == Monotonic) ? NM
+ : IsSwap(prev,cur,cov) ? S
+ : (m_modelType == MSD) ? D
+ : cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
+}
+
+LRModel::
+LRModel(const std::string &modelType)
+ : m_modelString(modelType)
+ , m_scoreProducer(NULL)
+ , m_modelType(None)
+ , m_phraseBased(true)
+ , m_collapseScores(false)
+ , m_direction(Backward)
+ , m_additionalScoreComponents(0)
+{
+ std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
+
+ for (size_t i=0; i<config.size(); ++i) {
+ if (config[i] == "hier") {
+ m_phraseBased = false;
+ } else if (config[i] == "phrase") {
+ m_phraseBased = true;
+ } else if (config[i] == "wbe") {
+ m_phraseBased = true;
+ }
+ // no word-based decoding available, fall-back to phrase-based
+ // This is the old lexical reordering model combination of moses
+
+ else if (config[i] == "msd") {
+ m_modelType = MSD;
+ } else if (config[i] == "mslr") {
+ m_modelType = MSLR;
+ } else if (config[i] == "monotonicity") {
+ m_modelType = Monotonic;
+ } else if (config[i] == "leftright") {
+ m_modelType = LeftRight;
}
- }
- void
- LRModel::
- SetAdditionalScoreComponents(size_t number)
- {
- m_additionalScoreComponents = number;
- }
+ // unidirectional is deprecated, use backward instead
+ else if (config[i] == "unidirectional") {
+ m_direction = Backward;
+ } else if (config[i] == "backward") {
+ m_direction = Backward;
+ } else if (config[i] == "forward") {
+ m_direction = Forward;
+ } else if (config[i] == "bidirectional") {
+ m_direction = Bidirectional;
+ }
- /// return orientation for the first phrase
- LRModel::ReorderingType
- LRModel::
- GetOrientation(WordsRange const& cur) const
- {
- UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
- return ((m_modelType == LeftRight) ? R :
- (cur.GetStartPos() == 0) ? M :
- (m_modelType == MSD) ? D :
- (m_modelType == MSLR) ? DR : NM);
- }
-
- LRModel::ReorderingType
- LRModel::
- GetOrientation(WordsRange const& prev, WordsRange const& cur) const
- {
- UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
- return ((m_modelType == LeftRight)
- ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
- : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M
- : (m_modelType == Monotonic) ? NM
- : (prev.GetStartPos() == cur.GetEndPos() + 1) ? S
- : (m_modelType == MSD) ? D
- : (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
- }
+ else if (config[i] == "f") {
+ m_condition = F;
+ } else if (config[i] == "fe") {
+ m_condition = FE;
+ }
- LRModel::ReorderingType
- LRModel::
- GetOrientation(int const reoDistance) const
- { // this one is for HierarchicalReorderingBackwardState
- return ((m_modelType == LeftRight)
- ? (reoDistance >= 1) ? R : L
- : (reoDistance == 1) ? M
- : (m_modelType == Monotonic) ? NM
- : (reoDistance == -1) ? S
- : (m_modelType == MSD) ? D
- : (reoDistance > 1) ? DR : DL);
- }
-
- LRModel::ReorderingType
- LRModel::
- GetOrientation(WordsRange const& prev, WordsRange const& cur,
- WordsBitmap const& cov) const
- {
- return ((m_modelType == LeftRight)
- ? cur.GetStartPos() > prev.GetEndPos() ? R : L
- : IsMonotonicStep(prev,cur,cov) ? M
- : (m_modelType == Monotonic) ? NM
- : IsSwap(prev,cur,cov) ? S
- : (m_modelType == MSD) ? D
- : cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
+ else if (config[i] == "collapseff") {
+ m_collapseScores = true;
+ } else if (config[i] == "allff") {
+ m_collapseScores = false;
+ } else {
+ std::cerr
+ << "Illegal part in the lexical reordering configuration string: "
+ << config[i] << std::endl;
+ exit(1);
+ }
}
- LRModel::
- LRModel(const std::string &modelType)
- : m_modelString(modelType)
- , m_scoreProducer(NULL)
- , m_modelType(None)
- , m_phraseBased(true)
- , m_collapseScores(false)
- , m_direction(Backward)
- , m_additionalScoreComponents(0)
- {
- std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
-
- for (size_t i=0; i<config.size(); ++i)
- {
- if (config[i] == "hier") { m_phraseBased = false; }
- else if (config[i] == "phrase") { m_phraseBased = true; }
- else if (config[i] == "wbe") { m_phraseBased = true; }
- // no word-based decoding available, fall-back to phrase-based
- // This is the old lexical reordering model combination of moses
-
- else if (config[i] == "msd") { m_modelType = MSD; }
- else if (config[i] == "mslr") { m_modelType = MSLR; }
- else if (config[i] == "monotonicity") { m_modelType = Monotonic; }
- else if (config[i] == "leftright") { m_modelType = LeftRight; }
-
- // unidirectional is deprecated, use backward instead
- else if (config[i] == "unidirectional") { m_direction = Backward; }
- else if (config[i] == "backward") { m_direction = Backward; }
- else if (config[i] == "forward") { m_direction = Forward; }
- else if (config[i] == "bidirectional") { m_direction = Bidirectional; }
-
- else if (config[i] == "f") { m_condition = F; }
- else if (config[i] == "fe") { m_condition = FE; }
-
- else if (config[i] == "collapseff") { m_collapseScores = true; }
- else if (config[i] == "allff") { m_collapseScores = false; }
- else
- {
- std::cerr
- << "Illegal part in the lexical reordering configuration string: "
- << config[i] << std::endl;
- exit(1);
- }
- }
-
- if (m_modelType == None)
- {
- std::cerr
- << "You need to specify the type of the reordering model "
- << "(msd, monotonicity,...)" << std::endl;
- exit(1);
- }
+ if (m_modelType == None) {
+ std::cerr
+ << "You need to specify the type of the reordering model "
+ << "(msd, monotonicity,...)" << std::endl;
+ exit(1);
}
-
- LRState *
- LRModel::
- CreateLRState(const InputType &input) const
- {
- LRState *bwd = NULL, *fwd = NULL;
- size_t offset = 0;
-
- switch(m_direction)
- {
- case Backward:
- case Bidirectional:
- if (m_phraseBased)
- bwd = new PhraseBasedReorderingState(*this, Backward, offset);
- else
- bwd = new HReorderingBackwardState(*this, offset);
- offset += m_collapseScores ? 1 : GetNumberOfTypes();
- if (m_direction == Backward) return bwd; // else fall through
- case Forward:
- if (m_phraseBased)
- fwd = new PhraseBasedReorderingState(*this, Forward, offset);
- else
- fwd = new HReorderingForwardState(*this, input.GetSize(), offset);
- offset += m_collapseScores ? 1 : GetNumberOfTypes();
- if (m_direction == Forward) return fwd;
- }
- return new BidirectionalReorderingState(*this, bwd, fwd, 0);
+}
+
+LRState *
+LRModel::
+CreateLRState(const InputType &input) const
+{
+ LRState *bwd = NULL, *fwd = NULL;
+ size_t offset = 0;
+
+ switch(m_direction) {
+ case Backward:
+ case Bidirectional:
+ if (m_phraseBased)
+ bwd = new PhraseBasedReorderingState(*this, Backward, offset);
+ else
+ bwd = new HReorderingBackwardState(*this, offset);
+ offset += m_collapseScores ? 1 : GetNumberOfTypes();
+ if (m_direction == Backward) return bwd; // else fall through
+ case Forward:
+ if (m_phraseBased)
+ fwd = new PhraseBasedReorderingState(*this, Forward, offset);
+ else
+ fwd = new HReorderingForwardState(*this, input.GetSize(), offset);
+ offset += m_collapseScores ? 1 : GetNumberOfTypes();
+ if (m_direction == Forward) return fwd;
}
+ return new BidirectionalReorderingState(*this, bwd, fwd, 0);
+}
- void
- LRState::
- CopyScores(ScoreComponentCollection* accum,
- const TranslationOption &topt,
- const InputType& input,
- ReorderingType reoType) const
- {
- // don't call this on a bidirectional object
- UTIL_THROW_IF2(m_direction != LRModel::Backward &&
- m_direction != LRModel::Forward,
- "Unknown direction: " << m_direction);
-
- TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward)
- ? &topt : m_prevOption);
-
- LexicalReordering* producer = m_configuration.GetScoreProducer();
- Scores const* cached = relevantOpt->GetLexReorderingScores(producer);
-
- // The approach here is bizarre! Why create a whole vector and do
- // vector addition (acumm->PlusEquals) to update a single value? - UG
- size_t off_remote = m_offset + reoType;
- size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
-
- UTIL_THROW_IF2(off_remote >= producer->GetNumScoreComponents(),
- "offset out of vector bounds!");
-
- // look up applicable score from vectore of scores
- if(cached)
- {
- Scores scores(producer->GetNumScoreComponents(),0);
- scores[off_local ] = (*cached)[off_remote];
- accum->PlusEquals(producer, scores);
- }
-
- // else: use default scores (if specified)
- else if (producer->GetHaveDefaultScores())
- {
- Scores scores(producer->GetNumScoreComponents(),0);
- scores[off_local] = producer->GetDefaultScore(off_remote);
- accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
- }
- // note: if no default score, no cost
-
- const SparseReordering* sparse = m_configuration.GetSparseReordering();
- if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
- m_direction, accum);
- }
-
-
- int
- LRState::
- ComparePrevScores(const TranslationOption *other) const
- {
- LexicalReordering* producer = m_configuration.GetScoreProducer();
- const Scores* myScores = m_prevOption->GetLexReorderingScores(producer);
- const Scores* yrScores = other->GetLexReorderingScores(producer);
-
- if(myScores == yrScores) return 0;
-
- // The pointers are NULL if a phrase pair isn't found in the reordering table.
- if(yrScores == NULL) return -1;
- if(myScores == NULL) return 1;
-
- size_t stop = m_offset + m_configuration.GetNumberOfTypes();
- for(size_t i = m_offset; i < stop; i++)
- {
- if((*myScores)[i] < (*yrScores)[i]) return -1;
- if((*myScores)[i] > (*yrScores)[i]) return 1;
- }
- return 0;
+void
+LRState::
+CopyScores(ScoreComponentCollection* accum,
+ const TranslationOption &topt,
+ const InputType& input,
+ ReorderingType reoType) const
+{
+ // don't call this on a bidirectional object
+ UTIL_THROW_IF2(m_direction != LRModel::Backward &&
+ m_direction != LRModel::Forward,
+ "Unknown direction: " << m_direction);
+
+ TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward)
+ ? &topt : m_prevOption);
+
+ LexicalReordering* producer = m_configuration.GetScoreProducer();
+ Scores const* cached = relevantOpt->GetLexReorderingScores(producer);
+
+ // The approach here is bizarre! Why create a whole vector and do
+ // vector addition (acumm->PlusEquals) to update a single value? - UG
+ size_t off_remote = m_offset + reoType;
+ size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
+
+ UTIL_THROW_IF2(off_remote >= producer->GetNumScoreComponents(),
+ "offset out of vector bounds!");
+
+ // look up applicable score from vectore of scores
+ if(cached) {
+ Scores scores(producer->GetNumScoreComponents(),0);
+ scores[off_local ] = (*cached)[off_remote];
+ accum->PlusEquals(producer, scores);
}
- // ===========================================================================
- // PHRASE BASED REORDERING STATE
- // ===========================================================================
- bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
-
- PhraseBasedReorderingState::
- PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
- const TranslationOption &topt)
- : LRState(prev, topt)
- , m_prevRange(topt.GetSourceWordsRange())
- , m_first(false)
- { }
-
-
- PhraseBasedReorderingState::
- PhraseBasedReorderingState(const LRModel &config,
- LRModel::Direction dir, size_t offset)
- : LRState(config, dir, offset)
- , m_prevRange(NOT_FOUND,NOT_FOUND)
- , m_first(true)
- { }
-
-
- int
- PhraseBasedReorderingState::
- Compare(const FFState& o) const
- {
- if (&o == this) return 0;
-
- const PhraseBasedReorderingState* other = static_cast<const PhraseBasedReorderingState*>(&o);
- if (m_prevRange == other->m_prevRange) {
- if (m_direction == LRModel::Forward) {
- return ComparePrevScores(other->m_prevOption);
- } else {
- return 0;
- }
- } else if (m_prevRange < other->m_prevRange) {
- return -1;
- }
- return 1;
+ // else: use default scores (if specified)
+ else if (producer->GetHaveDefaultScores()) {
+ Scores scores(producer->GetNumScoreComponents(),0);
+ scores[off_local] = producer->GetDefaultScore(off_remote);
+ accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
}
+ // note: if no default score, no cost
- LRState*
- PhraseBasedReorderingState::
- Expand(const TranslationOption& topt, const InputType& input,
- ScoreComponentCollection* scores) const
- {
- // const LRModel::ModelType modelType = m_configuration.GetModelType();
-
- if ((m_direction != LRModel::Forward && m_useFirstBackwardScore) || !m_first)
- {
- LRModel const& lrmodel = m_configuration;
- WordsRange const cur = topt.GetSourceWordsRange();
- ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
- : lrmodel.GetOrientation(m_prevRange,cur));
- CopyScores(scores, topt, input, reoType);
- }
- return new PhraseBasedReorderingState(this, topt);
- }
+ const SparseReordering* sparse = m_configuration.GetSparseReordering();
+ if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
+ m_direction, accum);
+}
- ///////////////////////////
- //BidirectionalReorderingState
+int
+LRState::
+ComparePrevScores(const TranslationOption *other) const
+{
+ LexicalReordering* producer = m_configuration.GetScoreProducer();
+ const Scores* myScores = m_prevOption->GetLexReorderingScores(producer);
+ const Scores* yrScores = other->GetLexReorderingScores(producer);
- int
- BidirectionalReorderingState::
- Compare(FFState const& o) const
- {
- if (&o == this) return 0;
-
- BidirectionalReorderingState const &other
- = static_cast<BidirectionalReorderingState const&>(o);
+ if(myScores == yrScores) return 0;
- int cmp = m_backward->Compare(*other.m_backward);
- return (cmp < 0) ? -1 : cmp ? 1 : m_forward->Compare(*other.m_forward);
- }
+ // The pointers are NULL if a phrase pair isn't found in the reordering table.
+ if(yrScores == NULL) return -1;
+ if(myScores == NULL) return 1;
- LRState*
- BidirectionalReorderingState::
- Expand(const TranslationOption& topt, const InputType& input,
- ScoreComponentCollection* scores) const
- {
- LRState *newbwd = m_backward->Expand(topt,input, scores);
- LRState *newfwd = m_forward->Expand(topt, input, scores);
- return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset);
+ size_t stop = m_offset + m_configuration.GetNumberOfTypes();
+ for(size_t i = m_offset; i < stop; i++) {
+ if((*myScores)[i] < (*yrScores)[i]) return -1;
+ if((*myScores)[i] > (*yrScores)[i]) return 1;
}
+ return 0;
+}
+
+// ===========================================================================
+// PHRASE BASED REORDERING STATE
+// ===========================================================================
+bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
- ///////////////////////////
- //HierarchicalReorderingBackwardState
-
- HReorderingBackwardState::
- HReorderingBackwardState(const HReorderingBackwardState *prev,
- const TranslationOption &topt,
- ReorderingStack reoStack)
- : LRState(prev, topt), m_reoStack(reoStack)
- { }
-
- HReorderingBackwardState::
- HReorderingBackwardState(const LRModel &config, size_t offset)
- : LRState(config, LRModel::Backward, offset)
- { }
-
-
- int
- HReorderingBackwardState::
- Compare(const FFState& o) const
- {
- const HReorderingBackwardState& other
- = static_cast<const HReorderingBackwardState&>(o);
- return m_reoStack.Compare(other.m_reoStack);
+PhraseBasedReorderingState::
+PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
+ const TranslationOption &topt)
+ : LRState(prev, topt)
+ , m_prevRange(topt.GetSourceWordsRange())
+ , m_first(false)
+{ }
+
+
+PhraseBasedReorderingState::
+PhraseBasedReorderingState(const LRModel &config,
+ LRModel::Direction dir, size_t offset)
+ : LRState(config, dir, offset)
+ , m_prevRange(NOT_FOUND,NOT_FOUND)
+ , m_first(true)
+{ }
+
+
+int
+PhraseBasedReorderingState::
+Compare(const FFState& o) const
+{
+ if (&o == this) return 0;
+
+ const PhraseBasedReorderingState* other = static_cast<const PhraseBasedReorderingState*>(&o);
+ if (m_prevRange == other->m_prevRange) {
+ if (m_direction == LRModel::Forward) {
+ return ComparePrevScores(other->m_prevOption);
+ } else {
+ return 0;
+ }
+ } else if (m_prevRange < other->m_prevRange) {
+ return -1;
}
-
- LRState*
- HReorderingBackwardState::
- Expand(const TranslationOption& topt, const InputType& input,
- ScoreComponentCollection* scores) const
- {
- HReorderingBackwardState* nextState;
- nextState = new HReorderingBackwardState(this, topt, m_reoStack);
- WordsRange swrange = topt.GetSourceWordsRange();
- int reoDistance = nextState->m_reoStack.ShiftReduce(swrange);
- ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
+ return 1;
+}
+
+LRState*
+PhraseBasedReorderingState::
+Expand(const TranslationOption& topt, const InputType& input,
+ ScoreComponentCollection* scores) const
+{
+ // const LRModel::ModelType modelType = m_configuration.GetModelType();
+
+ if ((m_direction != LRModel::Forward && m_useFirstBackwardScore) || !m_first) {
+ LRModel const& lrmodel = m_configuration;
+ WordsRange const cur = topt.GetSourceWordsRange();
+ ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
+ : lrmodel.GetOrientation(m_prevRange,cur));
CopyScores(scores, topt, input, reoType);
- return nextState;
}
+ return new PhraseBasedReorderingState(this, topt);
+}
- ///////////////////////////
- //HReorderingForwardState
-
- HReorderingForwardState::
- HReorderingForwardState(const LRModel &config,
- size_t size, size_t offset)
- : LRState(config, LRModel::Forward, offset)
- , m_first(true)
- , m_prevRange(NOT_FOUND,NOT_FOUND)
- , m_coverage(size)
- { }
-
- HReorderingForwardState::
- HReorderingForwardState(const HReorderingForwardState *prev,
- const TranslationOption &topt)
- : LRState(prev, topt)
- , m_first(false)
- , m_prevRange(topt.GetSourceWordsRange())
- , m_coverage(prev->m_coverage)
- {
- m_coverage.SetValue(topt.GetSourceWordsRange(), true);
- }
- int
- HReorderingForwardState::
- Compare(const FFState& o) const
- {
- if (&o == this) return 0;
-
- HReorderingForwardState const& other
- = static_cast<HReorderingForwardState const&>(o);
-
- return ((m_prevRange == other.m_prevRange)
- ? ComparePrevScores(other.m_prevOption)
- : (m_prevRange < other.m_prevRange) ? -1 : 1);
- }
+///////////////////////////
+//BidirectionalReorderingState
+
+int
+BidirectionalReorderingState::
+Compare(FFState const& o) const
+{
+ if (&o == this) return 0;
+
+ BidirectionalReorderingState const &other
+ = static_cast<BidirectionalReorderingState const&>(o);
+
+ int cmp = m_backward->Compare(*other.m_backward);
+ return (cmp < 0) ? -1 : cmp ? 1 : m_forward->Compare(*other.m_forward);
+}
- // For compatibility with the phrase-based reordering model, scoring is one
- // step delayed.
- // The forward model takes determines orientations heuristically as follows:
- // mono: if the next phrase comes after the conditioning phrase and
- // - there is a gap to the right of the conditioning phrase, or
- // - the next phrase immediately follows it
- // swap: if the next phrase goes before the conditioning phrase and
- // - there is a gap to the left of the conditioning phrase, or
- // - the next phrase immediately precedes it
- // dright: if the next phrase follows the conditioning phrase and other
- // stuff comes in between
- // dleft: if the next phrase precedes the conditioning phrase and other
- // stuff comes in between
-
- LRState*
- HReorderingForwardState::
- Expand(TranslationOption const& topt, InputType const& input,
- ScoreComponentCollection* scores) const
- {
- const WordsRange cur = topt.GetSourceWordsRange();
- // keep track of the current coverage ourselves so we don't need the hypothesis
- WordsBitmap cov = m_coverage;
- cov.SetValue(cur, true);
- if (!m_first)
- {
- LRModel::ReorderingType reoType;
- reoType = m_configuration.GetOrientation(m_prevRange,cur,cov);
- CopyScores(scores, topt, input, reoType);
- }
- return new HReorderingForwardState(this, topt);
+LRState*
+BidirectionalReorderingState::
+Expand(const TranslationOption& topt, const InputType& input,
+ ScoreComponentCollection* scores) const
+{
+ LRState *newbwd = m_backward->Expand(topt,input, scores);
+ LRState *newfwd = m_forward->Expand(topt, input, scores);
+ return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset);
+}
+
+///////////////////////////
+//HierarchicalReorderingBackwardState
+
+HReorderingBackwardState::
+HReorderingBackwardState(const HReorderingBackwardState *prev,
+ const TranslationOption &topt,
+ ReorderingStack reoStack)
+ : LRState(prev, topt), m_reoStack(reoStack)
+{ }
+
+HReorderingBackwardState::
+HReorderingBackwardState(const LRModel &config, size_t offset)
+ : LRState(config, LRModel::Backward, offset)
+{ }
+
+
+int
+HReorderingBackwardState::
+Compare(const FFState& o) const
+{
+ const HReorderingBackwardState& other
+ = static_cast<const HReorderingBackwardState&>(o);
+ return m_reoStack.Compare(other.m_reoStack);
+}
+
+LRState*
+HReorderingBackwardState::
+Expand(const TranslationOption& topt, const InputType& input,
+ ScoreComponentCollection* scores) const
+{
+ HReorderingBackwardState* nextState;
+ nextState = new HReorderingBackwardState(this, topt, m_reoStack);
+ WordsRange swrange = topt.GetSourceWordsRange();
+ int reoDistance = nextState->m_reoStack.ShiftReduce(swrange);
+ ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
+ CopyScores(scores, topt, input, reoType);
+ return nextState;
+}
+
+///////////////////////////
+//HReorderingForwardState
+
+HReorderingForwardState::
+HReorderingForwardState(const LRModel &config,
+ size_t size, size_t offset)
+ : LRState(config, LRModel::Forward, offset)
+ , m_first(true)
+ , m_prevRange(NOT_FOUND,NOT_FOUND)
+ , m_coverage(size)
+{ }
+
+HReorderingForwardState::
+HReorderingForwardState(const HReorderingForwardState *prev,
+ const TranslationOption &topt)
+ : LRState(prev, topt)
+ , m_first(false)
+ , m_prevRange(topt.GetSourceWordsRange())
+ , m_coverage(prev->m_coverage)
+{
+ m_coverage.SetValue(topt.GetSourceWordsRange(), true);
+}
+
+int
+HReorderingForwardState::
+Compare(const FFState& o) const
+{
+ if (&o == this) return 0;
+
+ HReorderingForwardState const& other
+ = static_cast<HReorderingForwardState const&>(o);
+
+ return ((m_prevRange == other.m_prevRange)
+ ? ComparePrevScores(other.m_prevOption)
+ : (m_prevRange < other.m_prevRange) ? -1 : 1);
+}
+
+// For compatibility with the phrase-based reordering model, scoring is one
+// step delayed.
+// The forward model takes determines orientations heuristically as follows:
+// mono: if the next phrase comes after the conditioning phrase and
+// - there is a gap to the right of the conditioning phrase, or
+// - the next phrase immediately follows it
+// swap: if the next phrase goes before the conditioning phrase and
+// - there is a gap to the left of the conditioning phrase, or
+// - the next phrase immediately precedes it
+// dright: if the next phrase follows the conditioning phrase and other
+// stuff comes in between
+// dleft: if the next phrase precedes the conditioning phrase and other
+// stuff comes in between
+
+LRState*
+HReorderingForwardState::
+Expand(TranslationOption const& topt, InputType const& input,
+ ScoreComponentCollection* scores) const
+{
+ const WordsRange cur = topt.GetSourceWordsRange();
+ // keep track of the current coverage ourselves so we don't need the hypothesis
+ WordsBitmap cov = m_coverage;
+ cov.SetValue(cur, true);
+ if (!m_first) {
+ LRModel::ReorderingType reoType;
+ reoType = m_configuration.GetOrientation(m_prevRange,cur,cov);
+ CopyScores(scores, topt, input, reoType);
}
+ return new HReorderingForwardState(this, topt);
+}
}
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index 83607a66a..b11eed0cf 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -17,272 +17,284 @@
namespace Moses
{
- class LRState;
- class LexicalReordering;
- class SparseReordering;
-
- //! Factory class for lexical reordering states
- class LRModel
- {
- public:
- typedef int ReorderingType;
- friend class LexicalReordering;
- enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
- enum Direction { Forward, Backward, Bidirectional };
- enum Condition { F, E, FE };
-
- // constants for the different types of reordering
- // (correspond to indices in the respective table)
- static const ReorderingType M = 0; // monotonic
- static const ReorderingType NM = 1; // non-monotonic
- static const ReorderingType S = 1; // swap
- static const ReorderingType D = 2; // discontinuous
- static const ReorderingType DL = 2; // discontinuous, left
- static const ReorderingType DR = 3; // discontinuous, right
- static const ReorderingType R = 0; // right
- static const ReorderingType L = 1; // left
- static const ReorderingType MAX = 3; // largest possible
-
- // determine orientation, depending on model:
-
-
- ReorderingType // for first phrase in phrase-based
- GetOrientation(WordsRange const& cur) const;
-
- ReorderingType // for non-first phrases in phrase-based
- GetOrientation(WordsRange const& prev, WordsRange const& cur) const;
-
- ReorderingType // for HReorderingForwardState
- GetOrientation(WordsRange const& prev, WordsRange const& cur,
- WordsBitmap const& cov) const;
-
- ReorderingType // for HReorderingBackwarddState
- GetOrientation(int const reoDistance) const;
-
- LRModel(const std::string &modelType);
-
- void
- ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
- const LexicalReordering* producer);
-
- LRState*
- CreateLRState(const InputType &input) const;
-
- size_t GetNumberOfTypes() const;
- size_t GetNumScoreComponents() const;
- void SetAdditionalScoreComponents(size_t number);
-
- LexicalReordering*
- GetScoreProducer() const { return m_scoreProducer; }
-
- ModelType GetModelType() const { return m_modelType; }
- Direction GetDirection() const { return m_direction; }
- Condition GetCondition() const { return m_condition; }
-
- bool
- IsPhraseBased() const
- { return m_phraseBased; }
-
- bool
- CollapseScores() const
- { return m_collapseScores; }
-
- SparseReordering const*
- GetSparseReordering() const
- { return m_sparse.get(); }
-
- private:
- void
- SetScoreProducer(LexicalReordering* scoreProducer)
- { m_scoreProducer = scoreProducer; }
-
- std::string const&
- GetModelString() const
- { return m_modelString; }
-
- std::string m_modelString;
- LexicalReordering *m_scoreProducer;
- ModelType m_modelType;
- bool m_phraseBased;
- bool m_collapseScores;
- Direction m_direction;
- Condition m_condition;
- size_t m_additionalScoreComponents;
- boost::scoped_ptr<SparseReordering> m_sparse;
- };
-
- //! Abstract class for lexical reordering model states
- class LRState : public FFState
- {
- public:
-
- typedef int ReorderingType;
-
- virtual
- int
- Compare(const FFState& o) const = 0;
-
- virtual
- LRState*
- Expand(const TranslationOption& hypo, const InputType& input,
- ScoreComponentCollection* scores) const = 0;
-
- static
- LRState*
- CreateLRState(const std::vector<std::string>& config,
- LRModel::Direction dir,
- const InputType &input);
-
- protected:
-
- const LRModel& m_configuration;
-
- // The following is the true direction of the object, which can be
- // Backward or Forward even if the Configuration has Bidirectional.
- LRModel::Direction m_direction;
- size_t m_offset;
- //forward scores are conditioned on prev option, so need to remember it
- const TranslationOption *m_prevOption;
-
- inline
- LRState(const LRState *prev,
- const TranslationOption &topt)
- : m_configuration(prev->m_configuration)
- , m_direction(prev->m_direction)
- , m_offset(prev->m_offset)
- , m_prevOption(&topt)
- { }
-
- inline
- LRState(const LRModel &config,
- LRModel::Direction dir,
- size_t offset)
- : m_configuration(config)
- , m_direction(dir)
- , m_offset(offset)
- , m_prevOption(NULL)
- { }
-
- // copy the right scores in the right places, taking into account
- // forward/backward, offset, collapse
- void
- CopyScores(ScoreComponentCollection* scores,
- const TranslationOption& topt,
- const InputType& input, ReorderingType reoType) const;
-
- int
- ComparePrevScores(const TranslationOption *other) const;
- };
-
- //! @todo what is this?
- class BidirectionalReorderingState
- : public LRState
- {
- private:
- const LRState *m_backward;
- const LRState *m_forward;
- public:
- BidirectionalReorderingState(const LRModel &config,
- const LRState *bw,
- const LRState *fw, size_t offset)
- : LRState(config,
- LRModel::Bidirectional,
- offset)
- , m_backward(bw)
- , m_forward(fw)
- { }
-
- ~BidirectionalReorderingState()
- {
- delete m_backward;
- delete m_forward;
- }
-
- virtual
- int
- Compare(const FFState& o) const;
-
- virtual
- LRState*
- Expand(const TranslationOption& topt, const InputType& input,
- ScoreComponentCollection* scores) const;
- };
-
- //! State for the standard Moses implementation of lexical reordering models
- //! (see Koehn et al, Edinburgh System Description for the 2005 NIST MT
- //! Evaluation)
- class PhraseBasedReorderingState
- : public LRState
- {
- private:
- WordsRange m_prevRange;
- bool m_first;
- public:
- static bool m_useFirstBackwardScore;
- PhraseBasedReorderingState(const LRModel &config,
- LRModel::Direction dir,
- size_t offset);
- PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
- const TranslationOption &topt);
-
- virtual
- int
- Compare(const FFState& o) const;
-
- virtual
- LRState*
- Expand(const TranslationOption& topt,const InputType& input,
- ScoreComponentCollection* scores) const;
-
- ReorderingType GetOrientationTypeMSD(WordsRange currRange) const;
- ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const;
- ReorderingType GetOrientationTypeMonotonic(WordsRange currRange) const;
- ReorderingType GetOrientationTypeLeftRight(WordsRange currRange) const;
- };
-
- //! State for a hierarchical reordering model (see Galley and Manning, A
- //! Simple and Effective Hierarchical Phrase Reordering Model, EMNLP 2008)
- //! backward state (conditioned on the previous phrase)
- class HReorderingBackwardState : public LRState
- {
- private:
- ReorderingStack m_reoStack;
- public:
- HReorderingBackwardState(const LRModel &config, size_t offset);
- HReorderingBackwardState(const HReorderingBackwardState *prev,
- const TranslationOption &topt,
- ReorderingStack reoStack);
-
- virtual int Compare(const FFState& o) const;
- virtual LRState* Expand(const TranslationOption& hypo, const InputType& input,
- ScoreComponentCollection* scores) const;
-
- private:
- ReorderingType GetOrientationTypeMSD(int reoDistance) const;
- ReorderingType GetOrientationTypeMSLR(int reoDistance) const;
- ReorderingType GetOrientationTypeMonotonic(int reoDistance) const;
- ReorderingType GetOrientationTypeLeftRight(int reoDistance) const;
- };
-
-
- //!forward state (conditioned on the next phrase)
- class HReorderingForwardState : public LRState
- {
- private:
- bool m_first;
- WordsRange m_prevRange;
- WordsBitmap m_coverage;
-
- public:
- HReorderingForwardState(const LRModel &config, size_t sentenceLength,
- size_t offset);
- HReorderingForwardState(const HReorderingForwardState *prev,
- const TranslationOption &topt);
-
- virtual int Compare(const FFState& o) const;
- virtual LRState* Expand(const TranslationOption& hypo,
- const InputType& input,
- ScoreComponentCollection* scores) const;
- };
+class LRState;
+class LexicalReordering;
+class SparseReordering;
+
+//! Factory class for lexical reordering states
+class LRModel
+{
+public:
+ typedef int ReorderingType;
+ friend class LexicalReordering;
+ enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
+ enum Direction { Forward, Backward, Bidirectional };
+ enum Condition { F, E, FE };
+
+ // constants for the different types of reordering
+ // (correspond to indices in the respective table)
+ static const ReorderingType M = 0; // monotonic
+ static const ReorderingType NM = 1; // non-monotonic
+ static const ReorderingType S = 1; // swap
+ static const ReorderingType D = 2; // discontinuous
+ static const ReorderingType DL = 2; // discontinuous, left
+ static const ReorderingType DR = 3; // discontinuous, right
+ static const ReorderingType R = 0; // right
+ static const ReorderingType L = 1; // left
+ static const ReorderingType MAX = 3; // largest possible
+
+ // determine orientation, depending on model:
+
+
+ ReorderingType // for first phrase in phrase-based
+ GetOrientation(WordsRange const& cur) const;
+
+ ReorderingType // for non-first phrases in phrase-based
+ GetOrientation(WordsRange const& prev, WordsRange const& cur) const;
+
+ ReorderingType // for HReorderingForwardState
+ GetOrientation(WordsRange const& prev, WordsRange const& cur,
+ WordsBitmap const& cov) const;
+
+ ReorderingType // for HReorderingBackwarddState
+ GetOrientation(int const reoDistance) const;
+
+ LRModel(const std::string &modelType);
+
+ void
+ ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
+ const LexicalReordering* producer);
+
+ LRState*
+ CreateLRState(const InputType &input) const;
+
+ size_t GetNumberOfTypes() const;
+ size_t GetNumScoreComponents() const;
+ void SetAdditionalScoreComponents(size_t number);
+
+ LexicalReordering*
+ GetScoreProducer() const {
+ return m_scoreProducer;
+ }
+
+ ModelType GetModelType() const {
+ return m_modelType;
+ }
+ Direction GetDirection() const {
+ return m_direction;
+ }
+ Condition GetCondition() const {
+ return m_condition;
+ }
+
+ bool
+ IsPhraseBased() const {
+ return m_phraseBased;
+ }
+
+ bool
+ CollapseScores() const {
+ return m_collapseScores;
+ }
+
+ SparseReordering const*
+ GetSparseReordering() const {
+ return m_sparse.get();
+ }
+
+private:
+ void
+ SetScoreProducer(LexicalReordering* scoreProducer) {
+ m_scoreProducer = scoreProducer;
+ }
+
+ std::string const&
+ GetModelString() const {
+ return m_modelString;
+ }
+
+ std::string m_modelString;
+ LexicalReordering *m_scoreProducer;
+ ModelType m_modelType;
+ bool m_phraseBased;
+ bool m_collapseScores;
+ Direction m_direction;
+ Condition m_condition;
+ size_t m_additionalScoreComponents;
+ boost::scoped_ptr<SparseReordering> m_sparse;
+};
+
+//! Abstract class for lexical reordering model states
+class LRState : public FFState
+{
+public:
+
+ typedef int ReorderingType;
+
+ virtual
+ int
+ Compare(const FFState& o) const = 0;
+
+ virtual
+ LRState*
+ Expand(const TranslationOption& hypo, const InputType& input,
+ ScoreComponentCollection* scores) const = 0;
+
+ static
+ LRState*
+ CreateLRState(const std::vector<std::string>& config,
+ LRModel::Direction dir,
+ const InputType &input);
+
+protected:
+
+ const LRModel& m_configuration;
+
+ // The following is the true direction of the object, which can be
+ // Backward or Forward even if the Configuration has Bidirectional.
+ LRModel::Direction m_direction;
+ size_t m_offset;
+ //forward scores are conditioned on prev option, so need to remember it
+ const TranslationOption *m_prevOption;
+
+ inline
+ LRState(const LRState *prev,
+ const TranslationOption &topt)
+ : m_configuration(prev->m_configuration)
+ , m_direction(prev->m_direction)
+ , m_offset(prev->m_offset)
+ , m_prevOption(&topt)
+ { }
+
+ inline
+ LRState(const LRModel &config,
+ LRModel::Direction dir,
+ size_t offset)
+ : m_configuration(config)
+ , m_direction(dir)
+ , m_offset(offset)
+ , m_prevOption(NULL)
+ { }
+
+ // copy the right scores in the right places, taking into account
+ // forward/backward, offset, collapse
+ void
+ CopyScores(ScoreComponentCollection* scores,
+ const TranslationOption& topt,
+ const InputType& input, ReorderingType reoType) const;
+
+ int
+ ComparePrevScores(const TranslationOption *other) const;
+};
+
+//! @todo what is this?
+class BidirectionalReorderingState
+ : public LRState
+{
+private:
+ const LRState *m_backward;
+ const LRState *m_forward;
+public:
+ BidirectionalReorderingState(const LRModel &config,
+ const LRState *bw,
+ const LRState *fw, size_t offset)
+ : LRState(config,
+ LRModel::Bidirectional,
+ offset)
+ , m_backward(bw)
+ , m_forward(fw)
+ { }
+
+ ~BidirectionalReorderingState() {
+ delete m_backward;
+ delete m_forward;
+ }
+
+ virtual
+ int
+ Compare(const FFState& o) const;
+
+ virtual
+ LRState*
+ Expand(const TranslationOption& topt, const InputType& input,
+ ScoreComponentCollection* scores) const;
+};
+
+//! State for the standard Moses implementation of lexical reordering models
+//! (see Koehn et al, Edinburgh System Description for the 2005 NIST MT
+//! Evaluation)
+class PhraseBasedReorderingState
+ : public LRState
+{
+private:
+ WordsRange m_prevRange;
+ bool m_first;
+public:
+ static bool m_useFirstBackwardScore;
+ PhraseBasedReorderingState(const LRModel &config,
+ LRModel::Direction dir,
+ size_t offset);
+ PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
+ const TranslationOption &topt);
+
+ virtual
+ int
+ Compare(const FFState& o) const;
+
+ virtual
+ LRState*
+ Expand(const TranslationOption& topt,const InputType& input,
+ ScoreComponentCollection* scores) const;
+
+ ReorderingType GetOrientationTypeMSD(WordsRange currRange) const;
+ ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const;
+ ReorderingType GetOrientationTypeMonotonic(WordsRange currRange) const;
+ ReorderingType GetOrientationTypeLeftRight(WordsRange currRange) const;
+};
+
+//! State for a hierarchical reordering model (see Galley and Manning, A
+//! Simple and Effective Hierarchical Phrase Reordering Model, EMNLP 2008)
+//! backward state (conditioned on the previous phrase)
+class HReorderingBackwardState : public LRState
+{
+private:
+ ReorderingStack m_reoStack;
+public:
+ HReorderingBackwardState(const LRModel &config, size_t offset);
+ HReorderingBackwardState(const HReorderingBackwardState *prev,
+ const TranslationOption &topt,
+ ReorderingStack reoStack);
+
+ virtual int Compare(const FFState& o) const;
+ virtual LRState* Expand(const TranslationOption& hypo, const InputType& input,
+ ScoreComponentCollection* scores) const;
+
+private:
+ ReorderingType GetOrientationTypeMSD(int reoDistance) const;
+ ReorderingType GetOrientationTypeMSLR(int reoDistance) const;
+ ReorderingType GetOrientationTypeMonotonic(int reoDistance) const;
+ ReorderingType GetOrientationTypeLeftRight(int reoDistance) const;
+};
+
+
+//!forward state (conditioned on the next phrase)
+class HReorderingForwardState : public LRState
+{
+private:
+ bool m_first;
+ WordsRange m_prevRange;
+ WordsBitmap m_coverage;
+
+public:
+ HReorderingForwardState(const LRModel &config, size_t sentenceLength,
+ size_t offset);
+ HReorderingForwardState(const HReorderingForwardState *prev,
+ const TranslationOption &topt);
+
+ virtual int Compare(const FFState& o) const;
+ virtual LRState* Expand(const TranslationOption& hypo,
+ const InputType& input,
+ ScoreComponentCollection* scores) const;
+};
}
diff --git a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
index ec79163a7..387874c34 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
@@ -15,597 +15,577 @@
namespace Moses
{
- //cleans str of leading and tailing spaces
- std::string auxClearString(const std::string& str)
- {
- int i = 0, j = str.size()-1;
- while(i <= j) {
- if(' ' != str[i]) {
- break;
- } else {
- ++i;
- }
+//cleans str of leading and tailing spaces
+std::string auxClearString(const std::string& str)
+{
+ int i = 0, j = str.size()-1;
+ while(i <= j) {
+ if(' ' != str[i]) {
+ break;
+ } else {
+ ++i;
}
- while(j >= i) {
- if(' ' != str[j]) {
- break;
- } else {
- --j;
- }
+ }
+ while(j >= i) {
+ if(' ' != str[j]) {
+ break;
+ } else {
+ --j;
}
- return str.substr(i,j-i+1);
}
+ return str.substr(i,j-i+1);
+}
- void auxAppend(IPhrase& head, const IPhrase& tail)
- {
- head.reserve(head.size()+tail.size());
- for(size_t i = 0; i < tail.size(); ++i) {
- head.push_back(tail[i]);
- }
+void auxAppend(IPhrase& head, const IPhrase& tail)
+{
+ head.reserve(head.size()+tail.size());
+ for(size_t i = 0; i < tail.size(); ++i) {
+ head.push_back(tail[i]);
}
+}
- LexicalReorderingTable*
- LexicalReorderingTable::
- LoadAvailable(const std::string& filePath,
- const FactorList& f_factors,
- const FactorList& e_factors,
- const FactorList& c_factors)
- {
- //decide use Compact or Tree or Memory table
+LexicalReorderingTable*
+LexicalReorderingTable::
+LoadAvailable(const std::string& filePath,
+ const FactorList& f_factors,
+ const FactorList& e_factors,
+ const FactorList& c_factors)
+{
+ //decide use Compact or Tree or Memory table
#ifdef HAVE_CMPH
- LexicalReorderingTable *compactLexr = NULL;
- compactLexr = LexicalReorderingTableCompact::CheckAndLoad(filePath + ".minlexr", f_factors, e_factors, c_factors);
- if(compactLexr)
- return compactLexr;
+ LexicalReorderingTable *compactLexr = NULL;
+ compactLexr = LexicalReorderingTableCompact::CheckAndLoad(filePath + ".minlexr", f_factors, e_factors, c_factors);
+ if(compactLexr)
+ return compactLexr;
#endif
- LexicalReorderingTable* ret;
- if (FileExists(filePath+".binlexr.idx") )
- ret = new LexicalReorderingTableTree(filePath, f_factors,
- e_factors, c_factors);
- else
- ret = new LexicalReorderingTableMemory(filePath, f_factors,
- e_factors, c_factors);
- return ret;
- }
+ LexicalReorderingTable* ret;
+ if (FileExists(filePath+".binlexr.idx") )
+ ret = new LexicalReorderingTableTree(filePath, f_factors,
+ e_factors, c_factors);
+ else
+ ret = new LexicalReorderingTableMemory(filePath, f_factors,
+ e_factors, c_factors);
+ return ret;
+}
- LexicalReorderingTableMemory::
- LexicalReorderingTableMemory(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors)
- : LexicalReorderingTable(f_factors, e_factors, c_factors)
- {
- LoadFromFile(filePath);
- }
-
- LexicalReorderingTableMemory::
- ~LexicalReorderingTableMemory() { }
-
- std::vector<float>
- LexicalReorderingTableMemory::GetScore(const Phrase& f,
- const Phrase& e,
- const Phrase& c)
- {
- //rather complicated because of const can't use []... as [] might enter new things into std::map
- //also can't have to be careful with words range if c is empty can't use c.GetSize()-1 will underflow and be large
- TableType::const_iterator r;
- std::string key;
- if(0 == c.GetSize()) {
- key = MakeKey(f,e,c);
+LexicalReorderingTableMemory::
+LexicalReorderingTableMemory(const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors)
+ : LexicalReorderingTable(f_factors, e_factors, c_factors)
+{
+ LoadFromFile(filePath);
+}
+
+LexicalReorderingTableMemory::
+~LexicalReorderingTableMemory() { }
+
+std::vector<float>
+LexicalReorderingTableMemory::GetScore(const Phrase& f,
+ const Phrase& e,
+ const Phrase& c)
+{
+ //rather complicated because of const can't use []... as [] might enter new things into std::map
+ //also can't have to be careful with words range if c is empty can't use c.GetSize()-1 will underflow and be large
+ TableType::const_iterator r;
+ std::string key;
+ if(0 == c.GetSize()) {
+ key = MakeKey(f,e,c);
+ r = m_Table.find(key);
+ if(m_Table.end() != r) {
+ return r->second;
+ }
+ } else {
+ //right try from large to smaller context
+ for(size_t i = 0; i <= c.GetSize(); ++i) {
+ Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
+ key = MakeKey(f,e,sub_c);
r = m_Table.find(key);
if(m_Table.end() != r) {
- return r->second;
- }
- } else {
- //right try from large to smaller context
- for(size_t i = 0; i <= c.GetSize(); ++i) {
- Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
- key = MakeKey(f,e,sub_c);
- r = m_Table.find(key);
- if(m_Table.end() != r) {
- return r->second;
- }
+ return r->second;
}
}
- return Scores();
}
+ return Scores();
+}
- void
- LexicalReorderingTableMemory::
- DbgDump(std::ostream* out) const
- {
- TableType::const_iterator i;
- for(i = m_Table.begin(); i != m_Table.end(); ++i)
- {
- *out << " key: '" << i->first << "' score: ";
- *out << "(num scores: " << (i->second).size() << ")";
- for(size_t j = 0; j < (i->second).size(); ++j)
- *out << (i->second)[j] << " ";
-
- *out << "\n";
- }
- };
-
- std::string
- LexicalReorderingTableMemory::MakeKey(const Phrase& f,
- const Phrase& e,
- const Phrase& c) const
- {
- return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)),
- auxClearString(e.GetStringRep(m_FactorsE)),
- auxClearString(c.GetStringRep(m_FactorsC)));
+void
+LexicalReorderingTableMemory::
+DbgDump(std::ostream* out) const
+{
+ TableType::const_iterator i;
+ for(i = m_Table.begin(); i != m_Table.end(); ++i) {
+ *out << " key: '" << i->first << "' score: ";
+ *out << "(num scores: " << (i->second).size() << ")";
+ for(size_t j = 0; j < (i->second).size(); ++j)
+ *out << (i->second)[j] << " ";
+
+ *out << "\n";
}
+};
+
+std::string
+LexicalReorderingTableMemory::MakeKey(const Phrase& f,
+ const Phrase& e,
+ const Phrase& c) const
+{
+ return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)),
+ auxClearString(e.GetStringRep(m_FactorsE)),
+ auxClearString(c.GetStringRep(m_FactorsC)));
+}
- std::string
- LexicalReorderingTableMemory::MakeKey(const std::string& f,
- const std::string& e,
- const std::string& c) const
- {
- std::string key;
- if(!f.empty()) key += f;
- if(!m_FactorsE.empty()) { if(!key.empty()) { key += "|||"; } key += e; }
- if(!m_FactorsC.empty()) { if(!key.empty()) { key += "|||"; } key += c; }
- return key;
+std::string
+LexicalReorderingTableMemory::MakeKey(const std::string& f,
+ const std::string& e,
+ const std::string& c) const
+{
+ std::string key;
+ if(!f.empty()) key += f;
+ if(!m_FactorsE.empty()) {
+ if(!key.empty()) {
+ key += "|||";
+ }
+ key += e;
+ }
+ if(!m_FactorsC.empty()) {
+ if(!key.empty()) {
+ key += "|||";
+ }
+ key += c;
+ }
+ return key;
+}
+
+void
+LexicalReorderingTableMemory::
+LoadFromFile(const std::string& filePath)
+{
+ std::string fileName = filePath;
+ if(!FileExists(fileName) && FileExists(fileName+".gz"))
+ fileName += ".gz";
+
+ InputFileStream file(fileName);
+ std::string line(""), key("");
+ int numScores = -1;
+ std::cerr << "Loading table into memory...";
+ while(!getline(file, line).eof()) {
+ std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
+ int t = 0 ;
+ std::string f(""),e(""),c("");
+
+ if(!m_FactorsF.empty()) {
+ //there should be something for f
+ f = auxClearString(tokens.at(t));
+ ++t;
+ }
+ if(!m_FactorsE.empty()) {
+ //there should be something for e
+ e = auxClearString(tokens.at(t));
+ ++t;
+ }
+ if(!m_FactorsC.empty()) {
+ //there should be something for c
+ c = auxClearString(tokens.at(t));
+ ++t;
+ }
+ //last token are the probs
+ std::vector<float> p = Scan<float>(Tokenize(tokens.at(t)));
+ //sanity check: all lines must have equall number of probs
+ if(-1 == numScores) {
+ numScores = (int)p.size(); //set in first line
+ }
+ if((int)p.size() != numScores) {
+ TRACE_ERR( "found inconsistent number of probabilities... found "
+ << p.size() << " expected " << numScores << std::endl);
+ exit(0);
+ }
+ std::transform(p.begin(),p.end(),p.begin(),TransformScore);
+ std::transform(p.begin(),p.end(),p.begin(),FloorScore);
+ //save it all into our map
+ m_Table[MakeKey(f,e,c)] = p;
+ }
+ std::cerr << "done.\n";
+}
+
+LexicalReorderingTableTree::
+LexicalReorderingTableTree(const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors)
+ : LexicalReorderingTable(f_factors, e_factors, c_factors)
+ , m_UseCache(false)
+ , m_FilePath(filePath)
+{
+ m_Table.reset(new PrefixTreeMap());
+ m_Table->Read(m_FilePath+".binlexr");
+}
+
+LexicalReorderingTableTree::
+~LexicalReorderingTableTree()
+{ }
+
+Scores
+LexicalReorderingTableTree::
+GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
+{
+ if((!m_FactorsF.empty() && 0 == f.GetSize())
+ || (!m_FactorsE.empty() && 0 == e.GetSize())) {
+ //NOTE: no check for c as c might be empty, e.g. start of sentence
+ //not a proper key
+ // phi: commented out, since e may be empty (drop-unknown)
+ //std::cerr << "Not a proper key!\n";
+ return Scores();
}
- void
- LexicalReorderingTableMemory::
- LoadFromFile(const std::string& filePath)
- {
- std::string fileName = filePath;
- if(!FileExists(fileName) && FileExists(fileName+".gz"))
- fileName += ".gz";
-
- InputFileStream file(fileName);
- std::string line(""), key("");
- int numScores = -1;
- std::cerr << "Loading table into memory...";
- while(!getline(file, line).eof())
- {
- std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
- int t = 0 ;
- std::string f(""),e(""),c("");
-
- if(!m_FactorsF.empty()) {
- //there should be something for f
- f = auxClearString(tokens.at(t));
- ++t;
- }
- if(!m_FactorsE.empty()) {
- //there should be something for e
- e = auxClearString(tokens.at(t));
- ++t;
- }
- if(!m_FactorsC.empty()) {
- //there should be something for c
- c = auxClearString(tokens.at(t));
- ++t;
- }
- //last token are the probs
- std::vector<float> p = Scan<float>(Tokenize(tokens.at(t)));
- //sanity check: all lines must have equall number of probs
- if(-1 == numScores) {
- numScores = (int)p.size(); //set in first line
- }
- if((int)p.size() != numScores) {
- TRACE_ERR( "found inconsistent number of probabilities... found "
- << p.size() << " expected " << numScores << std::endl);
- exit(0);
- }
- std::transform(p.begin(),p.end(),p.begin(),TransformScore);
- std::transform(p.begin(),p.end(),p.begin(),FloorScore);
- //save it all into our map
- m_Table[MakeKey(f,e,c)] = p;
+ CacheType::iterator i;
+
+ if(m_UseCache) {
+ std::pair<CacheType::iterator, bool> r;
+ r = m_Cache.insert(std::make_pair(MakeCacheKey(f,e),Candidates()));
+ if(!r.second) return auxFindScoreForContext((r.first)->second, c);
+ i = r.first;
+ } else if((i = m_Cache.find(MakeCacheKey(f,e))) != m_Cache.end())
+ // although we might not be caching now, cache might be none empty!
+ return auxFindScoreForContext(i->second, c);
+
+ // not in cache => go to file...
+ Candidates cands;
+ m_Table->GetCandidates(MakeTableKey(f,e), &cands);
+ if(cands.empty()) return Scores();
+ if(m_UseCache) i->second = cands;
+
+ if(m_FactorsC.empty()) {
+ UTIL_THROW_IF2(1 != cands.size(), "Error");
+ return cands[0].GetScore(0);
+ } else return auxFindScoreForContext(cands, c);
+};
+
+Scores
+LexicalReorderingTableTree::
+auxFindScoreForContext(const Candidates& cands, const Phrase& context)
+{
+ if(m_FactorsC.empty()) {
+ UTIL_THROW_IF2(cands.size() > 1, "Error");
+ return (cands.size() == 1) ? cands[0].GetScore(0) : Scores();
+ } else {
+ std::vector<std::string> cvec;
+ for(size_t i = 0; i < context.GetSize(); ++i)
+ cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
+
+ IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);
+ IPhrase sub_c;
+ IPhrase::iterator start = c.begin();
+ for(size_t j = 0; j <= context.GetSize(); ++j, ++start) {
+ sub_c.assign(start, c.end());
+ for(size_t cand = 0; cand < cands.size(); ++cand) {
+ IPhrase p = cands[cand].GetPhrase(0);
+ if(cands[cand].GetPhrase(0) == sub_c)
+ return cands[cand].GetScore(0);
}
- std::cerr << "done.\n";
+ }
+ return Scores();
}
-
- LexicalReorderingTableTree::
- LexicalReorderingTableTree(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors)
- : LexicalReorderingTable(f_factors, e_factors, c_factors)
- , m_UseCache(false)
- , m_FilePath(filePath)
- {
+}
+
+void
+LexicalReorderingTableTree::
+InitializeForInput(const InputType& input)
+{
+ ClearCache();
+ if(ConfusionNet const* cn = dynamic_cast<ConfusionNet const*>(&input)) {
+ Cache(*cn);
+ } else if (dynamic_cast<Sentence const*>(&input)) {
+ // Cache(*s); ... this just takes up too much memory, we cache elsewhere
+ DisableCache();
+ }
+ if (!m_Table.get()) {
+ //load thread specific table.
m_Table.reset(new PrefixTreeMap());
m_Table->Read(m_FilePath+".binlexr");
}
-
- LexicalReorderingTableTree::
- ~LexicalReorderingTableTree()
- { }
-
- Scores
- LexicalReorderingTableTree::
- GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
- {
- if((!m_FactorsF.empty() && 0 == f.GetSize())
- || (!m_FactorsE.empty() && 0 == e.GetSize()))
- {
- //NOTE: no check for c as c might be empty, e.g. start of sentence
- //not a proper key
- // phi: commented out, since e may be empty (drop-unknown)
- //std::cerr << "Not a proper key!\n";
- return Scores();
+};
+
+bool
+LexicalReorderingTableTree::
+Create(std::istream& inFile, const std::string& outFileName)
+{
+ typedef PrefixTreeSA<LabelId,OFF_T> PSA;
+
+ std::string
+ line,
+ ofn(outFileName+".binlexr.srctree"),
+ oft(outFileName+".binlexr.tgtdata"),
+ ofi(outFileName+".binlexr.idx"),
+ ofsv(outFileName+".binlexr.voc0"),
+ oftv(outFileName+".binlexr.voc1");
+
+ FILE *os = fOpen(ofn.c_str(),"wb");
+ FILE *ot = fOpen(oft.c_str(),"wb");
+
+ PSA *psa = new PSA;
+ PSA::setDefault(InvalidOffT);
+ WordVoc* voc[3];
+
+ LabelId currFirstWord = InvalidLabelId;
+ IPhrase currKey;
+
+ Candidates cands;
+ std::vector<OFF_T> vo;
+ size_t lnc = 0;
+ size_t numTokens = 0;
+ size_t numKeyTokens = 0;
+ while(getline(inFile, line)) {
+ ++lnc;
+ if(0 == lnc % 10000) TRACE_ERR(".");
+ IPhrase key;
+ Scores score;
+
+ std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
+ std::string w;
+ if(1 == lnc) {
+ //do some init stuff in the first line
+ numTokens = tokens.size();
+ if(tokens.size() == 2) {
+ // f ||| score
+ numKeyTokens = 1;
+ voc[0] = new WordVoc();
+ voc[1] = 0;
+ } else if(3 == tokens.size() || 4 == tokens.size()) {
+ //either f ||| e ||| score or f ||| e ||| c ||| score
+ numKeyTokens = 2;
+ voc[0] = new WordVoc(); //f voc
+ voc[1] = new WordVoc(); //e voc
+ voc[2] = voc[1]; //c & e share voc
}
+ } else {
+ //sanity check ALL lines must have same number of tokens
+ UTIL_THROW_IF2(numTokens != tokens.size(),
+ "Lines do not have the same number of tokens");
+ }
+ size_t phrase = 0;
+ for(; phrase < numKeyTokens; ++phrase) {
+ //conditioned on more than just f... need |||
+ if(phrase >=1) key.push_back(PrefixTreeMap::MagicWord);
+ std::istringstream is(tokens[phrase]);
+ while(is >> w) key.push_back(voc[phrase]->add(w));
+ }
- CacheType::iterator i;
+ //collect all non key phrases, i.e. c
+ std::vector<IPhrase> tgt_phrases;
+ tgt_phrases.resize(numTokens - numKeyTokens - 1);
+ for(size_t j = 0; j < tgt_phrases.size(); ++j, ++phrase) {
+ std::istringstream is(tokens[numKeyTokens + j]);
+ while(is >> w) tgt_phrases[j].push_back(voc[phrase]->add(w));
+ }
- if(m_UseCache)
- {
- std::pair<CacheType::iterator, bool> r;
- r = m_Cache.insert(std::make_pair(MakeCacheKey(f,e),Candidates()));
- if(!r.second) return auxFindScoreForContext((r.first)->second, c);
- i = r.first;
- }
- else if((i = m_Cache.find(MakeCacheKey(f,e))) != m_Cache.end())
- // although we might not be caching now, cache might be none empty!
- return auxFindScoreForContext(i->second, c);
+ //last token is score
+ std::istringstream is(tokens[numTokens-1]);
+ while(is >> w) score.push_back(atof(w.c_str()));
- // not in cache => go to file...
- Candidates cands;
- m_Table->GetCandidates(MakeTableKey(f,e), &cands);
- if(cands.empty()) return Scores();
- if(m_UseCache) i->second = cands;
-
- if(m_FactorsC.empty())
- {
- UTIL_THROW_IF2(1 != cands.size(), "Error");
- return cands[0].GetScore(0);
- }
- else return auxFindScoreForContext(cands, c);
- };
-
- Scores
- LexicalReorderingTableTree::
- auxFindScoreForContext(const Candidates& cands, const Phrase& context)
- {
- if(m_FactorsC.empty())
- {
- UTIL_THROW_IF2(cands.size() > 1, "Error");
- return (cands.size() == 1) ? cands[0].GetScore(0) : Scores();
- }
- else
- {
- std::vector<std::string> cvec;
- for(size_t i = 0; i < context.GetSize(); ++i)
- cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
-
- IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);
- IPhrase sub_c;
- IPhrase::iterator start = c.begin();
- for(size_t j = 0; j <= context.GetSize(); ++j, ++start)
- {
- sub_c.assign(start, c.end());
- for(size_t cand = 0; cand < cands.size(); ++cand)
- {
- IPhrase p = cands[cand].GetPhrase(0);
- if(cands[cand].GetPhrase(0) == sub_c)
- return cands[cand].GetScore(0);
- }
- }
- return Scores();
- }
- }
+ //transform score now...
+ std::transform(score.begin(),score.end(),score.begin(),TransformScore);
+ std::transform(score.begin(),score.end(),score.begin(),FloorScore);
+ std::vector<Scores> scores;
+ scores.push_back(score);
- void
- LexicalReorderingTableTree::
- InitializeForInput(const InputType& input)
- {
- ClearCache();
- if(ConfusionNet const* cn = dynamic_cast<ConfusionNet const*>(&input))
- {
- Cache(*cn);
- }
- else if (dynamic_cast<Sentence const*>(&input))
- {
- // Cache(*s); ... this just takes up too much memory, we cache elsewhere
- DisableCache();
- }
- if (!m_Table.get())
- {
- //load thread specific table.
- m_Table.reset(new PrefixTreeMap());
- m_Table->Read(m_FilePath+".binlexr");
+ if(key.empty()) {
+ TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n");
+ continue;
+ }
+
+ //first time inits
+ if(currFirstWord == InvalidLabelId) currFirstWord = key[0];
+ if(currKey.empty()) {
+ currKey = key;
+ //insert key into tree
+ UTIL_THROW_IF2(psa == NULL, "Object not yet created");
+ PSA::Data& d = psa->insert(key);
+ if(d == InvalidOffT) d = fTell(ot);
+ else {
+ TRACE_ERR("ERROR: source phrase already inserted (A)!\nline("
+ << lnc << "): '" << line << "\n");
+ return false;
}
- };
-
- bool
- LexicalReorderingTableTree::
- Create(std::istream& inFile, const std::string& outFileName)
- {
- typedef PrefixTreeSA<LabelId,OFF_T> PSA;
-
- std::string
- line,
- ofn(outFileName+".binlexr.srctree"),
- oft(outFileName+".binlexr.tgtdata"),
- ofi(outFileName+".binlexr.idx"),
- ofsv(outFileName+".binlexr.voc0"),
- oftv(outFileName+".binlexr.voc1");
-
- FILE *os = fOpen(ofn.c_str(),"wb");
- FILE *ot = fOpen(oft.c_str(),"wb");
-
- PSA *psa = new PSA;
- PSA::setDefault(InvalidOffT);
- WordVoc* voc[3];
-
- LabelId currFirstWord = InvalidLabelId;
- IPhrase currKey;
-
- Candidates cands;
- std::vector<OFF_T> vo;
- size_t lnc = 0;
- size_t numTokens = 0;
- size_t numKeyTokens = 0;
- while(getline(inFile, line))
- {
- ++lnc;
- if(0 == lnc % 10000) TRACE_ERR(".");
- IPhrase key;
- Scores score;
-
- std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||");
- std::string w;
- if(1 == lnc)
- {
- //do some init stuff in the first line
- numTokens = tokens.size();
- if(tokens.size() == 2)
- { // f ||| score
- numKeyTokens = 1;
- voc[0] = new WordVoc();
- voc[1] = 0;
- }
- else if(3 == tokens.size() || 4 == tokens.size())
- { //either f ||| e ||| score or f ||| e ||| c ||| score
- numKeyTokens = 2;
- voc[0] = new WordVoc(); //f voc
- voc[1] = new WordVoc(); //e voc
- voc[2] = voc[1]; //c & e share voc
- }
- }
- else
- {
- //sanity check ALL lines must have same number of tokens
- UTIL_THROW_IF2(numTokens != tokens.size(),
- "Lines do not have the same number of tokens");
- }
- size_t phrase = 0;
- for(; phrase < numKeyTokens; ++phrase)
- {
- //conditioned on more than just f... need |||
- if(phrase >=1) key.push_back(PrefixTreeMap::MagicWord);
- std::istringstream is(tokens[phrase]);
- while(is >> w) key.push_back(voc[phrase]->add(w));
- }
-
- //collect all non key phrases, i.e. c
- std::vector<IPhrase> tgt_phrases;
- tgt_phrases.resize(numTokens - numKeyTokens - 1);
- for(size_t j = 0; j < tgt_phrases.size(); ++j, ++phrase)
- {
- std::istringstream is(tokens[numKeyTokens + j]);
- while(is >> w) tgt_phrases[j].push_back(voc[phrase]->add(w));
- }
-
- //last token is score
- std::istringstream is(tokens[numTokens-1]);
- while(is >> w) score.push_back(atof(w.c_str()));
-
- //transform score now...
- std::transform(score.begin(),score.end(),score.begin(),TransformScore);
- std::transform(score.begin(),score.end(),score.begin(),FloorScore);
- std::vector<Scores> scores;
- scores.push_back(score);
-
- if(key.empty()) {
- TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n");
- continue;
- }
-
- //first time inits
- if(currFirstWord == InvalidLabelId) currFirstWord = key[0];
- if(currKey.empty())
- {
- currKey = key;
- //insert key into tree
- UTIL_THROW_IF2(psa == NULL, "Object not yet created");
- PSA::Data& d = psa->insert(key);
- if(d == InvalidOffT) d = fTell(ot);
- else
- {
- TRACE_ERR("ERROR: source phrase already inserted (A)!\nline("
- << lnc << "): '" << line << "\n");
- return false;
- }
- }
-
- if(currKey != key) {
- //ok new key
- currKey = key;
- //a) write cands for old key
- cands.writeBin(ot);
- cands.clear();
- //b) check if we need to move on to new tree root
- if(key[0] != currFirstWord) {
- // write key prefix tree to file and clear
- PTF pf;
- if(currFirstWord >= vo.size())
- vo.resize(currFirstWord+1,InvalidOffT);
- vo[currFirstWord] = fTell(os);
- pf.create(*psa, os);
- delete psa;
- psa = new PSA;
- currFirstWord = key[0];
- }
-
- // c) insert key into tree
- UTIL_THROW_IF2(psa == NULL, "Object not yet created");
- PSA::Data& d = psa->insert(key);
- if(d == InvalidOffT) d = fTell(ot);
- else
- {
- TRACE_ERR("ERROR: source phrase already inserted (A)!\nline("
- << lnc << "): '" << line << "\n");
- return false;
- }
- }
- cands.push_back(GenericCandidate(tgt_phrases, scores));
+ }
+
+ if(currKey != key) {
+ //ok new key
+ currKey = key;
+ //a) write cands for old key
+ cands.writeBin(ot);
+ cands.clear();
+ //b) check if we need to move on to new tree root
+ if(key[0] != currFirstWord) {
+ // write key prefix tree to file and clear
+ PTF pf;
+ if(currFirstWord >= vo.size())
+ vo.resize(currFirstWord+1,InvalidOffT);
+ vo[currFirstWord] = fTell(os);
+ pf.create(*psa, os);
+ delete psa;
+ psa = new PSA;
+ currFirstWord = key[0];
}
- if (lnc == 0)
- {
- TRACE_ERR("ERROR: empty lexicalised reordering file\n" << std::endl);
- return false;
+
+ // c) insert key into tree
+ UTIL_THROW_IF2(psa == NULL, "Object not yet created");
+ PSA::Data& d = psa->insert(key);
+ if(d == InvalidOffT) d = fTell(ot);
+ else {
+ TRACE_ERR("ERROR: source phrase already inserted (A)!\nline("
+ << lnc << "): '" << line << "\n");
+ return false;
}
- cands.writeBin(ot);
- cands.clear();
-
- PTF pf;
- if(currFirstWord >= vo.size())
- vo.resize(currFirstWord+1,InvalidOffT);
- vo[currFirstWord] = fTell(os);
- pf.create(*psa,os);
- delete psa;
- psa=0;
-
- fClose(os);
- fClose(ot);
- FILE *oi = fOpen(ofi.c_str(),"wb");
- fWriteVector(oi,vo);
- fClose(oi);
-
- if(voc[0]) { voc[0]->Write(ofsv); delete voc[0]; }
- if(voc[1]) { voc[1]->Write(oftv); delete voc[1]; }
- return true;
+ }
+ cands.push_back(GenericCandidate(tgt_phrases, scores));
}
-
- std::string
- LexicalReorderingTableTree::
- MakeCacheKey(const Phrase& f, const Phrase& e) const
- {
- std::string key;
- if(!m_FactorsF.empty())
- key += auxClearString(f.GetStringRep(m_FactorsF));
-
- if(!m_FactorsE.empty()) {
- if(!key.empty()) { key += "|||"; }
- key += auxClearString(e.GetStringRep(m_FactorsE));
+ if (lnc == 0) {
+ TRACE_ERR("ERROR: empty lexicalised reordering file\n" << std::endl);
+ return false;
+ }
+ cands.writeBin(ot);
+ cands.clear();
+
+ PTF pf;
+ if(currFirstWord >= vo.size())
+ vo.resize(currFirstWord+1,InvalidOffT);
+ vo[currFirstWord] = fTell(os);
+ pf.create(*psa,os);
+ delete psa;
+ psa=0;
+
+ fClose(os);
+ fClose(ot);
+ FILE *oi = fOpen(ofi.c_str(),"wb");
+ fWriteVector(oi,vo);
+ fClose(oi);
+
+ if(voc[0]) {
+ voc[0]->Write(ofsv);
+ delete voc[0];
+ }
+ if(voc[1]) {
+ voc[1]->Write(oftv);
+ delete voc[1];
+ }
+ return true;
+}
+
+std::string
+LexicalReorderingTableTree::
+MakeCacheKey(const Phrase& f, const Phrase& e) const
+{
+ std::string key;
+ if(!m_FactorsF.empty())
+ key += auxClearString(f.GetStringRep(m_FactorsF));
+
+ if(!m_FactorsE.empty()) {
+ if(!key.empty()) {
+ key += "|||";
}
- return key;
- };
+ key += auxClearString(e.GetStringRep(m_FactorsE));
+ }
+ return key;
+};
- IPhrase
- LexicalReorderingTableTree::
- MakeTableKey(const Phrase& f, const Phrase& e) const
- {
- IPhrase key;
- std::vector<std::string> keyPart;
- if(!m_FactorsF.empty())
- {
- for(size_t i = 0; i < f.GetSize(); ++i)
- keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
- auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId));
- keyPart.clear();
- }
- if(!m_FactorsE.empty())
- {
- if(!key.empty()) key.push_back(PrefixTreeMap::MagicWord);
- for(size_t i = 0; i < e.GetSize(); ++i)
- keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
- auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId));
- }
- return key;
- };
-
-
- struct State
- {
- State(PPimp* t, const std::string& p)
- : pos(t), path(p) { }
-
- PPimp* pos;
- std::string path;
- };
-
- void
- LexicalReorderingTableTree::
- auxCacheForSrcPhrase(const Phrase& f)
- {
- if(m_FactorsE.empty())
- {
- //f is all of key...
- Candidates cands;
- m_Table->GetCandidates(MakeTableKey(f,Phrase(ARRAY_SIZE_INCR)),&cands);
- m_Cache[MakeCacheKey(f,Phrase(ARRAY_SIZE_INCR))] = cands;
- }
- else
- {
- ObjectPool<PPimp> pool;
- PPimp* pPos = m_Table->GetRoot();
-
- // 1) goto subtree for f
- for(size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i)
- pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
-
- if(pPos && pPos->isValid())
- pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
-
- if(!pPos || !pPos->isValid())
- return;
-
- //2) explore whole subtree depth first & cache
- std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";
-
- std::vector<State> stack;
- stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),""));
- Candidates cands;
- while(!stack.empty())
- {
- if(stack.back().pos->isValid())
- {
- LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
- std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
- //cache this
- m_Table->GetCandidates(*stack.back().pos,&cands);
- if(!cands.empty()) m_Cache[cache_key + auxClearString(next_path)] = cands;
- cands.clear();
- PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0));
- ++stack.back().pos->idx;
- stack.push_back(State(next_pos,next_path));
- }
- else stack.pop_back();
- }
- }
+IPhrase
+LexicalReorderingTableTree::
+MakeTableKey(const Phrase& f, const Phrase& e) const
+{
+ IPhrase key;
+ std::vector<std::string> keyPart;
+ if(!m_FactorsF.empty()) {
+ for(size_t i = 0; i < f.GetSize(); ++i)
+ keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
+ auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId));
+ keyPart.clear();
}
-
- void
- LexicalReorderingTableTree::
- Cache(const ConfusionNet& /*input*/)
- {
- return;
+ if(!m_FactorsE.empty()) {
+ if(!key.empty()) key.push_back(PrefixTreeMap::MagicWord);
+ for(size_t i = 0; i < e.GetSize(); ++i)
+ keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
+ auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId));
}
-
- void
- LexicalReorderingTableTree::
- Cache(const Sentence& input)
- {
- //only works with sentences...
- size_t prev_cache_size = m_Cache.size();
- size_t max_phrase_length = input.GetSize();
- for(size_t len = 0; len <= max_phrase_length; ++len)
- {
- for(size_t start = 0; start+len <= input.GetSize(); ++start)
- {
- Phrase f = input.GetSubString(WordsRange(start, start+len));
- auxCacheForSrcPhrase(f);
- }
- }
- std::cerr << "Cached " << m_Cache.size() - prev_cache_size
- << " new primary reordering table keys\n";
+ return key;
+};
+
+
+struct State {
+ State(PPimp* t, const std::string& p)
+ : pos(t), path(p) { }
+
+ PPimp* pos;
+ std::string path;
+};
+
+void
+LexicalReorderingTableTree::
+auxCacheForSrcPhrase(const Phrase& f)
+{
+ if(m_FactorsE.empty()) {
+ //f is all of key...
+ Candidates cands;
+ m_Table->GetCandidates(MakeTableKey(f,Phrase(ARRAY_SIZE_INCR)),&cands);
+ m_Cache[MakeCacheKey(f,Phrase(ARRAY_SIZE_INCR))] = cands;
+ } else {
+ ObjectPool<PPimp> pool;
+ PPimp* pPos = m_Table->GetRoot();
+
+ // 1) goto subtree for f
+ for(size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i)
+ pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
+
+ if(pPos && pPos->isValid())
+ pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
+
+ if(!pPos || !pPos->isValid())
+ return;
+
+ //2) explore whole subtree depth first & cache
+ std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||";
+
+ std::vector<State> stack;
+ stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),""));
+ Candidates cands;
+ while(!stack.empty()) {
+ if(stack.back().pos->isValid()) {
+ LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
+ std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
+ //cache this
+ m_Table->GetCandidates(*stack.back().pos,&cands);
+ if(!cands.empty()) m_Cache[cache_key + auxClearString(next_path)] = cands;
+ cands.clear();
+ PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0));
+ ++stack.back().pos->idx;
+ stack.push_back(State(next_pos,next_path));
+ } else stack.pop_back();
+ }
+ }
+}
+
+void
+LexicalReorderingTableTree::
+Cache(const ConfusionNet& /*input*/)
+{
+ return;
+}
+
+void
+LexicalReorderingTableTree::
+Cache(const Sentence& input)
+{
+ //only works with sentences...
+ size_t prev_cache_size = m_Cache.size();
+ size_t max_phrase_length = input.GetSize();
+ for(size_t len = 0; len <= max_phrase_length; ++len) {
+ for(size_t start = 0; start+len <= input.GetSize(); ++start) {
+ Phrase f = input.GetSubString(WordsRange(start, start+len));
+ auxCacheForSrcPhrase(f);
+ }
}
+ std::cerr << "Cached " << m_Cache.size() - prev_cache_size
+ << " new primary reordering table keys\n";
+}
}
diff --git a/moses/FF/LexicalReordering/LexicalReorderingTable.h b/moses/FF/LexicalReordering/LexicalReorderingTable.h
index f4eceb72e..6c8e7e03c 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingTable.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingTable.h
@@ -22,174 +22,191 @@
namespace Moses
{
- class Phrase;
- class InputType;
- class ConfusionNet;
-
- //! additional types
- class LexicalReorderingTable
- {
- public:
- LexicalReorderingTable(const FactorList& f_factors,
- const FactorList& e_factors,
- const FactorList& c_factors)
- : m_FactorsF(f_factors)
- , m_FactorsE(e_factors)
- , m_FactorsC(c_factors) { }
-
- virtual
- ~LexicalReorderingTable() { }
-
- public:
- static
- LexicalReorderingTable*
- LoadAvailable(const std::string& filePath,
- const FactorList& f_factors,
- const FactorList& e_factors,
- const FactorList& c_factors);
-
- virtual
- Scores
- GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0;
-
- virtual
- void
- InitializeForInput(const InputType&) { /* override for on-demand loading */ };
-
- virtual
- void
- InitializeForInputPhrase(const Phrase&) { }
-
-
- const FactorList& GetFFactorMask() const { return m_FactorsF; }
- const FactorList& GetEFactorMask() const { return m_FactorsE; }
- const FactorList& GetCFactorMask() const { return m_FactorsC; }
-
- virtual
- void
- DbgDump(std::ostream* out) const { *out << "Overwrite in subclass...\n"; };
- // why is this not a pure virtual function? - UG
-
- protected:
- FactorList m_FactorsF;
- FactorList m_FactorsE;
- FactorList m_FactorsC;
+class Phrase;
+class InputType;
+class ConfusionNet;
+
+//! additional types
+class LexicalReorderingTable
+{
+public:
+ LexicalReorderingTable(const FactorList& f_factors,
+ const FactorList& e_factors,
+ const FactorList& c_factors)
+ : m_FactorsF(f_factors)
+ , m_FactorsE(e_factors)
+ , m_FactorsC(c_factors) { }
+
+ virtual
+ ~LexicalReorderingTable() { }
+
+public:
+ static
+ LexicalReorderingTable*
+ LoadAvailable(const std::string& filePath,
+ const FactorList& f_factors,
+ const FactorList& e_factors,
+ const FactorList& c_factors);
+
+ virtual
+ Scores
+ GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0;
+
+ virtual
+ void
+ InitializeForInput(const InputType&) {
+ /* override for on-demand loading */
};
- //! @todo what is this?
- class LexicalReorderingTableMemory
- : public LexicalReorderingTable
- {
- typedef std::map< std::string, std::vector<float> > TableType;
- TableType m_Table;
-
- //implements LexicalReorderingTable saving all scores in one large std::map<> thingy
- //to be used for non binary tables... uses a LOT of memory
- public:
- LexicalReorderingTableMemory(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors);
-
- virtual
- ~LexicalReorderingTableMemory();
-
- public:
- virtual
- std::vector<float>
- GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
-
- void
- DbgDump(std::ostream* out) const;
-
- private:
-
- std::string
- MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
-
- std::string
- MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
-
- void
- LoadFromFile(const std::string& filePath);
+ virtual
+ void
+ InitializeForInputPhrase(const Phrase&) { }
+
+
+ const FactorList& GetFFactorMask() const {
+ return m_FactorsF;
+ }
+ const FactorList& GetEFactorMask() const {
+ return m_FactorsE;
+ }
+ const FactorList& GetCFactorMask() const {
+ return m_FactorsC;
+ }
+
+ virtual
+ void
+ DbgDump(std::ostream* out) const {
+ *out << "Overwrite in subclass...\n";
};
-
- class LexicalReorderingTableTree
- : public LexicalReorderingTable
- {
- //implements LexicalReorderingTable using the crafty PDT code...
+ // why is this not a pure virtual function? - UG
+
+protected:
+ FactorList m_FactorsF;
+ FactorList m_FactorsE;
+ FactorList m_FactorsC;
+};
+
+//! @todo what is this?
+class LexicalReorderingTableMemory
+ : public LexicalReorderingTable
+{
+ typedef std::map< std::string, std::vector<float> > TableType;
+ TableType m_Table;
+
+ //implements LexicalReorderingTable saving all scores in one large std::map<> thingy
+ //to be used for non binary tables... uses a LOT of memory
+public:
+ LexicalReorderingTableMemory(const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
+
+ virtual
+ ~LexicalReorderingTableMemory();
+
+public:
+ virtual
+ std::vector<float>
+ GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
+
+ void
+ DbgDump(std::ostream* out) const;
- typedef std::map< std::string, Candidates > CacheType;
+private:
+
+ std::string
+ MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
+
+ std::string
+ MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
+
+ void
+ LoadFromFile(const std::string& filePath);
+};
+
+class LexicalReorderingTableTree
+ : public LexicalReorderingTable
+{
+ //implements LexicalReorderingTable using the crafty PDT code...
+
+ typedef std::map< std::string, Candidates > CacheType;
#ifdef WITH_THREADS
- typedef boost::thread_specific_ptr<PrefixTreeMap> TableType;
+ typedef boost::thread_specific_ptr<PrefixTreeMap> TableType;
#else
- typedef std::auto_ptr<PrefixTreeMap> TableType;
+ typedef std::auto_ptr<PrefixTreeMap> TableType;
#endif
- static const int SourceVocId = 0;
- static const int TargetVocId = 1;
-
- bool m_UseCache;
- std::string m_FilePath;
- CacheType m_Cache;
- TableType m_Table;
-
- public:
-
- static
- bool
- Create(std::istream& inFile, const std::string& outFileName);
-
- LexicalReorderingTableTree(const std::string& filePath,
- const std::vector<FactorType>& f_factors,
- const std::vector<FactorType>& e_factors,
- const std::vector<FactorType>& c_factors);
-
- ~LexicalReorderingTableTree();
-
- bool IsCacheEnabled() const { return m_UseCache; };
- void EnableCache() { m_UseCache = true; };
- void DisableCache() { m_UseCache = false; };
- void ClearCache() { if (m_UseCache) m_Cache.clear(); };
-
- virtual
- std::vector<float>
- GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
-
- virtual
- void
- InitializeForInput(const InputType& input);
-
- virtual
- void
- InitializeForInputPhrase(const Phrase& f)
- {
- ClearCache();
- auxCacheForSrcPhrase(f);
- }
-
-
- private:
- std::string
- MakeCacheKey(const Phrase& f, const Phrase& e) const;
-
- IPhrase
- MakeTableKey(const Phrase& f, const Phrase& e) const;
-
- void
- Cache(const ConfusionNet& input);
-
- void
- Cache(const Sentence& input);
-
- void
- auxCacheForSrcPhrase(const Phrase& f);
-
- Scores
- auxFindScoreForContext(const Candidates& cands, const Phrase& contex);
-
+ static const int SourceVocId = 0;
+ static const int TargetVocId = 1;
+
+ bool m_UseCache;
+ std::string m_FilePath;
+ CacheType m_Cache;
+ TableType m_Table;
+
+public:
+
+ static
+ bool
+ Create(std::istream& inFile, const std::string& outFileName);
+
+ LexicalReorderingTableTree(const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
+
+ ~LexicalReorderingTableTree();
+
+ bool IsCacheEnabled() const {
+ return m_UseCache;
+ };
+ void EnableCache() {
+ m_UseCache = true;
+ };
+ void DisableCache() {
+ m_UseCache = false;
};
+ void ClearCache() {
+ if (m_UseCache) m_Cache.clear();
+ };
+
+ virtual
+ std::vector<float>
+ GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
+
+ virtual
+ void
+ InitializeForInput(const InputType& input);
+
+ virtual
+ void
+ InitializeForInputPhrase(const Phrase& f) {
+ ClearCache();
+ auxCacheForSrcPhrase(f);
+ }
+
+
+private:
+ std::string
+ MakeCacheKey(const Phrase& f, const Phrase& e) const;
+
+ IPhrase
+ MakeTableKey(const Phrase& f, const Phrase& e) const;
+
+ void
+ Cache(const ConfusionNet& input);
+
+ void
+ Cache(const Sentence& input);
+
+ void
+ auxCacheForSrcPhrase(const Phrase& f);
+
+ Scores
+ auxFindScoreForContext(const Candidates& cands, const Phrase& contex);
+
+};
}
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 22eca8520..54f314574 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -199,7 +199,7 @@ void SparseReordering::CopyScores(
ScoreComponentCollection* scores) const
{
if (m_useBetween && direction == LRModel::Backward &&
- (reoType == LRModel::D || reoType == LRModel::DL || reoType == LRModel::DR)){
+ (reoType == LRModel::D || reoType == LRModel::DL || reoType == LRModel::DR)) {
size_t gapStart, gapEnd;
//NB: Using a static cast for speed, but could be nasty if
//using non-sentence input
diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp
index d2d4f881c..76ceb971c 100644
--- a/moses/FF/PhraseOrientationFeature.cpp
+++ b/moses/FF/PhraseOrientationFeature.cpp
@@ -53,13 +53,13 @@ void PhraseOrientationFeature::SetParameter(const std::string& key, const std::s
} else if (key == "distinguishStates") {
m_distinguishStates = Scan<bool>(value);
} else if (key == "sparseWord") {
- m_useSparseWord = Scan<bool>(value);
+ m_useSparseWord = Scan<bool>(value);
} else if (key == "sparseNT") {
- m_useSparseNT = Scan<bool>(value);
+ m_useSparseNT = Scan<bool>(value);
} else if (key == "targetWordList") {
- m_filenameTargetWordList = value;
+ m_filenameTargetWordList = value;
} else if (key == "sourceWordList") {
- m_filenameSourceWordList = value;
+ m_filenameSourceWordList = value;
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
@@ -80,7 +80,7 @@ void PhraseOrientationFeature::Load()
void PhraseOrientationFeature::LoadWordList(const std::string& filename,
- boost::unordered_set<const Factor*>& list)
+ boost::unordered_set<const Factor*>& list)
{
FEATUREVERBOSE(2, "Loading word list from file " << filename << std::endl);
FactorCollection &factorCollection = FactorCollection::Instance();
@@ -97,10 +97,10 @@ void PhraseOrientationFeature::LoadWordList(const std::string& filename,
}
-void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
- const TargetPhrase &targetPhrase,
- ScoreComponentCollection &scoreBreakdown,
- ScoreComponentCollection &estimatedFutureScore) const
+void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
+ const TargetPhrase &targetPhrase,
+ ScoreComponentCollection &scoreBreakdown,
+ ScoreComponentCollection &estimatedFutureScore) const
{
targetPhrase.SetRuleSource(source);
@@ -116,9 +116,9 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
}
-void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty,
- ScoreComponentCollection &scoreBreakdown,
- bool subtract) const
+void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty,
+ ScoreComponentCollection &scoreBreakdown,
+ bool subtract) const
{
size_t ffScoreIndex = scoreBreakdown.GetIndexes(this).first;
@@ -129,10 +129,10 @@ void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *o
size_t heuristicScoreIndexL2R = GetHeuristicScoreIndex(scoresL2R, 0);
if (subtract) {
- scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R,
+ scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R,
-scoresL2R[heuristicScoreIndexL2R]);
} else {
- scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R,
+ scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R,
scoresL2R[heuristicScoreIndexL2R]);
}
@@ -143,10 +143,10 @@ void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *o
size_t heuristicScoreIndexR2L = GetHeuristicScoreIndex(scoresR2L, m_offsetR2LScores);
if (subtract) {
- scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L,
+ scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L,
-scoresR2L[heuristicScoreIndexR2L]);
} else {
- scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L,
+ scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L,
scoresR2L[heuristicScoreIndexR2L]);
}
}
@@ -221,7 +221,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
<< " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright()
<< " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft()
<< std::endl);
-
+
LookaheadScore(orientationPhraseProperty, *accumulator, true);
const PhraseOrientationFeatureState* prevState =
@@ -490,8 +490,8 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
size_t PhraseOrientationFeature::GetHeuristicScoreIndex(const std::vector<float>& scores,
- size_t weightsVectorOffset,
- const std::bitset<3> possibleFutureOrientations) const
+ size_t weightsVectorOffset,
+ const std::bitset<3> possibleFutureOrientations) const
{
std::vector<float> weightedScores;
for ( size_t i=0; i<3; ++i ) {
@@ -532,8 +532,8 @@ void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID,
const PhraseOrientationFeatureState *state,
const std::bitset<3> orientation,
std::vector<float>& newScores,
- ScoreComponentCollection* scoreBreakdown) const
- // TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
+ ScoreComponentCollection* scoreBreakdown) const
+// TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
{
if (state->m_leftBoundaryIsSet) {
const std::string* recursiveOrientationString;
@@ -593,8 +593,8 @@ void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
const PhraseOrientationFeatureState *state,
const std::bitset<3> orientation,
std::vector<float>& newScores,
- ScoreComponentCollection* scoreBreakdown) const
- // TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
+ ScoreComponentCollection* scoreBreakdown) const
+// TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores)
{
if (state->m_rightBoundaryIsSet) {
const std::string* recursiveOrientationString;
@@ -651,8 +651,8 @@ void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID,
void PhraseOrientationFeature::SparseWordL2RScore(const ChartHypothesis* hypo,
- ScoreComponentCollection* scoreBreakdown,
- const std::string* o) const
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const
{
// target word
@@ -686,7 +686,7 @@ void PhraseOrientationFeature::SparseWordL2RScore(const ChartHypothesis* hypo,
}
// source word
-
+
WordsRange sourceSpan = hypo->GetCurrSourceRange();
const InputType& input = hypo->GetManager().GetSource();
const Sentence& sourceSentence = static_cast<const Sentence&>(input);
@@ -710,8 +710,8 @@ void PhraseOrientationFeature::SparseWordL2RScore(const ChartHypothesis* hypo,
void PhraseOrientationFeature::SparseWordR2LScore(const ChartHypothesis* hypo,
- ScoreComponentCollection* scoreBreakdown,
- const std::string* o) const
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const
{
// target word
@@ -745,7 +745,7 @@ void PhraseOrientationFeature::SparseWordR2LScore(const ChartHypothesis* hypo,
}
// source word
-
+
WordsRange sourceSpan = hypo->GetCurrSourceRange();
const InputType& input = hypo->GetManager().GetSource();
const Sentence& sourceSentence = static_cast<const Sentence&>(input);
@@ -769,8 +769,8 @@ void PhraseOrientationFeature::SparseWordR2LScore(const ChartHypothesis* hypo,
void PhraseOrientationFeature::SparseNonTerminalL2RScore(const Factor* nonTerminalSymbol,
- ScoreComponentCollection* scoreBreakdown,
- const std::string* o) const
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const
{
if ( nonTerminalSymbol != m_glueTargetLHS ) {
const std::string& nonTerminalString = nonTerminalSymbol->GetString().as_string();
@@ -783,8 +783,8 @@ void PhraseOrientationFeature::SparseNonTerminalL2RScore(const Factor* nonTermin
void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTerminalSymbol,
- ScoreComponentCollection* scoreBreakdown,
- const std::string* o) const
+ ScoreComponentCollection* scoreBreakdown,
+ const std::string* o) const
{
if ( nonTerminalSymbol != m_glueTargetLHS ) {
const std::string& nonTerminalString = nonTerminalSymbol->GetString().as_string();
diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h
index 9abbe9a8e..0ad566632 100644
--- a/moses/FF/PhraseOrientationFeature.h
+++ b/moses/FF/PhraseOrientationFeature.h
@@ -62,7 +62,7 @@ public:
void SetRightBoundaryR2L(const std::vector<float> &scores,
size_t heuristicScoreIndex,
std::bitset<3> &possibleFutureOrientations,
- const Factor* rightBoundaryNonTerminalSymbol,
+ const Factor* rightBoundaryNonTerminalSymbol,
const PhraseOrientationFeatureState* prevState) {
for (size_t i=0; i<3; ++i) {
m_rightBoundaryNonTerminalR2LScores[i] = scores[i];
@@ -177,7 +177,7 @@ protected:
for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
// compare only for possible future orientations
// (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks)
- if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
+ if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
if (state.m_leftBoundaryNonTerminalL2RScores[i] > otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
return 1;
}
@@ -238,7 +238,7 @@ protected:
for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) {
// compare only for possible future orientations
// (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks)
- if ( state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) {
+ if ( state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) {
if (state.m_rightBoundaryNonTerminalR2LScores[i] > otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
return 1;
}
@@ -314,7 +314,7 @@ public:
}
void SetParameter(const std::string& key, const std::string& value);
-
+
void Load();
void EvaluateInIsolation(const Phrase &source
@@ -353,12 +353,12 @@ protected:
void LoadWordList(const std::string& filename,
boost::unordered_set<const Factor*>& list);
- void LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty,
- ScoreComponentCollection &scoreBreakdown,
+ void LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty,
+ ScoreComponentCollection &scoreBreakdown,
bool subtract=false) const;
size_t GetHeuristicScoreIndex(const std::vector<float>& scores,
- size_t weightsVectorOffset,
+ size_t weightsVectorOffset,
const std::bitset<3> possibleFutureOrientations = 0x7) const;
void LeftBoundaryL2RScoreRecursive(int featureID,
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
index af8e89a3a..655b34ae2 100644
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
@@ -30,10 +30,30 @@ SoftSourceSyntacticConstraintsFeature::SoftSourceSyntacticConstraintsFeature(con
ReadParameters();
VERBOSE(1, " Done.");
VERBOSE(1, " Config:");
- VERBOSE(1, " Log probabilities"); if ( m_useLogprobs ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); }
- VERBOSE(1, " Sparse scores"); if ( m_useSparse ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); }
- VERBOSE(1, " Core labels"); if ( m_useCoreSourceLabels ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); }
- VERBOSE(1, " No mismatches"); if ( m_noMismatches ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); }
+ VERBOSE(1, " Log probabilities");
+ if ( m_useLogprobs ) {
+ VERBOSE(1, " active.");
+ } else {
+ VERBOSE(1, " inactive.");
+ }
+ VERBOSE(1, " Sparse scores");
+ if ( m_useSparse ) {
+ VERBOSE(1, " active.");
+ } else {
+ VERBOSE(1, " inactive.");
+ }
+ VERBOSE(1, " Core labels");
+ if ( m_useCoreSourceLabels ) {
+ VERBOSE(1, " active.");
+ } else {
+ VERBOSE(1, " inactive.");
+ }
+ VERBOSE(1, " No mismatches");
+ if ( m_noMismatches ) {
+ VERBOSE(1, " active.");
+ } else {
+ VERBOSE(1, " inactive.");
+ }
VERBOSE(1, std::endl);
}
@@ -50,9 +70,9 @@ void SoftSourceSyntacticConstraintsFeature::SetParameter(const std::string& key,
} else if (key == "noMismatches") {
m_noMismatches = Scan<bool>(value); // for a hard constraint, allow no mismatches (also set: weights 1 0 0 0 0 0, tuneable=false)
} else if (key == "logProbabilities") {
- m_useLogprobs = Scan<bool>(value);
+ m_useLogprobs = Scan<bool>(value);
} else if (key == "sparse") {
- m_useSparse = Scan<bool>(value);
+ m_useSparse = Scan<bool>(value);
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
@@ -146,8 +166,8 @@ void SoftSourceSyntacticConstraintsFeature::LoadCoreSourceLabelSet()
LoadLabelSet(m_coreSourceLabelSetFile, m_coreSourceLabels);
}
-void SoftSourceSyntacticConstraintsFeature::LoadLabelSet(std::string &filename,
- boost::unordered_set<size_t> &labelSet)
+void SoftSourceSyntacticConstraintsFeature::LoadLabelSet(std::string &filename,
+ boost::unordered_set<size_t> &labelSet)
{
FEATUREVERBOSE(2, "Loading core source label set from file " << m_coreSourceLabelSetFile << std::endl);
InputFileStream inFile(filename);
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.h b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
index e73993df1..550f432a6 100644
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.h
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h
@@ -30,7 +30,7 @@ public:
}
void SetParameter(const std::string& key, const std::string& value);
-
+
void Load();
void EvaluateInIsolation(const Phrase &source
diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h
index f54f3a746..c5364fd0d 100644
--- a/moses/FF/StatefulFeatureFunction.h
+++ b/moses/FF/StatefulFeatureFunction.h
@@ -17,9 +17,8 @@ class StatefulFeatureFunction: public FeatureFunction
static std::vector<const StatefulFeatureFunction*> m_statefulFFs;
public:
- static const std::vector<const StatefulFeatureFunction*>&
- GetStatefulFeatureFunctions()
- {
+ static const std::vector<const StatefulFeatureFunction*>&
+ GetStatefulFeatureFunctions() {
return m_statefulFFs;
}
diff --git a/moses/FF/UnalignedWordCountFeature.cpp b/moses/FF/UnalignedWordCountFeature.cpp
index 83a2ac0c3..9f0fe10db 100644
--- a/moses/FF/UnalignedWordCountFeature.cpp
+++ b/moses/FF/UnalignedWordCountFeature.cpp
@@ -32,8 +32,7 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
std::vector<bool> alignedSource(sourceLength, false);
std::vector<bool> alignedTarget(targetLength, false);
- for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); ++alignmentPoint)
- {
+ for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); ++alignmentPoint) {
alignedSource[ alignmentPoint->first ] = true;
alignedTarget[ alignmentPoint->second ] = true;
}
diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h
index 9be44c8b6..8b7330440 100644
--- a/moses/FF/VW/VW.h
+++ b/moses/FF/VW/VW.h
@@ -157,8 +157,8 @@ public:
// optionally update translation options using leave-one-out
std::vector<bool> keep = (m_leaveOneOut.size() > 0)
- ? LeaveOneOut(translationOptionList)
- : std::vector<bool>(translationOptionList.size(), true);
+ ? LeaveOneOut(translationOptionList)
+ : std::vector<bool>(translationOptionList.size(), true);
std::vector<float> losses(translationOptionList.size());
std::vector<float>::iterator iterLoss;
@@ -187,7 +187,7 @@ public:
const std::vector<VWFeatureBase*>& targetFeatures = VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
- for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin(), iterKeep = keep.begin() ;
+ for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin(), iterKeep = keep.begin() ;
iterTransOpt != translationOptionList.end() ; ++iterTransOpt, ++iterLoss) {
if (! *iterKeep)
@@ -369,7 +369,7 @@ private:
float sourceRawCount = 0.0;
const float ONE = 1.0001; // I don't understand floating point numbers
-
+
std::vector<bool> keepOpt;
TranslationOptionList::const_iterator iterTransOpt;
@@ -426,7 +426,7 @@ private:
std::string m_vwOptions;
// optionally contains feature name of a phrase table where we recompute scores with leaving one out
- std::string m_leaveOneOut;
+ std::string m_leaveOneOut;
Discriminative::Normalizer *m_normalizer = NULL;
TLSClassifier *m_tlsClassifier;