diff options
author | Hieu Hoang <hieuhoang@gmail.com> | 2015-02-19 15:27:23 +0300 |
---|---|---|
committer | Hieu Hoang <hieuhoang@gmail.com> | 2015-02-19 15:27:23 +0300 |
commit | 32de07502217879f0d354ec810c8e669f4d45d3c (patch) | |
tree | fafdf8f63d9902e4fd2b45f05902faa5d65df996 /moses/FF | |
parent | 6d4bad0f78ede5bcf7eaaea1de584f42bce8e91f (diff) |
beautify
Diffstat (limited to 'moses/FF')
-rw-r--r-- | moses/FF/FeatureFunction.cpp | 4 | ||||
-rw-r--r-- | moses/FF/InputFeature.cpp | 15 | ||||
-rw-r--r-- | moses/FF/LexicalReordering/LexicalReordering.cpp | 217 | ||||
-rw-r--r-- | moses/FF/LexicalReordering/LexicalReordering.h | 176 | ||||
-rw-r--r-- | moses/FF/LexicalReordering/LexicalReorderingState.cpp | 861 | ||||
-rw-r--r-- | moses/FF/LexicalReordering/LexicalReorderingState.h | 546 | ||||
-rw-r--r-- | moses/FF/LexicalReordering/LexicalReorderingTable.cpp | 1088 | ||||
-rw-r--r-- | moses/FF/LexicalReordering/LexicalReorderingTable.h | 333 | ||||
-rw-r--r-- | moses/FF/LexicalReordering/SparseReordering.cpp | 2 | ||||
-rw-r--r-- | moses/FF/PhraseOrientationFeature.cpp | 66 | ||||
-rw-r--r-- | moses/FF/PhraseOrientationFeature.h | 14 | ||||
-rw-r--r-- | moses/FF/SoftSourceSyntacticConstraintsFeature.cpp | 36 | ||||
-rw-r--r-- | moses/FF/SoftSourceSyntacticConstraintsFeature.h | 2 | ||||
-rw-r--r-- | moses/FF/StatefulFeatureFunction.h | 5 | ||||
-rw-r--r-- | moses/FF/UnalignedWordCountFeature.cpp | 3 | ||||
-rw-r--r-- | moses/FF/VW/VW.h | 10 |
16 files changed, 1708 insertions, 1670 deletions
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp index fa898857d..abe220161 100644 --- a/moses/FF/FeatureFunction.cpp +++ b/moses/FF/FeatureFunction.cpp @@ -124,7 +124,7 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va if (key == "tuneable") { m_tuneable = Scan<bool>(value); } else if (key == "tuneable-components") { - UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription() + UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription() << ": tuneable-components cannot be set if tuneable=false"); SetTuneableComponents(value); } else if (key == "require-sorting-after-source-context") { @@ -158,7 +158,7 @@ void FeatureFunction::SetTuneableComponents(const std::string& value) UTIL_THROW_IF2(toks.empty(), GetScoreProducerDescription() << ": Empty tuneable-components"); UTIL_THROW_IF2(toks.size()!=m_numScoreComponents, GetScoreProducerDescription() - << ": tuneable-components value has to be a comma-separated list of " + << ": tuneable-components value has to be a comma-separated list of " << m_numScoreComponents << " boolean values"); m_tuneableComponents.resize(m_numScoreComponents); diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp index 10e5347e4..7cbb428a6 100644 --- a/moses/FF/InputFeature.cpp +++ b/moses/FF/InputFeature.cpp @@ -52,14 +52,13 @@ void InputFeature::EvaluateWithSourceContext(const InputType &input , ScoreComponentCollection *estimatedFutureScore) const { if (m_legacy) { - //binary phrase-table does input feature itself - return; - } - else if (input.GetType() == WordLatticeInput){ - const ScorePair *scores = inputPath.GetInputScore(); - if (scores) { - scoreBreakdown.PlusEquals(this, *scores); - } + //binary phrase-table does input feature itself + return; + } else if (input.GetType() == WordLatticeInput) { + const ScorePair *scores = inputPath.GetInputScore(); + if (scores) { + scoreBreakdown.PlusEquals(this, *scores); + } } } diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index 2f0f334dd..9f75ac4bd 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -13,121 +13,116 @@ using namespace boost::algorithm; namespace Moses { - LexicalReordering:: - LexicalReordering(const std::string &line) - : StatefulFeatureFunction(line) - { - VERBOSE(1, "Initializing Lexical Reordering Feature.." << std::endl); - - map<string,string> sparseArgs; - m_haveDefaultScores = false; - for (size_t i = 0; i < m_args.size(); ++i) - { - const vector<string> &args = m_args[i]; - - if (args[0] == "type") - { - m_configuration.reset(new LRModel(args[1])); - m_configuration->SetScoreProducer(this); - m_modelTypeString = m_configuration->GetModelString(); - } - else if (args[0] == "input-factor") - m_factorsF =Tokenize<FactorType>(args[1]); - else if (args[0] == "output-factor") - m_factorsE =Tokenize<FactorType>(args[1]); - else if (args[0] == "path") - m_filePath = args[1]; - else if (starts_with(args[0], "sparse-")) - sparseArgs[args[0].substr(7)] = args[1]; - else if (args[0] == "default-scores") - { - vector<string> tokens = Tokenize(args[1],","); - for(size_t i=0; i<tokens.size(); i++) - m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]))); - m_haveDefaultScores = true; - } - else UTIL_THROW2("Unknown argument " + args[0]); - } - - switch(m_configuration->GetCondition()) - { - case LRModel::FE: - case LRModel::E: - UTIL_THROW_IF2(m_factorsE.empty(), - "TL factor mask for lexical reordering is " - << "unexpectedly empty"); - - if(m_configuration->GetCondition() == LRModel::E) - break; // else fall through - case LRModel::F: - UTIL_THROW_IF2(m_factorsF.empty(), - "SL factor mask for lexical reordering is " - << "unexpectedly empty"); - break; - default: - UTIL_THROW2("Unknown conditioning option!"); - } - - // sanity check: number of default scores - size_t numScores = m_configuration->GetNumScoreComponents(); - UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores, - "wrong number of default scores (" << m_defaultScores.size() - << ") for lexicalized reordering model (expected " - << m_configuration->GetNumScoreComponents() << ")"); - - m_configuration->ConfigureSparse(sparseArgs, this); - } - - LexicalReordering:: - ~LexicalReordering() - { } - - void - LexicalReordering:: - Load() - { - typedef LexicalReorderingTable LRTable; - m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF, - m_factorsE, std::vector<FactorType>())); - } - - Scores - LexicalReordering:: - GetProb(const Phrase& f, const Phrase& e) const - { - return m_table->GetScore(f, e, Phrase(ARRAY_SIZE_INCR)); - } - - FFState* - LexicalReordering:: - EvaluateWhenApplied(const Hypothesis& hypo, - const FFState* prev_state, - ScoreComponentCollection* out) const - { - VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl); - Scores score(GetNumScoreComponents(), 0); - const LRState *prev = dynamic_cast<const LRState *>(prev_state); - LRState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out); - - out->PlusEquals(this, score); - VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) END" << std::endl); - - return next_state; +LexicalReordering:: +LexicalReordering(const std::string &line) + : StatefulFeatureFunction(line) +{ + VERBOSE(1, "Initializing Lexical Reordering Feature.." << std::endl); + + map<string,string> sparseArgs; + m_haveDefaultScores = false; + for (size_t i = 0; i < m_args.size(); ++i) { + const vector<string> &args = m_args[i]; + + if (args[0] == "type") { + m_configuration.reset(new LRModel(args[1])); + m_configuration->SetScoreProducer(this); + m_modelTypeString = m_configuration->GetModelString(); + } else if (args[0] == "input-factor") + m_factorsF =Tokenize<FactorType>(args[1]); + else if (args[0] == "output-factor") + m_factorsE =Tokenize<FactorType>(args[1]); + else if (args[0] == "path") + m_filePath = args[1]; + else if (starts_with(args[0], "sparse-")) + sparseArgs[args[0].substr(7)] = args[1]; + else if (args[0] == "default-scores") { + vector<string> tokens = Tokenize(args[1],","); + for(size_t i=0; i<tokens.size(); i++) + m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]))); + m_haveDefaultScores = true; + } else UTIL_THROW2("Unknown argument " + args[0]); } - FFState const* - LexicalReordering::EmptyHypothesisState(const InputType &input) const - { - return m_configuration->CreateLRState(input); + switch(m_configuration->GetCondition()) { + case LRModel::FE: + case LRModel::E: + UTIL_THROW_IF2(m_factorsE.empty(), + "TL factor mask for lexical reordering is " + << "unexpectedly empty"); + + if(m_configuration->GetCondition() == LRModel::E) + break; // else fall through + case LRModel::F: + UTIL_THROW_IF2(m_factorsF.empty(), + "SL factor mask for lexical reordering is " + << "unexpectedly empty"); + break; + default: + UTIL_THROW2("Unknown conditioning option!"); } - bool - LexicalReordering:: - IsUseable(const FactorMask &mask) const - { - BOOST_FOREACH(FactorType const& f, m_factorsE) - { if (!mask[f]) return false; } - return true; + // sanity check: number of default scores + size_t numScores = m_configuration->GetNumScoreComponents(); + UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores, + "wrong number of default scores (" << m_defaultScores.size() + << ") for lexicalized reordering model (expected " + << m_configuration->GetNumScoreComponents() << ")"); + + m_configuration->ConfigureSparse(sparseArgs, this); +} + +LexicalReordering:: +~LexicalReordering() +{ } + +void +LexicalReordering:: +Load() +{ + typedef LexicalReorderingTable LRTable; + m_table.reset(LRTable::LoadAvailable(m_filePath, m_factorsF, + m_factorsE, std::vector<FactorType>())); +} + +Scores +LexicalReordering:: +GetProb(const Phrase& f, const Phrase& e) const +{ + return m_table->GetScore(f, e, Phrase(ARRAY_SIZE_INCR)); +} + +FFState* +LexicalReordering:: +EvaluateWhenApplied(const Hypothesis& hypo, + const FFState* prev_state, + ScoreComponentCollection* out) const +{ + VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) START" << std::endl); + Scores score(GetNumScoreComponents(), 0); + const LRState *prev = dynamic_cast<const LRState *>(prev_state); + LRState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out); + + out->PlusEquals(this, score); + VERBOSE(3,"LexicalReordering::Evaluate(const Hypothesis& hypo,...) END" << std::endl); + + return next_state; +} + +FFState const* +LexicalReordering::EmptyHypothesisState(const InputType &input) const +{ + return m_configuration->CreateLRState(input); +} + +bool +LexicalReordering:: +IsUseable(const FactorMask &mask) const +{ + BOOST_FOREACH(FactorType const& f, m_factorsE) { + if (!mask[f]) return false; } + return true; +} } diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index 071f90751..882d31e54 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -20,91 +20,97 @@ namespace Moses { - class Factor; - class Phrase; - class Hypothesis; - class InputType; - - // implementation of lexical reordering (Tilman ...) for phrase-based - // decoding - class LexicalReordering : public StatefulFeatureFunction - { - public: - LexicalReordering(const std::string &line); - virtual ~LexicalReordering(); - void Load(); - - virtual - bool - IsUseable(const FactorMask &mask) const; - - virtual - FFState const* - EmptyHypothesisState(const InputType &input) const; - - void - InitializeForInput(const InputType& i) - { m_table->InitializeForInput(i); } - - Scores - GetProb(const Phrase& f, const Phrase& e) const; - - virtual - FFState* - EvaluateWhenApplied(const Hypothesis& cur_hypo, - const FFState* prev_state, - ScoreComponentCollection* accumulator) const; - - virtual - FFState* - EvaluateWhenApplied(const ChartHypothesis&, int featureID, - ScoreComponentCollection*) const - { UTIL_THROW2("LexicalReordering is not valid for chart decoder"); } - - void - EvaluateWithSourceContext - (const InputType &input, - const InputPath &inputPath, - const TargetPhrase &targetPhrase, - const StackVec *stackVec, - ScoreComponentCollection& scoreBreakdown, - ScoreComponentCollection* estimatedFutureScore = NULL) const - { } - - void - EvaluateTranslationOptionListWithSourceContext - (const InputType &input, const TranslationOptionList &transOptList) const - { } - - void - EvaluateInIsolation(const Phrase &source, - const TargetPhrase &targetPhrase, - ScoreComponentCollection &scoreBreakdown, - ScoreComponentCollection &estimatedFutureScore) const - { } - - bool - GetHaveDefaultScores() { return m_haveDefaultScores; } - - float - GetDefaultScore( size_t i ) { return m_defaultScores[i]; } - - private: - bool DecodeCondition(std::string s); - bool DecodeDirection(std::string s); - bool DecodeNumFeatureFunctions(std::string s); - - boost::scoped_ptr<LRModel> m_configuration; - std::string m_modelTypeString; - std::vector<std::string> m_modelType; - boost::scoped_ptr<LexicalReorderingTable> m_table; - std::vector<LRModel::Condition> m_condition; - std::vector<FactorType> m_factorsE, m_factorsF; - std::string m_filePath; - bool m_haveDefaultScores; - Scores m_defaultScores; - }; - +class Factor; +class Phrase; +class Hypothesis; +class InputType; + +// implementation of lexical reordering (Tilman ...) for phrase-based +// decoding +class LexicalReordering : public StatefulFeatureFunction +{ +public: + LexicalReordering(const std::string &line); + virtual ~LexicalReordering(); + void Load(); + + virtual + bool + IsUseable(const FactorMask &mask) const; + + virtual + FFState const* + EmptyHypothesisState(const InputType &input) const; + + void + InitializeForInput(const InputType& i) { + m_table->InitializeForInput(i); + } + + Scores + GetProb(const Phrase& f, const Phrase& e) const; + + virtual + FFState* + EvaluateWhenApplied(const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const; + + virtual + FFState* + EvaluateWhenApplied(const ChartHypothesis&, int featureID, + ScoreComponentCollection*) const { + UTIL_THROW2("LexicalReordering is not valid for chart decoder"); + } + + void + EvaluateWithSourceContext + (const InputType &input, + const InputPath &inputPath, + const TargetPhrase &targetPhrase, + const StackVec *stackVec, + ScoreComponentCollection& scoreBreakdown, + ScoreComponentCollection* estimatedFutureScore = NULL) const + { } + + void + EvaluateTranslationOptionListWithSourceContext + (const InputType &input, const TranslationOptionList &transOptList) const + { } + + void + EvaluateInIsolation(const Phrase &source, + const TargetPhrase &targetPhrase, + ScoreComponentCollection &scoreBreakdown, + ScoreComponentCollection &estimatedFutureScore) const + { } + + bool + GetHaveDefaultScores() { + return m_haveDefaultScores; + } + + float + GetDefaultScore( size_t i ) { + return m_defaultScores[i]; + } + +private: + bool DecodeCondition(std::string s); + bool DecodeDirection(std::string s); + bool DecodeNumFeatureFunctions(std::string s); + + boost::scoped_ptr<LRModel> m_configuration; + std::string m_modelTypeString; + std::vector<std::string> m_modelType; + boost::scoped_ptr<LexicalReorderingTable> m_table; + std::vector<LRModel::Condition> m_condition; + std::vector<FactorType> m_factorsE, m_factorsF; + std::string m_filePath; + bool m_haveDefaultScores; + Scores m_defaultScores; +}; + } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index c35f0cd65..cbc04eddb 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -1,4 +1,4 @@ -// -*- c++ -*- +// -*- c++ -*- #include <vector> #include <string> @@ -14,460 +14,471 @@ namespace Moses { - bool - IsMonotonicStep(WordsRange const& prev, // words range of last source phrase - WordsRange const& cur, // words range of current source phrase - WordsBitmap const& cov) // coverage bitmap - { - size_t e = prev.GetEndPos() + 1; - size_t s = cur.GetStartPos(); - return (s == e || (s >= e && !cov.GetValue(e))); - } - - bool - IsSwap(WordsRange const& prev, WordsRange const& cur, WordsBitmap const& cov) - { - size_t s = prev.GetStartPos(); - size_t e = cur.GetEndPos(); - return (e+1 == s || (e < s && !cov.GetValue(s-1))); - } +bool +IsMonotonicStep(WordsRange const& prev, // words range of last source phrase + WordsRange const& cur, // words range of current source phrase + WordsBitmap const& cov) // coverage bitmap +{ + size_t e = prev.GetEndPos() + 1; + size_t s = cur.GetStartPos(); + return (s == e || (s >= e && !cov.GetValue(e))); +} - size_t - LRModel:: - GetNumberOfTypes() const - { - return ((m_modelType == MSD) ? 3 : - (m_modelType == MSLR) ? 4 : 2); - } - - size_t - LRModel:: - GetNumScoreComponents() const - { - size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes(); - return ((m_direction == Bidirectional) - ? 2 * score_per_dir + m_additionalScoreComponents - : score_per_dir + m_additionalScoreComponents); +bool +IsSwap(WordsRange const& prev, WordsRange const& cur, WordsBitmap const& cov) +{ + size_t s = prev.GetStartPos(); + size_t e = cur.GetEndPos(); + return (e+1 == s || (e < s && !cov.GetValue(s-1))); +} + +size_t +LRModel:: +GetNumberOfTypes() const +{ + return ((m_modelType == MSD) ? 3 : + (m_modelType == MSLR) ? 4 : 2); +} + +size_t +LRModel:: +GetNumScoreComponents() const +{ + size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes(); + return ((m_direction == Bidirectional) + ? 2 * score_per_dir + m_additionalScoreComponents + : score_per_dir + m_additionalScoreComponents); +} + +void +LRModel:: +ConfigureSparse(const std::map<std::string,std::string>& sparseArgs, + const LexicalReordering* producer) +{ + if (sparseArgs.size()) { + m_sparse.reset(new SparseReordering(sparseArgs, producer)); } +} + +void +LRModel:: +SetAdditionalScoreComponents(size_t number) +{ + m_additionalScoreComponents = number; +} + +/// return orientation for the first phrase +LRModel::ReorderingType +LRModel:: +GetOrientation(WordsRange const& cur) const +{ + UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None"); + return ((m_modelType == LeftRight) ? R : + (cur.GetStartPos() == 0) ? M : + (m_modelType == MSD) ? D : + (m_modelType == MSLR) ? DR : NM); +} + +LRModel::ReorderingType +LRModel:: +GetOrientation(WordsRange const& prev, WordsRange const& cur) const +{ + UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified"); + return ((m_modelType == LeftRight) + ? prev.GetEndPos() <= cur.GetStartPos() ? R : L + : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M + : (m_modelType == Monotonic) ? NM + : (prev.GetStartPos() == cur.GetEndPos() + 1) ? S + : (m_modelType == MSD) ? D + : (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL); +} - void - LRModel:: - ConfigureSparse(const std::map<std::string,std::string>& sparseArgs, - const LexicalReordering* producer) - { - if (sparseArgs.size()) { - m_sparse.reset(new SparseReordering(sparseArgs, producer)); +LRModel::ReorderingType +LRModel:: +GetOrientation(int const reoDistance) const +{ + // this one is for HierarchicalReorderingBackwardState + return ((m_modelType == LeftRight) + ? (reoDistance >= 1) ? R : L + : (reoDistance == 1) ? M + : (m_modelType == Monotonic) ? NM + : (reoDistance == -1) ? S + : (m_modelType == MSD) ? D + : (reoDistance > 1) ? DR : DL); +} + +LRModel::ReorderingType +LRModel:: +GetOrientation(WordsRange const& prev, WordsRange const& cur, + WordsBitmap const& cov) const +{ + return ((m_modelType == LeftRight) + ? cur.GetStartPos() > prev.GetEndPos() ? R : L + : IsMonotonicStep(prev,cur,cov) ? M + : (m_modelType == Monotonic) ? NM + : IsSwap(prev,cur,cov) ? S + : (m_modelType == MSD) ? D + : cur.GetStartPos() > prev.GetEndPos() ? DR : DL); +} + +LRModel:: +LRModel(const std::string &modelType) + : m_modelString(modelType) + , m_scoreProducer(NULL) + , m_modelType(None) + , m_phraseBased(true) + , m_collapseScores(false) + , m_direction(Backward) + , m_additionalScoreComponents(0) +{ + std::vector<std::string> config = Tokenize<std::string>(modelType, "-"); + + for (size_t i=0; i<config.size(); ++i) { + if (config[i] == "hier") { + m_phraseBased = false; + } else if (config[i] == "phrase") { + m_phraseBased = true; + } else if (config[i] == "wbe") { + m_phraseBased = true; + } + // no word-based decoding available, fall-back to phrase-based + // This is the old lexical reordering model combination of moses + + else if (config[i] == "msd") { + m_modelType = MSD; + } else if (config[i] == "mslr") { + m_modelType = MSLR; + } else if (config[i] == "monotonicity") { + m_modelType = Monotonic; + } else if (config[i] == "leftright") { + m_modelType = LeftRight; } - } - void - LRModel:: - SetAdditionalScoreComponents(size_t number) - { - m_additionalScoreComponents = number; - } + // unidirectional is deprecated, use backward instead + else if (config[i] == "unidirectional") { + m_direction = Backward; + } else if (config[i] == "backward") { + m_direction = Backward; + } else if (config[i] == "forward") { + m_direction = Forward; + } else if (config[i] == "bidirectional") { + m_direction = Bidirectional; + } - /// return orientation for the first phrase - LRModel::ReorderingType - LRModel:: - GetOrientation(WordsRange const& cur) const - { - UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None"); - return ((m_modelType == LeftRight) ? R : - (cur.GetStartPos() == 0) ? M : - (m_modelType == MSD) ? D : - (m_modelType == MSLR) ? DR : NM); - } - - LRModel::ReorderingType - LRModel:: - GetOrientation(WordsRange const& prev, WordsRange const& cur) const - { - UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified"); - return ((m_modelType == LeftRight) - ? prev.GetEndPos() <= cur.GetStartPos() ? R : L - : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M - : (m_modelType == Monotonic) ? NM - : (prev.GetStartPos() == cur.GetEndPos() + 1) ? S - : (m_modelType == MSD) ? D - : (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL); - } + else if (config[i] == "f") { + m_condition = F; + } else if (config[i] == "fe") { + m_condition = FE; + } - LRModel::ReorderingType - LRModel:: - GetOrientation(int const reoDistance) const - { // this one is for HierarchicalReorderingBackwardState - return ((m_modelType == LeftRight) - ? (reoDistance >= 1) ? R : L - : (reoDistance == 1) ? M - : (m_modelType == Monotonic) ? NM - : (reoDistance == -1) ? S - : (m_modelType == MSD) ? D - : (reoDistance > 1) ? DR : DL); - } - - LRModel::ReorderingType - LRModel:: - GetOrientation(WordsRange const& prev, WordsRange const& cur, - WordsBitmap const& cov) const - { - return ((m_modelType == LeftRight) - ? cur.GetStartPos() > prev.GetEndPos() ? R : L - : IsMonotonicStep(prev,cur,cov) ? M - : (m_modelType == Monotonic) ? NM - : IsSwap(prev,cur,cov) ? S - : (m_modelType == MSD) ? D - : cur.GetStartPos() > prev.GetEndPos() ? DR : DL); + else if (config[i] == "collapseff") { + m_collapseScores = true; + } else if (config[i] == "allff") { + m_collapseScores = false; + } else { + std::cerr + << "Illegal part in the lexical reordering configuration string: " + << config[i] << std::endl; + exit(1); + } } - LRModel:: - LRModel(const std::string &modelType) - : m_modelString(modelType) - , m_scoreProducer(NULL) - , m_modelType(None) - , m_phraseBased(true) - , m_collapseScores(false) - , m_direction(Backward) - , m_additionalScoreComponents(0) - { - std::vector<std::string> config = Tokenize<std::string>(modelType, "-"); - - for (size_t i=0; i<config.size(); ++i) - { - if (config[i] == "hier") { m_phraseBased = false; } - else if (config[i] == "phrase") { m_phraseBased = true; } - else if (config[i] == "wbe") { m_phraseBased = true; } - // no word-based decoding available, fall-back to phrase-based - // This is the old lexical reordering model combination of moses - - else if (config[i] == "msd") { m_modelType = MSD; } - else if (config[i] == "mslr") { m_modelType = MSLR; } - else if (config[i] == "monotonicity") { m_modelType = Monotonic; } - else if (config[i] == "leftright") { m_modelType = LeftRight; } - - // unidirectional is deprecated, use backward instead - else if (config[i] == "unidirectional") { m_direction = Backward; } - else if (config[i] == "backward") { m_direction = Backward; } - else if (config[i] == "forward") { m_direction = Forward; } - else if (config[i] == "bidirectional") { m_direction = Bidirectional; } - - else if (config[i] == "f") { m_condition = F; } - else if (config[i] == "fe") { m_condition = FE; } - - else if (config[i] == "collapseff") { m_collapseScores = true; } - else if (config[i] == "allff") { m_collapseScores = false; } - else - { - std::cerr - << "Illegal part in the lexical reordering configuration string: " - << config[i] << std::endl; - exit(1); - } - } - - if (m_modelType == None) - { - std::cerr - << "You need to specify the type of the reordering model " - << "(msd, monotonicity,...)" << std::endl; - exit(1); - } + if (m_modelType == None) { + std::cerr + << "You need to specify the type of the reordering model " + << "(msd, monotonicity,...)" << std::endl; + exit(1); } - - LRState * - LRModel:: - CreateLRState(const InputType &input) const - { - LRState *bwd = NULL, *fwd = NULL; - size_t offset = 0; - - switch(m_direction) - { - case Backward: - case Bidirectional: - if (m_phraseBased) - bwd = new PhraseBasedReorderingState(*this, Backward, offset); - else - bwd = new HReorderingBackwardState(*this, offset); - offset += m_collapseScores ? 1 : GetNumberOfTypes(); - if (m_direction == Backward) return bwd; // else fall through - case Forward: - if (m_phraseBased) - fwd = new PhraseBasedReorderingState(*this, Forward, offset); - else - fwd = new HReorderingForwardState(*this, input.GetSize(), offset); - offset += m_collapseScores ? 1 : GetNumberOfTypes(); - if (m_direction == Forward) return fwd; - } - return new BidirectionalReorderingState(*this, bwd, fwd, 0); +} + +LRState * +LRModel:: +CreateLRState(const InputType &input) const +{ + LRState *bwd = NULL, *fwd = NULL; + size_t offset = 0; + + switch(m_direction) { + case Backward: + case Bidirectional: + if (m_phraseBased) + bwd = new PhraseBasedReorderingState(*this, Backward, offset); + else + bwd = new HReorderingBackwardState(*this, offset); + offset += m_collapseScores ? 1 : GetNumberOfTypes(); + if (m_direction == Backward) return bwd; // else fall through + case Forward: + if (m_phraseBased) + fwd = new PhraseBasedReorderingState(*this, Forward, offset); + else + fwd = new HReorderingForwardState(*this, input.GetSize(), offset); + offset += m_collapseScores ? 1 : GetNumberOfTypes(); + if (m_direction == Forward) return fwd; } + return new BidirectionalReorderingState(*this, bwd, fwd, 0); +} - void - LRState:: - CopyScores(ScoreComponentCollection* accum, - const TranslationOption &topt, - const InputType& input, - ReorderingType reoType) const - { - // don't call this on a bidirectional object - UTIL_THROW_IF2(m_direction != LRModel::Backward && - m_direction != LRModel::Forward, - "Unknown direction: " << m_direction); - - TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward) - ? &topt : m_prevOption); - - LexicalReordering* producer = m_configuration.GetScoreProducer(); - Scores const* cached = relevantOpt->GetLexReorderingScores(producer); - - // The approach here is bizarre! Why create a whole vector and do - // vector addition (acumm->PlusEquals) to update a single value? - UG - size_t off_remote = m_offset + reoType; - size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote; - - UTIL_THROW_IF2(off_remote >= producer->GetNumScoreComponents(), - "offset out of vector bounds!"); - - // look up applicable score from vectore of scores - if(cached) - { - Scores scores(producer->GetNumScoreComponents(),0); - scores[off_local ] = (*cached)[off_remote]; - accum->PlusEquals(producer, scores); - } - - // else: use default scores (if specified) - else if (producer->GetHaveDefaultScores()) - { - Scores scores(producer->GetNumScoreComponents(),0); - scores[off_local] = producer->GetDefaultScore(off_remote); - accum->PlusEquals(m_configuration.GetScoreProducer(), scores); - } - // note: if no default score, no cost - - const SparseReordering* sparse = m_configuration.GetSparseReordering(); - if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, - m_direction, accum); - } - - - int - LRState:: - ComparePrevScores(const TranslationOption *other) const - { - LexicalReordering* producer = m_configuration.GetScoreProducer(); - const Scores* myScores = m_prevOption->GetLexReorderingScores(producer); - const Scores* yrScores = other->GetLexReorderingScores(producer); - - if(myScores == yrScores) return 0; - - // The pointers are NULL if a phrase pair isn't found in the reordering table. - if(yrScores == NULL) return -1; - if(myScores == NULL) return 1; - - size_t stop = m_offset + m_configuration.GetNumberOfTypes(); - for(size_t i = m_offset; i < stop; i++) - { - if((*myScores)[i] < (*yrScores)[i]) return -1; - if((*myScores)[i] > (*yrScores)[i]) return 1; - } - return 0; +void +LRState:: +CopyScores(ScoreComponentCollection* accum, + const TranslationOption &topt, + const InputType& input, + ReorderingType reoType) const +{ + // don't call this on a bidirectional object + UTIL_THROW_IF2(m_direction != LRModel::Backward && + m_direction != LRModel::Forward, + "Unknown direction: " << m_direction); + + TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward) + ? &topt : m_prevOption); + + LexicalReordering* producer = m_configuration.GetScoreProducer(); + Scores const* cached = relevantOpt->GetLexReorderingScores(producer); + + // The approach here is bizarre! Why create a whole vector and do + // vector addition (acumm->PlusEquals) to update a single value? - UG + size_t off_remote = m_offset + reoType; + size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote; + + UTIL_THROW_IF2(off_remote >= producer->GetNumScoreComponents(), + "offset out of vector bounds!"); + + // look up applicable score from vectore of scores + if(cached) { + Scores scores(producer->GetNumScoreComponents(),0); + scores[off_local ] = (*cached)[off_remote]; + accum->PlusEquals(producer, scores); } - // =========================================================================== - // PHRASE BASED REORDERING STATE - // =========================================================================== - bool PhraseBasedReorderingState::m_useFirstBackwardScore = true; - - PhraseBasedReorderingState:: - PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, - const TranslationOption &topt) - : LRState(prev, topt) - , m_prevRange(topt.GetSourceWordsRange()) - , m_first(false) - { } - - - PhraseBasedReorderingState:: - PhraseBasedReorderingState(const LRModel &config, - LRModel::Direction dir, size_t offset) - : LRState(config, dir, offset) - , m_prevRange(NOT_FOUND,NOT_FOUND) - , m_first(true) - { } - - - int - PhraseBasedReorderingState:: - Compare(const FFState& o) const - { - if (&o == this) return 0; - - const PhraseBasedReorderingState* other = static_cast<const PhraseBasedReorderingState*>(&o); - if (m_prevRange == other->m_prevRange) { - if (m_direction == LRModel::Forward) { - return ComparePrevScores(other->m_prevOption); - } else { - return 0; - } - } else if (m_prevRange < other->m_prevRange) { - return -1; - } - return 1; + // else: use default scores (if specified) + else if (producer->GetHaveDefaultScores()) { + Scores scores(producer->GetNumScoreComponents(),0); + scores[off_local] = producer->GetDefaultScore(off_remote); + accum->PlusEquals(m_configuration.GetScoreProducer(), scores); } + // note: if no default score, no cost - LRState* - PhraseBasedReorderingState:: - Expand(const TranslationOption& topt, const InputType& input, - ScoreComponentCollection* scores) const - { - // const LRModel::ModelType modelType = m_configuration.GetModelType(); - - if ((m_direction != LRModel::Forward && m_useFirstBackwardScore) || !m_first) - { - LRModel const& lrmodel = m_configuration; - WordsRange const cur = topt.GetSourceWordsRange(); - ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur) - : lrmodel.GetOrientation(m_prevRange,cur)); - CopyScores(scores, topt, input, reoType); - } - return new PhraseBasedReorderingState(this, topt); - } + const SparseReordering* sparse = m_configuration.GetSparseReordering(); + if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, + m_direction, accum); +} - /////////////////////////// - //BidirectionalReorderingState +int +LRState:: +ComparePrevScores(const TranslationOption *other) const +{ + LexicalReordering* producer = m_configuration.GetScoreProducer(); + const Scores* myScores = m_prevOption->GetLexReorderingScores(producer); + const Scores* yrScores = other->GetLexReorderingScores(producer); - int - BidirectionalReorderingState:: - Compare(FFState const& o) const - { - if (&o == this) return 0; - - BidirectionalReorderingState const &other - = static_cast<BidirectionalReorderingState const&>(o); + if(myScores == yrScores) return 0; - int cmp = m_backward->Compare(*other.m_backward); - return (cmp < 0) ? -1 : cmp ? 1 : m_forward->Compare(*other.m_forward); - } + // The pointers are NULL if a phrase pair isn't found in the reordering table. + if(yrScores == NULL) return -1; + if(myScores == NULL) return 1; - LRState* - BidirectionalReorderingState:: - Expand(const TranslationOption& topt, const InputType& input, - ScoreComponentCollection* scores) const - { - LRState *newbwd = m_backward->Expand(topt,input, scores); - LRState *newfwd = m_forward->Expand(topt, input, scores); - return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset); + size_t stop = m_offset + m_configuration.GetNumberOfTypes(); + for(size_t i = m_offset; i < stop; i++) { + if((*myScores)[i] < (*yrScores)[i]) return -1; + if((*myScores)[i] > (*yrScores)[i]) return 1; } + return 0; +} + +// =========================================================================== +// PHRASE BASED REORDERING STATE +// =========================================================================== +bool PhraseBasedReorderingState::m_useFirstBackwardScore = true; - /////////////////////////// - //HierarchicalReorderingBackwardState - - HReorderingBackwardState:: - HReorderingBackwardState(const HReorderingBackwardState *prev, - const TranslationOption &topt, - ReorderingStack reoStack) - : LRState(prev, topt), m_reoStack(reoStack) - { } - - HReorderingBackwardState:: - HReorderingBackwardState(const LRModel &config, size_t offset) - : LRState(config, LRModel::Backward, offset) - { } - - - int - HReorderingBackwardState:: - Compare(const FFState& o) const - { - const HReorderingBackwardState& other - = static_cast<const HReorderingBackwardState&>(o); - return m_reoStack.Compare(other.m_reoStack); +PhraseBasedReorderingState:: +PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, + const TranslationOption &topt) + : LRState(prev, topt) + , m_prevRange(topt.GetSourceWordsRange()) + , m_first(false) +{ } + + +PhraseBasedReorderingState:: +PhraseBasedReorderingState(const LRModel &config, + LRModel::Direction dir, size_t offset) + : LRState(config, dir, offset) + , m_prevRange(NOT_FOUND,NOT_FOUND) + , m_first(true) +{ } + + +int +PhraseBasedReorderingState:: +Compare(const FFState& o) const +{ + if (&o == this) return 0; + + const PhraseBasedReorderingState* other = static_cast<const PhraseBasedReorderingState*>(&o); + if (m_prevRange == other->m_prevRange) { + if (m_direction == LRModel::Forward) { + return ComparePrevScores(other->m_prevOption); + } else { + return 0; + } + } else if (m_prevRange < other->m_prevRange) { + return -1; } - - LRState* - HReorderingBackwardState:: - Expand(const TranslationOption& topt, const InputType& input, - ScoreComponentCollection* scores) const - { - HReorderingBackwardState* nextState; - nextState = new HReorderingBackwardState(this, topt, m_reoStack); - WordsRange swrange = topt.GetSourceWordsRange(); - int reoDistance = nextState->m_reoStack.ShiftReduce(swrange); - ReorderingType reoType = m_configuration.GetOrientation(reoDistance); + return 1; +} + +LRState* +PhraseBasedReorderingState:: +Expand(const TranslationOption& topt, const InputType& input, + ScoreComponentCollection* scores) const +{ + // const LRModel::ModelType modelType = m_configuration.GetModelType(); + + if ((m_direction != LRModel::Forward && m_useFirstBackwardScore) || !m_first) { + LRModel const& lrmodel = m_configuration; + WordsRange const cur = topt.GetSourceWordsRange(); + ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur) + : lrmodel.GetOrientation(m_prevRange,cur)); CopyScores(scores, topt, input, reoType); - return nextState; } + return new PhraseBasedReorderingState(this, topt); +} - /////////////////////////// - //HReorderingForwardState - - HReorderingForwardState:: - HReorderingForwardState(const LRModel &config, - size_t size, size_t offset) - : LRState(config, LRModel::Forward, offset) - , m_first(true) - , m_prevRange(NOT_FOUND,NOT_FOUND) - , m_coverage(size) - { } - - HReorderingForwardState:: - HReorderingForwardState(const HReorderingForwardState *prev, - const TranslationOption &topt) - : LRState(prev, topt) - , m_first(false) - , m_prevRange(topt.GetSourceWordsRange()) - , m_coverage(prev->m_coverage) - { - m_coverage.SetValue(topt.GetSourceWordsRange(), true); - } - int - HReorderingForwardState:: - Compare(const FFState& o) const - { - if (&o == this) return 0; - - HReorderingForwardState const& other - = static_cast<HReorderingForwardState const&>(o); - - return ((m_prevRange == other.m_prevRange) - ? ComparePrevScores(other.m_prevOption) - : (m_prevRange < other.m_prevRange) ? -1 : 1); - } +/////////////////////////// +//BidirectionalReorderingState + +int +BidirectionalReorderingState:: +Compare(FFState const& o) const +{ + if (&o == this) return 0; + + BidirectionalReorderingState const &other + = static_cast<BidirectionalReorderingState const&>(o); + + int cmp = m_backward->Compare(*other.m_backward); + return (cmp < 0) ? -1 : cmp ? 1 : m_forward->Compare(*other.m_forward); +} - // For compatibility with the phrase-based reordering model, scoring is one - // step delayed. - // The forward model takes determines orientations heuristically as follows: - // mono: if the next phrase comes after the conditioning phrase and - // - there is a gap to the right of the conditioning phrase, or - // - the next phrase immediately follows it - // swap: if the next phrase goes before the conditioning phrase and - // - there is a gap to the left of the conditioning phrase, or - // - the next phrase immediately precedes it - // dright: if the next phrase follows the conditioning phrase and other - // stuff comes in between - // dleft: if the next phrase precedes the conditioning phrase and other - // stuff comes in between - - LRState* - HReorderingForwardState:: - Expand(TranslationOption const& topt, InputType const& input, - ScoreComponentCollection* scores) const - { - const WordsRange cur = topt.GetSourceWordsRange(); - // keep track of the current coverage ourselves so we don't need the hypothesis - WordsBitmap cov = m_coverage; - cov.SetValue(cur, true); - if (!m_first) - { - LRModel::ReorderingType reoType; - reoType = m_configuration.GetOrientation(m_prevRange,cur,cov); - CopyScores(scores, topt, input, reoType); - } - return new HReorderingForwardState(this, topt); +LRState* +BidirectionalReorderingState:: +Expand(const TranslationOption& topt, const InputType& input, + ScoreComponentCollection* scores) const +{ + LRState *newbwd = m_backward->Expand(topt,input, scores); + LRState *newfwd = m_forward->Expand(topt, input, scores); + return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset); +} + +/////////////////////////// +//HierarchicalReorderingBackwardState + +HReorderingBackwardState:: +HReorderingBackwardState(const HReorderingBackwardState *prev, + const TranslationOption &topt, + ReorderingStack reoStack) + : LRState(prev, topt), m_reoStack(reoStack) +{ } + +HReorderingBackwardState:: +HReorderingBackwardState(const LRModel &config, size_t offset) + : LRState(config, LRModel::Backward, offset) +{ } + + +int +HReorderingBackwardState:: +Compare(const FFState& o) const +{ + const HReorderingBackwardState& other + = static_cast<const HReorderingBackwardState&>(o); + return m_reoStack.Compare(other.m_reoStack); +} + +LRState* +HReorderingBackwardState:: +Expand(const TranslationOption& topt, const InputType& input, + ScoreComponentCollection* scores) const +{ + HReorderingBackwardState* nextState; + nextState = new HReorderingBackwardState(this, topt, m_reoStack); + WordsRange swrange = topt.GetSourceWordsRange(); + int reoDistance = nextState->m_reoStack.ShiftReduce(swrange); + ReorderingType reoType = m_configuration.GetOrientation(reoDistance); + CopyScores(scores, topt, input, reoType); + return nextState; +} + +/////////////////////////// +//HReorderingForwardState + +HReorderingForwardState:: +HReorderingForwardState(const LRModel &config, + size_t size, size_t offset) + : LRState(config, LRModel::Forward, offset) + , m_first(true) + , m_prevRange(NOT_FOUND,NOT_FOUND) + , m_coverage(size) +{ } + +HReorderingForwardState:: +HReorderingForwardState(const HReorderingForwardState *prev, + const TranslationOption &topt) + : LRState(prev, topt) + , m_first(false) + , m_prevRange(topt.GetSourceWordsRange()) + , m_coverage(prev->m_coverage) +{ + m_coverage.SetValue(topt.GetSourceWordsRange(), true); +} + +int +HReorderingForwardState:: +Compare(const FFState& o) const +{ + if (&o == this) return 0; + + HReorderingForwardState const& other + = static_cast<HReorderingForwardState const&>(o); + + return ((m_prevRange == other.m_prevRange) + ? ComparePrevScores(other.m_prevOption) + : (m_prevRange < other.m_prevRange) ? -1 : 1); +} + +// For compatibility with the phrase-based reordering model, scoring is one +// step delayed. +// The forward model takes determines orientations heuristically as follows: +// mono: if the next phrase comes after the conditioning phrase and +// - there is a gap to the right of the conditioning phrase, or +// - the next phrase immediately follows it +// swap: if the next phrase goes before the conditioning phrase and +// - there is a gap to the left of the conditioning phrase, or +// - the next phrase immediately precedes it +// dright: if the next phrase follows the conditioning phrase and other +// stuff comes in between +// dleft: if the next phrase precedes the conditioning phrase and other +// stuff comes in between + +LRState* +HReorderingForwardState:: +Expand(TranslationOption const& topt, InputType const& input, + ScoreComponentCollection* scores) const +{ + const WordsRange cur = topt.GetSourceWordsRange(); + // keep track of the current coverage ourselves so we don't need the hypothesis + WordsBitmap cov = m_coverage; + cov.SetValue(cur, true); + if (!m_first) { + LRModel::ReorderingType reoType; + reoType = m_configuration.GetOrientation(m_prevRange,cur,cov); + CopyScores(scores, topt, input, reoType); } + return new HReorderingForwardState(this, topt); +} } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index 83607a66a..b11eed0cf 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -17,272 +17,284 @@ namespace Moses { - class LRState; - class LexicalReordering; - class SparseReordering; - - //! Factory class for lexical reordering states - class LRModel - { - public: - typedef int ReorderingType; - friend class LexicalReordering; - enum ModelType { Monotonic, MSD, MSLR, LeftRight, None }; - enum Direction { Forward, Backward, Bidirectional }; - enum Condition { F, E, FE }; - - // constants for the different types of reordering - // (correspond to indices in the respective table) - static const ReorderingType M = 0; // monotonic - static const ReorderingType NM = 1; // non-monotonic - static const ReorderingType S = 1; // swap - static const ReorderingType D = 2; // discontinuous - static const ReorderingType DL = 2; // discontinuous, left - static const ReorderingType DR = 3; // discontinuous, right - static const ReorderingType R = 0; // right - static const ReorderingType L = 1; // left - static const ReorderingType MAX = 3; // largest possible - - // determine orientation, depending on model: - - - ReorderingType // for first phrase in phrase-based - GetOrientation(WordsRange const& cur) const; - - ReorderingType // for non-first phrases in phrase-based - GetOrientation(WordsRange const& prev, WordsRange const& cur) const; - - ReorderingType // for HReorderingForwardState - GetOrientation(WordsRange const& prev, WordsRange const& cur, - WordsBitmap const& cov) const; - - ReorderingType // for HReorderingBackwarddState - GetOrientation(int const reoDistance) const; - - LRModel(const std::string &modelType); - - void - ConfigureSparse(const std::map<std::string,std::string>& sparseArgs, - const LexicalReordering* producer); - - LRState* - CreateLRState(const InputType &input) const; - - size_t GetNumberOfTypes() const; - size_t GetNumScoreComponents() const; - void SetAdditionalScoreComponents(size_t number); - - LexicalReordering* - GetScoreProducer() const { return m_scoreProducer; } - - ModelType GetModelType() const { return m_modelType; } - Direction GetDirection() const { return m_direction; } - Condition GetCondition() const { return m_condition; } - - bool - IsPhraseBased() const - { return m_phraseBased; } - - bool - CollapseScores() const - { return m_collapseScores; } - - SparseReordering const* - GetSparseReordering() const - { return m_sparse.get(); } - - private: - void - SetScoreProducer(LexicalReordering* scoreProducer) - { m_scoreProducer = scoreProducer; } - - std::string const& - GetModelString() const - { return m_modelString; } - - std::string m_modelString; - LexicalReordering *m_scoreProducer; - ModelType m_modelType; - bool m_phraseBased; - bool m_collapseScores; - Direction m_direction; - Condition m_condition; - size_t m_additionalScoreComponents; - boost::scoped_ptr<SparseReordering> m_sparse; - }; - - //! Abstract class for lexical reordering model states - class LRState : public FFState - { - public: - - typedef int ReorderingType; - - virtual - int - Compare(const FFState& o) const = 0; - - virtual - LRState* - Expand(const TranslationOption& hypo, const InputType& input, - ScoreComponentCollection* scores) const = 0; - - static - LRState* - CreateLRState(const std::vector<std::string>& config, - LRModel::Direction dir, - const InputType &input); - - protected: - - const LRModel& m_configuration; - - // The following is the true direction of the object, which can be - // Backward or Forward even if the Configuration has Bidirectional. - LRModel::Direction m_direction; - size_t m_offset; - //forward scores are conditioned on prev option, so need to remember it - const TranslationOption *m_prevOption; - - inline - LRState(const LRState *prev, - const TranslationOption &topt) - : m_configuration(prev->m_configuration) - , m_direction(prev->m_direction) - , m_offset(prev->m_offset) - , m_prevOption(&topt) - { } - - inline - LRState(const LRModel &config, - LRModel::Direction dir, - size_t offset) - : m_configuration(config) - , m_direction(dir) - , m_offset(offset) - , m_prevOption(NULL) - { } - - // copy the right scores in the right places, taking into account - // forward/backward, offset, collapse - void - CopyScores(ScoreComponentCollection* scores, - const TranslationOption& topt, - const InputType& input, ReorderingType reoType) const; - - int - ComparePrevScores(const TranslationOption *other) const; - }; - - //! @todo what is this? - class BidirectionalReorderingState - : public LRState - { - private: - const LRState *m_backward; - const LRState *m_forward; - public: - BidirectionalReorderingState(const LRModel &config, - const LRState *bw, - const LRState *fw, size_t offset) - : LRState(config, - LRModel::Bidirectional, - offset) - , m_backward(bw) - , m_forward(fw) - { } - - ~BidirectionalReorderingState() - { - delete m_backward; - delete m_forward; - } - - virtual - int - Compare(const FFState& o) const; - - virtual - LRState* - Expand(const TranslationOption& topt, const InputType& input, - ScoreComponentCollection* scores) const; - }; - - //! State for the standard Moses implementation of lexical reordering models - //! (see Koehn et al, Edinburgh System Description for the 2005 NIST MT - //! Evaluation) - class PhraseBasedReorderingState - : public LRState - { - private: - WordsRange m_prevRange; - bool m_first; - public: - static bool m_useFirstBackwardScore; - PhraseBasedReorderingState(const LRModel &config, - LRModel::Direction dir, - size_t offset); - PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, - const TranslationOption &topt); - - virtual - int - Compare(const FFState& o) const; - - virtual - LRState* - Expand(const TranslationOption& topt,const InputType& input, - ScoreComponentCollection* scores) const; - - ReorderingType GetOrientationTypeMSD(WordsRange currRange) const; - ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const; - ReorderingType GetOrientationTypeMonotonic(WordsRange currRange) const; - ReorderingType GetOrientationTypeLeftRight(WordsRange currRange) const; - }; - - //! State for a hierarchical reordering model (see Galley and Manning, A - //! Simple and Effective Hierarchical Phrase Reordering Model, EMNLP 2008) - //! backward state (conditioned on the previous phrase) - class HReorderingBackwardState : public LRState - { - private: - ReorderingStack m_reoStack; - public: - HReorderingBackwardState(const LRModel &config, size_t offset); - HReorderingBackwardState(const HReorderingBackwardState *prev, - const TranslationOption &topt, - ReorderingStack reoStack); - - virtual int Compare(const FFState& o) const; - virtual LRState* Expand(const TranslationOption& hypo, const InputType& input, - ScoreComponentCollection* scores) const; - - private: - ReorderingType GetOrientationTypeMSD(int reoDistance) const; - ReorderingType GetOrientationTypeMSLR(int reoDistance) const; - ReorderingType GetOrientationTypeMonotonic(int reoDistance) const; - ReorderingType GetOrientationTypeLeftRight(int reoDistance) const; - }; - - - //!forward state (conditioned on the next phrase) - class HReorderingForwardState : public LRState - { - private: - bool m_first; - WordsRange m_prevRange; - WordsBitmap m_coverage; - - public: - HReorderingForwardState(const LRModel &config, size_t sentenceLength, - size_t offset); - HReorderingForwardState(const HReorderingForwardState *prev, - const TranslationOption &topt); - - virtual int Compare(const FFState& o) const; - virtual LRState* Expand(const TranslationOption& hypo, - const InputType& input, - ScoreComponentCollection* scores) const; - }; +class LRState; +class LexicalReordering; +class SparseReordering; + +//! Factory class for lexical reordering states +class LRModel +{ +public: + typedef int ReorderingType; + friend class LexicalReordering; + enum ModelType { Monotonic, MSD, MSLR, LeftRight, None }; + enum Direction { Forward, Backward, Bidirectional }; + enum Condition { F, E, FE }; + + // constants for the different types of reordering + // (correspond to indices in the respective table) + static const ReorderingType M = 0; // monotonic + static const ReorderingType NM = 1; // non-monotonic + static const ReorderingType S = 1; // swap + static const ReorderingType D = 2; // discontinuous + static const ReorderingType DL = 2; // discontinuous, left + static const ReorderingType DR = 3; // discontinuous, right + static const ReorderingType R = 0; // right + static const ReorderingType L = 1; // left + static const ReorderingType MAX = 3; // largest possible + + // determine orientation, depending on model: + + + ReorderingType // for first phrase in phrase-based + GetOrientation(WordsRange const& cur) const; + + ReorderingType // for non-first phrases in phrase-based + GetOrientation(WordsRange const& prev, WordsRange const& cur) const; + + ReorderingType // for HReorderingForwardState + GetOrientation(WordsRange const& prev, WordsRange const& cur, + WordsBitmap const& cov) const; + + ReorderingType // for HReorderingBackwarddState + GetOrientation(int const reoDistance) const; + + LRModel(const std::string &modelType); + + void + ConfigureSparse(const std::map<std::string,std::string>& sparseArgs, + const LexicalReordering* producer); + + LRState* + CreateLRState(const InputType &input) const; + + size_t GetNumberOfTypes() const; + size_t GetNumScoreComponents() const; + void SetAdditionalScoreComponents(size_t number); + + LexicalReordering* + GetScoreProducer() const { + return m_scoreProducer; + } + + ModelType GetModelType() const { + return m_modelType; + } + Direction GetDirection() const { + return m_direction; + } + Condition GetCondition() const { + return m_condition; + } + + bool + IsPhraseBased() const { + return m_phraseBased; + } + + bool + CollapseScores() const { + return m_collapseScores; + } + + SparseReordering const* + GetSparseReordering() const { + return m_sparse.get(); + } + +private: + void + SetScoreProducer(LexicalReordering* scoreProducer) { + m_scoreProducer = scoreProducer; + } + + std::string const& + GetModelString() const { + return m_modelString; + } + + std::string m_modelString; + LexicalReordering *m_scoreProducer; + ModelType m_modelType; + bool m_phraseBased; + bool m_collapseScores; + Direction m_direction; + Condition m_condition; + size_t m_additionalScoreComponents; + boost::scoped_ptr<SparseReordering> m_sparse; +}; + +//! Abstract class for lexical reordering model states +class LRState : public FFState +{ +public: + + typedef int ReorderingType; + + virtual + int + Compare(const FFState& o) const = 0; + + virtual + LRState* + Expand(const TranslationOption& hypo, const InputType& input, + ScoreComponentCollection* scores) const = 0; + + static + LRState* + CreateLRState(const std::vector<std::string>& config, + LRModel::Direction dir, + const InputType &input); + +protected: + + const LRModel& m_configuration; + + // The following is the true direction of the object, which can be + // Backward or Forward even if the Configuration has Bidirectional. + LRModel::Direction m_direction; + size_t m_offset; + //forward scores are conditioned on prev option, so need to remember it + const TranslationOption *m_prevOption; + + inline + LRState(const LRState *prev, + const TranslationOption &topt) + : m_configuration(prev->m_configuration) + , m_direction(prev->m_direction) + , m_offset(prev->m_offset) + , m_prevOption(&topt) + { } + + inline + LRState(const LRModel &config, + LRModel::Direction dir, + size_t offset) + : m_configuration(config) + , m_direction(dir) + , m_offset(offset) + , m_prevOption(NULL) + { } + + // copy the right scores in the right places, taking into account + // forward/backward, offset, collapse + void + CopyScores(ScoreComponentCollection* scores, + const TranslationOption& topt, + const InputType& input, ReorderingType reoType) const; + + int + ComparePrevScores(const TranslationOption *other) const; +}; + +//! @todo what is this? +class BidirectionalReorderingState + : public LRState +{ +private: + const LRState *m_backward; + const LRState *m_forward; +public: + BidirectionalReorderingState(const LRModel &config, + const LRState *bw, + const LRState *fw, size_t offset) + : LRState(config, + LRModel::Bidirectional, + offset) + , m_backward(bw) + , m_forward(fw) + { } + + ~BidirectionalReorderingState() { + delete m_backward; + delete m_forward; + } + + virtual + int + Compare(const FFState& o) const; + + virtual + LRState* + Expand(const TranslationOption& topt, const InputType& input, + ScoreComponentCollection* scores) const; +}; + +//! State for the standard Moses implementation of lexical reordering models +//! (see Koehn et al, Edinburgh System Description for the 2005 NIST MT +//! Evaluation) +class PhraseBasedReorderingState + : public LRState +{ +private: + WordsRange m_prevRange; + bool m_first; +public: + static bool m_useFirstBackwardScore; + PhraseBasedReorderingState(const LRModel &config, + LRModel::Direction dir, + size_t offset); + PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, + const TranslationOption &topt); + + virtual + int + Compare(const FFState& o) const; + + virtual + LRState* + Expand(const TranslationOption& topt,const InputType& input, + ScoreComponentCollection* scores) const; + + ReorderingType GetOrientationTypeMSD(WordsRange currRange) const; + ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const; + ReorderingType GetOrientationTypeMonotonic(WordsRange currRange) const; + ReorderingType GetOrientationTypeLeftRight(WordsRange currRange) const; +}; + +//! State for a hierarchical reordering model (see Galley and Manning, A +//! Simple and Effective Hierarchical Phrase Reordering Model, EMNLP 2008) +//! backward state (conditioned on the previous phrase) +class HReorderingBackwardState : public LRState +{ +private: + ReorderingStack m_reoStack; +public: + HReorderingBackwardState(const LRModel &config, size_t offset); + HReorderingBackwardState(const HReorderingBackwardState *prev, + const TranslationOption &topt, + ReorderingStack reoStack); + + virtual int Compare(const FFState& o) const; + virtual LRState* Expand(const TranslationOption& hypo, const InputType& input, + ScoreComponentCollection* scores) const; + +private: + ReorderingType GetOrientationTypeMSD(int reoDistance) const; + ReorderingType GetOrientationTypeMSLR(int reoDistance) const; + ReorderingType GetOrientationTypeMonotonic(int reoDistance) const; + ReorderingType GetOrientationTypeLeftRight(int reoDistance) const; +}; + + +//!forward state (conditioned on the next phrase) +class HReorderingForwardState : public LRState +{ +private: + bool m_first; + WordsRange m_prevRange; + WordsBitmap m_coverage; + +public: + HReorderingForwardState(const LRModel &config, size_t sentenceLength, + size_t offset); + HReorderingForwardState(const HReorderingForwardState *prev, + const TranslationOption &topt); + + virtual int Compare(const FFState& o) const; + virtual LRState* Expand(const TranslationOption& hypo, + const InputType& input, + ScoreComponentCollection* scores) const; +}; } diff --git a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp index ec79163a7..387874c34 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp @@ -15,597 +15,577 @@ namespace Moses { - //cleans str of leading and tailing spaces - std::string auxClearString(const std::string& str) - { - int i = 0, j = str.size()-1; - while(i <= j) { - if(' ' != str[i]) { - break; - } else { - ++i; - } +//cleans str of leading and tailing spaces +std::string auxClearString(const std::string& str) +{ + int i = 0, j = str.size()-1; + while(i <= j) { + if(' ' != str[i]) { + break; + } else { + ++i; } - while(j >= i) { - if(' ' != str[j]) { - break; - } else { - --j; - } + } + while(j >= i) { + if(' ' != str[j]) { + break; + } else { + --j; } - return str.substr(i,j-i+1); } + return str.substr(i,j-i+1); +} - void auxAppend(IPhrase& head, const IPhrase& tail) - { - head.reserve(head.size()+tail.size()); - for(size_t i = 0; i < tail.size(); ++i) { - head.push_back(tail[i]); - } +void auxAppend(IPhrase& head, const IPhrase& tail) +{ + head.reserve(head.size()+tail.size()); + for(size_t i = 0; i < tail.size(); ++i) { + head.push_back(tail[i]); } +} - LexicalReorderingTable* - LexicalReorderingTable:: - LoadAvailable(const std::string& filePath, - const FactorList& f_factors, - const FactorList& e_factors, - const FactorList& c_factors) - { - //decide use Compact or Tree or Memory table +LexicalReorderingTable* +LexicalReorderingTable:: +LoadAvailable(const std::string& filePath, + const FactorList& f_factors, + const FactorList& e_factors, + const FactorList& c_factors) +{ + //decide use Compact or Tree or Memory table #ifdef HAVE_CMPH - LexicalReorderingTable *compactLexr = NULL; - compactLexr = LexicalReorderingTableCompact::CheckAndLoad(filePath + ".minlexr", f_factors, e_factors, c_factors); - if(compactLexr) - return compactLexr; + LexicalReorderingTable *compactLexr = NULL; + compactLexr = LexicalReorderingTableCompact::CheckAndLoad(filePath + ".minlexr", f_factors, e_factors, c_factors); + if(compactLexr) + return compactLexr; #endif - LexicalReorderingTable* ret; - if (FileExists(filePath+".binlexr.idx") ) - ret = new LexicalReorderingTableTree(filePath, f_factors, - e_factors, c_factors); - else - ret = new LexicalReorderingTableMemory(filePath, f_factors, - e_factors, c_factors); - return ret; - } + LexicalReorderingTable* ret; + if (FileExists(filePath+".binlexr.idx") ) + ret = new LexicalReorderingTableTree(filePath, f_factors, + e_factors, c_factors); + else + ret = new LexicalReorderingTableMemory(filePath, f_factors, + e_factors, c_factors); + return ret; +} - LexicalReorderingTableMemory:: - LexicalReorderingTableMemory(const std::string& filePath, - const std::vector<FactorType>& f_factors, - const std::vector<FactorType>& e_factors, - const std::vector<FactorType>& c_factors) - : LexicalReorderingTable(f_factors, e_factors, c_factors) - { - LoadFromFile(filePath); - } - - LexicalReorderingTableMemory:: - ~LexicalReorderingTableMemory() { } - - std::vector<float> - LexicalReorderingTableMemory::GetScore(const Phrase& f, - const Phrase& e, - const Phrase& c) - { - //rather complicated because of const can't use []... as [] might enter new things into std::map - //also can't have to be careful with words range if c is empty can't use c.GetSize()-1 will underflow and be large - TableType::const_iterator r; - std::string key; - if(0 == c.GetSize()) { - key = MakeKey(f,e,c); +LexicalReorderingTableMemory:: +LexicalReorderingTableMemory(const std::string& filePath, + const std::vector<FactorType>& f_factors, + const std::vector<FactorType>& e_factors, + const std::vector<FactorType>& c_factors) + : LexicalReorderingTable(f_factors, e_factors, c_factors) +{ + LoadFromFile(filePath); +} + +LexicalReorderingTableMemory:: +~LexicalReorderingTableMemory() { } + +std::vector<float> +LexicalReorderingTableMemory::GetScore(const Phrase& f, + const Phrase& e, + const Phrase& c) +{ + //rather complicated because of const can't use []... as [] might enter new things into std::map + //also can't have to be careful with words range if c is empty can't use c.GetSize()-1 will underflow and be large + TableType::const_iterator r; + std::string key; + if(0 == c.GetSize()) { + key = MakeKey(f,e,c); + r = m_Table.find(key); + if(m_Table.end() != r) { + return r->second; + } + } else { + //right try from large to smaller context + for(size_t i = 0; i <= c.GetSize(); ++i) { + Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1))); + key = MakeKey(f,e,sub_c); r = m_Table.find(key); if(m_Table.end() != r) { - return r->second; - } - } else { - //right try from large to smaller context - for(size_t i = 0; i <= c.GetSize(); ++i) { - Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1))); - key = MakeKey(f,e,sub_c); - r = m_Table.find(key); - if(m_Table.end() != r) { - return r->second; - } + return r->second; } } - return Scores(); } + return Scores(); +} - void - LexicalReorderingTableMemory:: - DbgDump(std::ostream* out) const - { - TableType::const_iterator i; - for(i = m_Table.begin(); i != m_Table.end(); ++i) - { - *out << " key: '" << i->first << "' score: "; - *out << "(num scores: " << (i->second).size() << ")"; - for(size_t j = 0; j < (i->second).size(); ++j) - *out << (i->second)[j] << " "; - - *out << "\n"; - } - }; - - std::string - LexicalReorderingTableMemory::MakeKey(const Phrase& f, - const Phrase& e, - const Phrase& c) const - { - return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)), - auxClearString(e.GetStringRep(m_FactorsE)), - auxClearString(c.GetStringRep(m_FactorsC))); +void +LexicalReorderingTableMemory:: +DbgDump(std::ostream* out) const +{ + TableType::const_iterator i; + for(i = m_Table.begin(); i != m_Table.end(); ++i) { + *out << " key: '" << i->first << "' score: "; + *out << "(num scores: " << (i->second).size() << ")"; + for(size_t j = 0; j < (i->second).size(); ++j) + *out << (i->second)[j] << " "; + + *out << "\n"; } +}; + +std::string +LexicalReorderingTableMemory::MakeKey(const Phrase& f, + const Phrase& e, + const Phrase& c) const +{ + return MakeKey(auxClearString(f.GetStringRep(m_FactorsF)), + auxClearString(e.GetStringRep(m_FactorsE)), + auxClearString(c.GetStringRep(m_FactorsC))); +} - std::string - LexicalReorderingTableMemory::MakeKey(const std::string& f, - const std::string& e, - const std::string& c) const - { - std::string key; - if(!f.empty()) key += f; - if(!m_FactorsE.empty()) { if(!key.empty()) { key += "|||"; } key += e; } - if(!m_FactorsC.empty()) { if(!key.empty()) { key += "|||"; } key += c; } - return key; +std::string +LexicalReorderingTableMemory::MakeKey(const std::string& f, + const std::string& e, + const std::string& c) const +{ + std::string key; + if(!f.empty()) key += f; + if(!m_FactorsE.empty()) { + if(!key.empty()) { + key += "|||"; + } + key += e; + } + if(!m_FactorsC.empty()) { + if(!key.empty()) { + key += "|||"; + } + key += c; + } + return key; +} + +void +LexicalReorderingTableMemory:: +LoadFromFile(const std::string& filePath) +{ + std::string fileName = filePath; + if(!FileExists(fileName) && FileExists(fileName+".gz")) + fileName += ".gz"; + + InputFileStream file(fileName); + std::string line(""), key(""); + int numScores = -1; + std::cerr << "Loading table into memory..."; + while(!getline(file, line).eof()) { + std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||"); + int t = 0 ; + std::string f(""),e(""),c(""); + + if(!m_FactorsF.empty()) { + //there should be something for f + f = auxClearString(tokens.at(t)); + ++t; + } + if(!m_FactorsE.empty()) { + //there should be something for e + e = auxClearString(tokens.at(t)); + ++t; + } + if(!m_FactorsC.empty()) { + //there should be something for c + c = auxClearString(tokens.at(t)); + ++t; + } + //last token are the probs + std::vector<float> p = Scan<float>(Tokenize(tokens.at(t))); + //sanity check: all lines must have equall number of probs + if(-1 == numScores) { + numScores = (int)p.size(); //set in first line + } + if((int)p.size() != numScores) { + TRACE_ERR( "found inconsistent number of probabilities... found " + << p.size() << " expected " << numScores << std::endl); + exit(0); + } + std::transform(p.begin(),p.end(),p.begin(),TransformScore); + std::transform(p.begin(),p.end(),p.begin(),FloorScore); + //save it all into our map + m_Table[MakeKey(f,e,c)] = p; + } + std::cerr << "done.\n"; +} + +LexicalReorderingTableTree:: +LexicalReorderingTableTree(const std::string& filePath, + const std::vector<FactorType>& f_factors, + const std::vector<FactorType>& e_factors, + const std::vector<FactorType>& c_factors) + : LexicalReorderingTable(f_factors, e_factors, c_factors) + , m_UseCache(false) + , m_FilePath(filePath) +{ + m_Table.reset(new PrefixTreeMap()); + m_Table->Read(m_FilePath+".binlexr"); +} + +LexicalReorderingTableTree:: +~LexicalReorderingTableTree() +{ } + +Scores +LexicalReorderingTableTree:: +GetScore(const Phrase& f, const Phrase& e, const Phrase& c) +{ + if((!m_FactorsF.empty() && 0 == f.GetSize()) + || (!m_FactorsE.empty() && 0 == e.GetSize())) { + //NOTE: no check for c as c might be empty, e.g. start of sentence + //not a proper key + // phi: commented out, since e may be empty (drop-unknown) + //std::cerr << "Not a proper key!\n"; + return Scores(); } - void - LexicalReorderingTableMemory:: - LoadFromFile(const std::string& filePath) - { - std::string fileName = filePath; - if(!FileExists(fileName) && FileExists(fileName+".gz")) - fileName += ".gz"; - - InputFileStream file(fileName); - std::string line(""), key(""); - int numScores = -1; - std::cerr << "Loading table into memory..."; - while(!getline(file, line).eof()) - { - std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||"); - int t = 0 ; - std::string f(""),e(""),c(""); - - if(!m_FactorsF.empty()) { - //there should be something for f - f = auxClearString(tokens.at(t)); - ++t; - } - if(!m_FactorsE.empty()) { - //there should be something for e - e = auxClearString(tokens.at(t)); - ++t; - } - if(!m_FactorsC.empty()) { - //there should be something for c - c = auxClearString(tokens.at(t)); - ++t; - } - //last token are the probs - std::vector<float> p = Scan<float>(Tokenize(tokens.at(t))); - //sanity check: all lines must have equall number of probs - if(-1 == numScores) { - numScores = (int)p.size(); //set in first line - } - if((int)p.size() != numScores) { - TRACE_ERR( "found inconsistent number of probabilities... found " - << p.size() << " expected " << numScores << std::endl); - exit(0); - } - std::transform(p.begin(),p.end(),p.begin(),TransformScore); - std::transform(p.begin(),p.end(),p.begin(),FloorScore); - //save it all into our map - m_Table[MakeKey(f,e,c)] = p; + CacheType::iterator i; + + if(m_UseCache) { + std::pair<CacheType::iterator, bool> r; + r = m_Cache.insert(std::make_pair(MakeCacheKey(f,e),Candidates())); + if(!r.second) return auxFindScoreForContext((r.first)->second, c); + i = r.first; + } else if((i = m_Cache.find(MakeCacheKey(f,e))) != m_Cache.end()) + // although we might not be caching now, cache might be none empty! + return auxFindScoreForContext(i->second, c); + + // not in cache => go to file... + Candidates cands; + m_Table->GetCandidates(MakeTableKey(f,e), &cands); + if(cands.empty()) return Scores(); + if(m_UseCache) i->second = cands; + + if(m_FactorsC.empty()) { + UTIL_THROW_IF2(1 != cands.size(), "Error"); + return cands[0].GetScore(0); + } else return auxFindScoreForContext(cands, c); +}; + +Scores +LexicalReorderingTableTree:: +auxFindScoreForContext(const Candidates& cands, const Phrase& context) +{ + if(m_FactorsC.empty()) { + UTIL_THROW_IF2(cands.size() > 1, "Error"); + return (cands.size() == 1) ? cands[0].GetScore(0) : Scores(); + } else { + std::vector<std::string> cvec; + for(size_t i = 0; i < context.GetSize(); ++i) + cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false)); + + IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId); + IPhrase sub_c; + IPhrase::iterator start = c.begin(); + for(size_t j = 0; j <= context.GetSize(); ++j, ++start) { + sub_c.assign(start, c.end()); + for(size_t cand = 0; cand < cands.size(); ++cand) { + IPhrase p = cands[cand].GetPhrase(0); + if(cands[cand].GetPhrase(0) == sub_c) + return cands[cand].GetScore(0); } - std::cerr << "done.\n"; + } + return Scores(); } - - LexicalReorderingTableTree:: - LexicalReorderingTableTree(const std::string& filePath, - const std::vector<FactorType>& f_factors, - const std::vector<FactorType>& e_factors, - const std::vector<FactorType>& c_factors) - : LexicalReorderingTable(f_factors, e_factors, c_factors) - , m_UseCache(false) - , m_FilePath(filePath) - { +} + +void +LexicalReorderingTableTree:: +InitializeForInput(const InputType& input) +{ + ClearCache(); + if(ConfusionNet const* cn = dynamic_cast<ConfusionNet const*>(&input)) { + Cache(*cn); + } else if (dynamic_cast<Sentence const*>(&input)) { + // Cache(*s); ... this just takes up too much memory, we cache elsewhere + DisableCache(); + } + if (!m_Table.get()) { + //load thread specific table. m_Table.reset(new PrefixTreeMap()); m_Table->Read(m_FilePath+".binlexr"); } - - LexicalReorderingTableTree:: - ~LexicalReorderingTableTree() - { } - - Scores - LexicalReorderingTableTree:: - GetScore(const Phrase& f, const Phrase& e, const Phrase& c) - { - if((!m_FactorsF.empty() && 0 == f.GetSize()) - || (!m_FactorsE.empty() && 0 == e.GetSize())) - { - //NOTE: no check for c as c might be empty, e.g. start of sentence - //not a proper key - // phi: commented out, since e may be empty (drop-unknown) - //std::cerr << "Not a proper key!\n"; - return Scores(); +}; + +bool +LexicalReorderingTableTree:: +Create(std::istream& inFile, const std::string& outFileName) +{ + typedef PrefixTreeSA<LabelId,OFF_T> PSA; + + std::string + line, + ofn(outFileName+".binlexr.srctree"), + oft(outFileName+".binlexr.tgtdata"), + ofi(outFileName+".binlexr.idx"), + ofsv(outFileName+".binlexr.voc0"), + oftv(outFileName+".binlexr.voc1"); + + FILE *os = fOpen(ofn.c_str(),"wb"); + FILE *ot = fOpen(oft.c_str(),"wb"); + + PSA *psa = new PSA; + PSA::setDefault(InvalidOffT); + WordVoc* voc[3]; + + LabelId currFirstWord = InvalidLabelId; + IPhrase currKey; + + Candidates cands; + std::vector<OFF_T> vo; + size_t lnc = 0; + size_t numTokens = 0; + size_t numKeyTokens = 0; + while(getline(inFile, line)) { + ++lnc; + if(0 == lnc % 10000) TRACE_ERR("."); + IPhrase key; + Scores score; + + std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||"); + std::string w; + if(1 == lnc) { + //do some init stuff in the first line + numTokens = tokens.size(); + if(tokens.size() == 2) { + // f ||| score + numKeyTokens = 1; + voc[0] = new WordVoc(); + voc[1] = 0; + } else if(3 == tokens.size() || 4 == tokens.size()) { + //either f ||| e ||| score or f ||| e ||| c ||| score + numKeyTokens = 2; + voc[0] = new WordVoc(); //f voc + voc[1] = new WordVoc(); //e voc + voc[2] = voc[1]; //c & e share voc } + } else { + //sanity check ALL lines must have same number of tokens + UTIL_THROW_IF2(numTokens != tokens.size(), + "Lines do not have the same number of tokens"); + } + size_t phrase = 0; + for(; phrase < numKeyTokens; ++phrase) { + //conditioned on more than just f... need ||| + if(phrase >=1) key.push_back(PrefixTreeMap::MagicWord); + std::istringstream is(tokens[phrase]); + while(is >> w) key.push_back(voc[phrase]->add(w)); + } - CacheType::iterator i; + //collect all non key phrases, i.e. c + std::vector<IPhrase> tgt_phrases; + tgt_phrases.resize(numTokens - numKeyTokens - 1); + for(size_t j = 0; j < tgt_phrases.size(); ++j, ++phrase) { + std::istringstream is(tokens[numKeyTokens + j]); + while(is >> w) tgt_phrases[j].push_back(voc[phrase]->add(w)); + } - if(m_UseCache) - { - std::pair<CacheType::iterator, bool> r; - r = m_Cache.insert(std::make_pair(MakeCacheKey(f,e),Candidates())); - if(!r.second) return auxFindScoreForContext((r.first)->second, c); - i = r.first; - } - else if((i = m_Cache.find(MakeCacheKey(f,e))) != m_Cache.end()) - // although we might not be caching now, cache might be none empty! - return auxFindScoreForContext(i->second, c); + //last token is score + std::istringstream is(tokens[numTokens-1]); + while(is >> w) score.push_back(atof(w.c_str())); - // not in cache => go to file... - Candidates cands; - m_Table->GetCandidates(MakeTableKey(f,e), &cands); - if(cands.empty()) return Scores(); - if(m_UseCache) i->second = cands; - - if(m_FactorsC.empty()) - { - UTIL_THROW_IF2(1 != cands.size(), "Error"); - return cands[0].GetScore(0); - } - else return auxFindScoreForContext(cands, c); - }; - - Scores - LexicalReorderingTableTree:: - auxFindScoreForContext(const Candidates& cands, const Phrase& context) - { - if(m_FactorsC.empty()) - { - UTIL_THROW_IF2(cands.size() > 1, "Error"); - return (cands.size() == 1) ? cands[0].GetScore(0) : Scores(); - } - else - { - std::vector<std::string> cvec; - for(size_t i = 0; i < context.GetSize(); ++i) - cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false)); - - IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId); - IPhrase sub_c; - IPhrase::iterator start = c.begin(); - for(size_t j = 0; j <= context.GetSize(); ++j, ++start) - { - sub_c.assign(start, c.end()); - for(size_t cand = 0; cand < cands.size(); ++cand) - { - IPhrase p = cands[cand].GetPhrase(0); - if(cands[cand].GetPhrase(0) == sub_c) - return cands[cand].GetScore(0); - } - } - return Scores(); - } - } + //transform score now... + std::transform(score.begin(),score.end(),score.begin(),TransformScore); + std::transform(score.begin(),score.end(),score.begin(),FloorScore); + std::vector<Scores> scores; + scores.push_back(score); - void - LexicalReorderingTableTree:: - InitializeForInput(const InputType& input) - { - ClearCache(); - if(ConfusionNet const* cn = dynamic_cast<ConfusionNet const*>(&input)) - { - Cache(*cn); - } - else if (dynamic_cast<Sentence const*>(&input)) - { - // Cache(*s); ... this just takes up too much memory, we cache elsewhere - DisableCache(); - } - if (!m_Table.get()) - { - //load thread specific table. - m_Table.reset(new PrefixTreeMap()); - m_Table->Read(m_FilePath+".binlexr"); + if(key.empty()) { + TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n"); + continue; + } + + //first time inits + if(currFirstWord == InvalidLabelId) currFirstWord = key[0]; + if(currKey.empty()) { + currKey = key; + //insert key into tree + UTIL_THROW_IF2(psa == NULL, "Object not yet created"); + PSA::Data& d = psa->insert(key); + if(d == InvalidOffT) d = fTell(ot); + else { + TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" + << lnc << "): '" << line << "\n"); + return false; } - }; - - bool - LexicalReorderingTableTree:: - Create(std::istream& inFile, const std::string& outFileName) - { - typedef PrefixTreeSA<LabelId,OFF_T> PSA; - - std::string - line, - ofn(outFileName+".binlexr.srctree"), - oft(outFileName+".binlexr.tgtdata"), - ofi(outFileName+".binlexr.idx"), - ofsv(outFileName+".binlexr.voc0"), - oftv(outFileName+".binlexr.voc1"); - - FILE *os = fOpen(ofn.c_str(),"wb"); - FILE *ot = fOpen(oft.c_str(),"wb"); - - PSA *psa = new PSA; - PSA::setDefault(InvalidOffT); - WordVoc* voc[3]; - - LabelId currFirstWord = InvalidLabelId; - IPhrase currKey; - - Candidates cands; - std::vector<OFF_T> vo; - size_t lnc = 0; - size_t numTokens = 0; - size_t numKeyTokens = 0; - while(getline(inFile, line)) - { - ++lnc; - if(0 == lnc % 10000) TRACE_ERR("."); - IPhrase key; - Scores score; - - std::vector<std::string> tokens = TokenizeMultiCharSeparator(line, "|||"); - std::string w; - if(1 == lnc) - { - //do some init stuff in the first line - numTokens = tokens.size(); - if(tokens.size() == 2) - { // f ||| score - numKeyTokens = 1; - voc[0] = new WordVoc(); - voc[1] = 0; - } - else if(3 == tokens.size() || 4 == tokens.size()) - { //either f ||| e ||| score or f ||| e ||| c ||| score - numKeyTokens = 2; - voc[0] = new WordVoc(); //f voc - voc[1] = new WordVoc(); //e voc - voc[2] = voc[1]; //c & e share voc - } - } - else - { - //sanity check ALL lines must have same number of tokens - UTIL_THROW_IF2(numTokens != tokens.size(), - "Lines do not have the same number of tokens"); - } - size_t phrase = 0; - for(; phrase < numKeyTokens; ++phrase) - { - //conditioned on more than just f... need ||| - if(phrase >=1) key.push_back(PrefixTreeMap::MagicWord); - std::istringstream is(tokens[phrase]); - while(is >> w) key.push_back(voc[phrase]->add(w)); - } - - //collect all non key phrases, i.e. c - std::vector<IPhrase> tgt_phrases; - tgt_phrases.resize(numTokens - numKeyTokens - 1); - for(size_t j = 0; j < tgt_phrases.size(); ++j, ++phrase) - { - std::istringstream is(tokens[numKeyTokens + j]); - while(is >> w) tgt_phrases[j].push_back(voc[phrase]->add(w)); - } - - //last token is score - std::istringstream is(tokens[numTokens-1]); - while(is >> w) score.push_back(atof(w.c_str())); - - //transform score now... - std::transform(score.begin(),score.end(),score.begin(),TransformScore); - std::transform(score.begin(),score.end(),score.begin(),FloorScore); - std::vector<Scores> scores; - scores.push_back(score); - - if(key.empty()) { - TRACE_ERR("WARNING: empty source phrase in line '"<<line<<"'\n"); - continue; - } - - //first time inits - if(currFirstWord == InvalidLabelId) currFirstWord = key[0]; - if(currKey.empty()) - { - currKey = key; - //insert key into tree - UTIL_THROW_IF2(psa == NULL, "Object not yet created"); - PSA::Data& d = psa->insert(key); - if(d == InvalidOffT) d = fTell(ot); - else - { - TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" - << lnc << "): '" << line << "\n"); - return false; - } - } - - if(currKey != key) { - //ok new key - currKey = key; - //a) write cands for old key - cands.writeBin(ot); - cands.clear(); - //b) check if we need to move on to new tree root - if(key[0] != currFirstWord) { - // write key prefix tree to file and clear - PTF pf; - if(currFirstWord >= vo.size()) - vo.resize(currFirstWord+1,InvalidOffT); - vo[currFirstWord] = fTell(os); - pf.create(*psa, os); - delete psa; - psa = new PSA; - currFirstWord = key[0]; - } - - // c) insert key into tree - UTIL_THROW_IF2(psa == NULL, "Object not yet created"); - PSA::Data& d = psa->insert(key); - if(d == InvalidOffT) d = fTell(ot); - else - { - TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" - << lnc << "): '" << line << "\n"); - return false; - } - } - cands.push_back(GenericCandidate(tgt_phrases, scores)); + } + + if(currKey != key) { + //ok new key + currKey = key; + //a) write cands for old key + cands.writeBin(ot); + cands.clear(); + //b) check if we need to move on to new tree root + if(key[0] != currFirstWord) { + // write key prefix tree to file and clear + PTF pf; + if(currFirstWord >= vo.size()) + vo.resize(currFirstWord+1,InvalidOffT); + vo[currFirstWord] = fTell(os); + pf.create(*psa, os); + delete psa; + psa = new PSA; + currFirstWord = key[0]; } - if (lnc == 0) - { - TRACE_ERR("ERROR: empty lexicalised reordering file\n" << std::endl); - return false; + + // c) insert key into tree + UTIL_THROW_IF2(psa == NULL, "Object not yet created"); + PSA::Data& d = psa->insert(key); + if(d == InvalidOffT) d = fTell(ot); + else { + TRACE_ERR("ERROR: source phrase already inserted (A)!\nline(" + << lnc << "): '" << line << "\n"); + return false; } - cands.writeBin(ot); - cands.clear(); - - PTF pf; - if(currFirstWord >= vo.size()) - vo.resize(currFirstWord+1,InvalidOffT); - vo[currFirstWord] = fTell(os); - pf.create(*psa,os); - delete psa; - psa=0; - - fClose(os); - fClose(ot); - FILE *oi = fOpen(ofi.c_str(),"wb"); - fWriteVector(oi,vo); - fClose(oi); - - if(voc[0]) { voc[0]->Write(ofsv); delete voc[0]; } - if(voc[1]) { voc[1]->Write(oftv); delete voc[1]; } - return true; + } + cands.push_back(GenericCandidate(tgt_phrases, scores)); } - - std::string - LexicalReorderingTableTree:: - MakeCacheKey(const Phrase& f, const Phrase& e) const - { - std::string key; - if(!m_FactorsF.empty()) - key += auxClearString(f.GetStringRep(m_FactorsF)); - - if(!m_FactorsE.empty()) { - if(!key.empty()) { key += "|||"; } - key += auxClearString(e.GetStringRep(m_FactorsE)); + if (lnc == 0) { + TRACE_ERR("ERROR: empty lexicalised reordering file\n" << std::endl); + return false; + } + cands.writeBin(ot); + cands.clear(); + + PTF pf; + if(currFirstWord >= vo.size()) + vo.resize(currFirstWord+1,InvalidOffT); + vo[currFirstWord] = fTell(os); + pf.create(*psa,os); + delete psa; + psa=0; + + fClose(os); + fClose(ot); + FILE *oi = fOpen(ofi.c_str(),"wb"); + fWriteVector(oi,vo); + fClose(oi); + + if(voc[0]) { + voc[0]->Write(ofsv); + delete voc[0]; + } + if(voc[1]) { + voc[1]->Write(oftv); + delete voc[1]; + } + return true; +} + +std::string +LexicalReorderingTableTree:: +MakeCacheKey(const Phrase& f, const Phrase& e) const +{ + std::string key; + if(!m_FactorsF.empty()) + key += auxClearString(f.GetStringRep(m_FactorsF)); + + if(!m_FactorsE.empty()) { + if(!key.empty()) { + key += "|||"; } - return key; - }; + key += auxClearString(e.GetStringRep(m_FactorsE)); + } + return key; +}; - IPhrase - LexicalReorderingTableTree:: - MakeTableKey(const Phrase& f, const Phrase& e) const - { - IPhrase key; - std::vector<std::string> keyPart; - if(!m_FactorsF.empty()) - { - for(size_t i = 0; i < f.GetSize(); ++i) - keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false)); - auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId)); - keyPart.clear(); - } - if(!m_FactorsE.empty()) - { - if(!key.empty()) key.push_back(PrefixTreeMap::MagicWord); - for(size_t i = 0; i < e.GetSize(); ++i) - keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false)); - auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId)); - } - return key; - }; - - - struct State - { - State(PPimp* t, const std::string& p) - : pos(t), path(p) { } - - PPimp* pos; - std::string path; - }; - - void - LexicalReorderingTableTree:: - auxCacheForSrcPhrase(const Phrase& f) - { - if(m_FactorsE.empty()) - { - //f is all of key... - Candidates cands; - m_Table->GetCandidates(MakeTableKey(f,Phrase(ARRAY_SIZE_INCR)),&cands); - m_Cache[MakeCacheKey(f,Phrase(ARRAY_SIZE_INCR))] = cands; - } - else - { - ObjectPool<PPimp> pool; - PPimp* pPos = m_Table->GetRoot(); - - // 1) goto subtree for f - for(size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i) - pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId); - - if(pPos && pPos->isValid()) - pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord); - - if(!pPos || !pPos->isValid()) - return; - - //2) explore whole subtree depth first & cache - std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||"; - - std::vector<State> stack; - stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),"")); - Candidates cands; - while(!stack.empty()) - { - if(stack.back().pos->isValid()) - { - LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx); - std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId); - //cache this - m_Table->GetCandidates(*stack.back().pos,&cands); - if(!cands.empty()) m_Cache[cache_key + auxClearString(next_path)] = cands; - cands.clear(); - PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0)); - ++stack.back().pos->idx; - stack.push_back(State(next_pos,next_path)); - } - else stack.pop_back(); - } - } +IPhrase +LexicalReorderingTableTree:: +MakeTableKey(const Phrase& f, const Phrase& e) const +{ + IPhrase key; + std::vector<std::string> keyPart; + if(!m_FactorsF.empty()) { + for(size_t i = 0; i < f.GetSize(); ++i) + keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false)); + auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId)); + keyPart.clear(); } - - void - LexicalReorderingTableTree:: - Cache(const ConfusionNet& /*input*/) - { - return; + if(!m_FactorsE.empty()) { + if(!key.empty()) key.push_back(PrefixTreeMap::MagicWord); + for(size_t i = 0; i < e.GetSize(); ++i) + keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false)); + auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId)); } - - void - LexicalReorderingTableTree:: - Cache(const Sentence& input) - { - //only works with sentences... - size_t prev_cache_size = m_Cache.size(); - size_t max_phrase_length = input.GetSize(); - for(size_t len = 0; len <= max_phrase_length; ++len) - { - for(size_t start = 0; start+len <= input.GetSize(); ++start) - { - Phrase f = input.GetSubString(WordsRange(start, start+len)); - auxCacheForSrcPhrase(f); - } - } - std::cerr << "Cached " << m_Cache.size() - prev_cache_size - << " new primary reordering table keys\n"; + return key; +}; + + +struct State { + State(PPimp* t, const std::string& p) + : pos(t), path(p) { } + + PPimp* pos; + std::string path; +}; + +void +LexicalReorderingTableTree:: +auxCacheForSrcPhrase(const Phrase& f) +{ + if(m_FactorsE.empty()) { + //f is all of key... + Candidates cands; + m_Table->GetCandidates(MakeTableKey(f,Phrase(ARRAY_SIZE_INCR)),&cands); + m_Cache[MakeCacheKey(f,Phrase(ARRAY_SIZE_INCR))] = cands; + } else { + ObjectPool<PPimp> pool; + PPimp* pPos = m_Table->GetRoot(); + + // 1) goto subtree for f + for(size_t i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i) + pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId); + + if(pPos && pPos->isValid()) + pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord); + + if(!pPos || !pPos->isValid()) + return; + + //2) explore whole subtree depth first & cache + std::string cache_key = auxClearString(f.GetStringRep(m_FactorsF)) + "|||"; + + std::vector<State> stack; + stack.push_back(State(pool.get(PPimp(pPos->ptr()->getPtr(pPos->idx),0,0)),"")); + Candidates cands; + while(!stack.empty()) { + if(stack.back().pos->isValid()) { + LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx); + std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId); + //cache this + m_Table->GetCandidates(*stack.back().pos,&cands); + if(!cands.empty()) m_Cache[cache_key + auxClearString(next_path)] = cands; + cands.clear(); + PPimp* next_pos = pool.get(PPimp(stack.back().pos->ptr()->getPtr(stack.back().pos->idx),0,0)); + ++stack.back().pos->idx; + stack.push_back(State(next_pos,next_path)); + } else stack.pop_back(); + } + } +} + +void +LexicalReorderingTableTree:: +Cache(const ConfusionNet& /*input*/) +{ + return; +} + +void +LexicalReorderingTableTree:: +Cache(const Sentence& input) +{ + //only works with sentences... + size_t prev_cache_size = m_Cache.size(); + size_t max_phrase_length = input.GetSize(); + for(size_t len = 0; len <= max_phrase_length; ++len) { + for(size_t start = 0; start+len <= input.GetSize(); ++start) { + Phrase f = input.GetSubString(WordsRange(start, start+len)); + auxCacheForSrcPhrase(f); + } } + std::cerr << "Cached " << m_Cache.size() - prev_cache_size + << " new primary reordering table keys\n"; +} } diff --git a/moses/FF/LexicalReordering/LexicalReorderingTable.h b/moses/FF/LexicalReordering/LexicalReorderingTable.h index f4eceb72e..6c8e7e03c 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingTable.h +++ b/moses/FF/LexicalReordering/LexicalReorderingTable.h @@ -22,174 +22,191 @@ namespace Moses { - class Phrase; - class InputType; - class ConfusionNet; - - //! additional types - class LexicalReorderingTable - { - public: - LexicalReorderingTable(const FactorList& f_factors, - const FactorList& e_factors, - const FactorList& c_factors) - : m_FactorsF(f_factors) - , m_FactorsE(e_factors) - , m_FactorsC(c_factors) { } - - virtual - ~LexicalReorderingTable() { } - - public: - static - LexicalReorderingTable* - LoadAvailable(const std::string& filePath, - const FactorList& f_factors, - const FactorList& e_factors, - const FactorList& c_factors); - - virtual - Scores - GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0; - - virtual - void - InitializeForInput(const InputType&) { /* override for on-demand loading */ }; - - virtual - void - InitializeForInputPhrase(const Phrase&) { } - - - const FactorList& GetFFactorMask() const { return m_FactorsF; } - const FactorList& GetEFactorMask() const { return m_FactorsE; } - const FactorList& GetCFactorMask() const { return m_FactorsC; } - - virtual - void - DbgDump(std::ostream* out) const { *out << "Overwrite in subclass...\n"; }; - // why is this not a pure virtual function? - UG - - protected: - FactorList m_FactorsF; - FactorList m_FactorsE; - FactorList m_FactorsC; +class Phrase; +class InputType; +class ConfusionNet; + +//! additional types +class LexicalReorderingTable +{ +public: + LexicalReorderingTable(const FactorList& f_factors, + const FactorList& e_factors, + const FactorList& c_factors) + : m_FactorsF(f_factors) + , m_FactorsE(e_factors) + , m_FactorsC(c_factors) { } + + virtual + ~LexicalReorderingTable() { } + +public: + static + LexicalReorderingTable* + LoadAvailable(const std::string& filePath, + const FactorList& f_factors, + const FactorList& e_factors, + const FactorList& c_factors); + + virtual + Scores + GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0; + + virtual + void + InitializeForInput(const InputType&) { + /* override for on-demand loading */ }; - //! @todo what is this? - class LexicalReorderingTableMemory - : public LexicalReorderingTable - { - typedef std::map< std::string, std::vector<float> > TableType; - TableType m_Table; - - //implements LexicalReorderingTable saving all scores in one large std::map<> thingy - //to be used for non binary tables... uses a LOT of memory - public: - LexicalReorderingTableMemory(const std::string& filePath, - const std::vector<FactorType>& f_factors, - const std::vector<FactorType>& e_factors, - const std::vector<FactorType>& c_factors); - - virtual - ~LexicalReorderingTableMemory(); - - public: - virtual - std::vector<float> - GetScore(const Phrase& f, const Phrase& e, const Phrase& c); - - void - DbgDump(std::ostream* out) const; - - private: - - std::string - MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const; - - std::string - MakeKey(const std::string& f, const std::string& e, const std::string& c) const; - - void - LoadFromFile(const std::string& filePath); + virtual + void + InitializeForInputPhrase(const Phrase&) { } + + + const FactorList& GetFFactorMask() const { + return m_FactorsF; + } + const FactorList& GetEFactorMask() const { + return m_FactorsE; + } + const FactorList& GetCFactorMask() const { + return m_FactorsC; + } + + virtual + void + DbgDump(std::ostream* out) const { + *out << "Overwrite in subclass...\n"; }; - - class LexicalReorderingTableTree - : public LexicalReorderingTable - { - //implements LexicalReorderingTable using the crafty PDT code... + // why is this not a pure virtual function? - UG + +protected: + FactorList m_FactorsF; + FactorList m_FactorsE; + FactorList m_FactorsC; +}; + +//! @todo what is this? +class LexicalReorderingTableMemory + : public LexicalReorderingTable +{ + typedef std::map< std::string, std::vector<float> > TableType; + TableType m_Table; + + //implements LexicalReorderingTable saving all scores in one large std::map<> thingy + //to be used for non binary tables... uses a LOT of memory +public: + LexicalReorderingTableMemory(const std::string& filePath, + const std::vector<FactorType>& f_factors, + const std::vector<FactorType>& e_factors, + const std::vector<FactorType>& c_factors); + + virtual + ~LexicalReorderingTableMemory(); + +public: + virtual + std::vector<float> + GetScore(const Phrase& f, const Phrase& e, const Phrase& c); + + void + DbgDump(std::ostream* out) const; - typedef std::map< std::string, Candidates > CacheType; +private: + + std::string + MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const; + + std::string + MakeKey(const std::string& f, const std::string& e, const std::string& c) const; + + void + LoadFromFile(const std::string& filePath); +}; + +class LexicalReorderingTableTree + : public LexicalReorderingTable +{ + //implements LexicalReorderingTable using the crafty PDT code... + + typedef std::map< std::string, Candidates > CacheType; #ifdef WITH_THREADS - typedef boost::thread_specific_ptr<PrefixTreeMap> TableType; + typedef boost::thread_specific_ptr<PrefixTreeMap> TableType; #else - typedef std::auto_ptr<PrefixTreeMap> TableType; + typedef std::auto_ptr<PrefixTreeMap> TableType; #endif - static const int SourceVocId = 0; - static const int TargetVocId = 1; - - bool m_UseCache; - std::string m_FilePath; - CacheType m_Cache; - TableType m_Table; - - public: - - static - bool - Create(std::istream& inFile, const std::string& outFileName); - - LexicalReorderingTableTree(const std::string& filePath, - const std::vector<FactorType>& f_factors, - const std::vector<FactorType>& e_factors, - const std::vector<FactorType>& c_factors); - - ~LexicalReorderingTableTree(); - - bool IsCacheEnabled() const { return m_UseCache; }; - void EnableCache() { m_UseCache = true; }; - void DisableCache() { m_UseCache = false; }; - void ClearCache() { if (m_UseCache) m_Cache.clear(); }; - - virtual - std::vector<float> - GetScore(const Phrase& f, const Phrase& e, const Phrase& c); - - virtual - void - InitializeForInput(const InputType& input); - - virtual - void - InitializeForInputPhrase(const Phrase& f) - { - ClearCache(); - auxCacheForSrcPhrase(f); - } - - - private: - std::string - MakeCacheKey(const Phrase& f, const Phrase& e) const; - - IPhrase - MakeTableKey(const Phrase& f, const Phrase& e) const; - - void - Cache(const ConfusionNet& input); - - void - Cache(const Sentence& input); - - void - auxCacheForSrcPhrase(const Phrase& f); - - Scores - auxFindScoreForContext(const Candidates& cands, const Phrase& contex); - + static const int SourceVocId = 0; + static const int TargetVocId = 1; + + bool m_UseCache; + std::string m_FilePath; + CacheType m_Cache; + TableType m_Table; + +public: + + static + bool + Create(std::istream& inFile, const std::string& outFileName); + + LexicalReorderingTableTree(const std::string& filePath, + const std::vector<FactorType>& f_factors, + const std::vector<FactorType>& e_factors, + const std::vector<FactorType>& c_factors); + + ~LexicalReorderingTableTree(); + + bool IsCacheEnabled() const { + return m_UseCache; + }; + void EnableCache() { + m_UseCache = true; + }; + void DisableCache() { + m_UseCache = false; }; + void ClearCache() { + if (m_UseCache) m_Cache.clear(); + }; + + virtual + std::vector<float> + GetScore(const Phrase& f, const Phrase& e, const Phrase& c); + + virtual + void + InitializeForInput(const InputType& input); + + virtual + void + InitializeForInputPhrase(const Phrase& f) { + ClearCache(); + auxCacheForSrcPhrase(f); + } + + +private: + std::string + MakeCacheKey(const Phrase& f, const Phrase& e) const; + + IPhrase + MakeTableKey(const Phrase& f, const Phrase& e) const; + + void + Cache(const ConfusionNet& input); + + void + Cache(const Sentence& input); + + void + auxCacheForSrcPhrase(const Phrase& f); + + Scores + auxFindScoreForContext(const Candidates& cands, const Phrase& contex); + +}; } diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 22eca8520..54f314574 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -199,7 +199,7 @@ void SparseReordering::CopyScores( ScoreComponentCollection* scores) const { if (m_useBetween && direction == LRModel::Backward && - (reoType == LRModel::D || reoType == LRModel::DL || reoType == LRModel::DR)){ + (reoType == LRModel::D || reoType == LRModel::DL || reoType == LRModel::DR)) { size_t gapStart, gapEnd; //NB: Using a static cast for speed, but could be nasty if //using non-sentence input diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp index d2d4f881c..76ceb971c 100644 --- a/moses/FF/PhraseOrientationFeature.cpp +++ b/moses/FF/PhraseOrientationFeature.cpp @@ -53,13 +53,13 @@ void PhraseOrientationFeature::SetParameter(const std::string& key, const std::s } else if (key == "distinguishStates") { m_distinguishStates = Scan<bool>(value); } else if (key == "sparseWord") { - m_useSparseWord = Scan<bool>(value); + m_useSparseWord = Scan<bool>(value); } else if (key == "sparseNT") { - m_useSparseNT = Scan<bool>(value); + m_useSparseNT = Scan<bool>(value); } else if (key == "targetWordList") { - m_filenameTargetWordList = value; + m_filenameTargetWordList = value; } else if (key == "sourceWordList") { - m_filenameSourceWordList = value; + m_filenameSourceWordList = value; } else { StatefulFeatureFunction::SetParameter(key, value); } @@ -80,7 +80,7 @@ void PhraseOrientationFeature::Load() void PhraseOrientationFeature::LoadWordList(const std::string& filename, - boost::unordered_set<const Factor*>& list) + boost::unordered_set<const Factor*>& list) { FEATUREVERBOSE(2, "Loading word list from file " << filename << std::endl); FactorCollection &factorCollection = FactorCollection::Instance(); @@ -97,10 +97,10 @@ void PhraseOrientationFeature::LoadWordList(const std::string& filename, } -void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source, - const TargetPhrase &targetPhrase, - ScoreComponentCollection &scoreBreakdown, - ScoreComponentCollection &estimatedFutureScore) const +void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source, + const TargetPhrase &targetPhrase, + ScoreComponentCollection &scoreBreakdown, + ScoreComponentCollection &estimatedFutureScore) const { targetPhrase.SetRuleSource(source); @@ -116,9 +116,9 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source, } -void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty, - ScoreComponentCollection &scoreBreakdown, - bool subtract) const +void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty, + ScoreComponentCollection &scoreBreakdown, + bool subtract) const { size_t ffScoreIndex = scoreBreakdown.GetIndexes(this).first; @@ -129,10 +129,10 @@ void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *o size_t heuristicScoreIndexL2R = GetHeuristicScoreIndex(scoresL2R, 0); if (subtract) { - scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R, + scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R, -scoresL2R[heuristicScoreIndexL2R]); } else { - scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R, + scoreBreakdown.PlusEquals(ffScoreIndex+heuristicScoreIndexL2R, scoresL2R[heuristicScoreIndexL2R]); } @@ -143,10 +143,10 @@ void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *o size_t heuristicScoreIndexR2L = GetHeuristicScoreIndex(scoresR2L, m_offsetR2LScores); if (subtract) { - scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L, + scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L, -scoresR2L[heuristicScoreIndexR2L]); } else { - scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L, + scoreBreakdown.PlusEquals(ffScoreIndex+m_offsetR2LScores+heuristicScoreIndexR2L, scoresR2L[heuristicScoreIndexR2L]); } } @@ -221,7 +221,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied( << " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright() << " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft() << std::endl); - + LookaheadScore(orientationPhraseProperty, *accumulator, true); const PhraseOrientationFeatureState* prevState = @@ -490,8 +490,8 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied( size_t PhraseOrientationFeature::GetHeuristicScoreIndex(const std::vector<float>& scores, - size_t weightsVectorOffset, - const std::bitset<3> possibleFutureOrientations) const + size_t weightsVectorOffset, + const std::bitset<3> possibleFutureOrientations) const { std::vector<float> weightedScores; for ( size_t i=0; i<3; ++i ) { @@ -532,8 +532,8 @@ void PhraseOrientationFeature::LeftBoundaryL2RScoreRecursive(int featureID, const PhraseOrientationFeatureState *state, const std::bitset<3> orientation, std::vector<float>& newScores, - ScoreComponentCollection* scoreBreakdown) const - // TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores) + ScoreComponentCollection* scoreBreakdown) const +// TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores) { if (state->m_leftBoundaryIsSet) { const std::string* recursiveOrientationString; @@ -593,8 +593,8 @@ void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID, const PhraseOrientationFeatureState *state, const std::bitset<3> orientation, std::vector<float>& newScores, - ScoreComponentCollection* scoreBreakdown) const - // TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores) + ScoreComponentCollection* scoreBreakdown) const +// TODO: passing both newScores and scoreBreakdown seems redundant (scoreBreakdown needed for sparse scores) { if (state->m_rightBoundaryIsSet) { const std::string* recursiveOrientationString; @@ -651,8 +651,8 @@ void PhraseOrientationFeature::RightBoundaryR2LScoreRecursive(int featureID, void PhraseOrientationFeature::SparseWordL2RScore(const ChartHypothesis* hypo, - ScoreComponentCollection* scoreBreakdown, - const std::string* o) const + ScoreComponentCollection* scoreBreakdown, + const std::string* o) const { // target word @@ -686,7 +686,7 @@ void PhraseOrientationFeature::SparseWordL2RScore(const ChartHypothesis* hypo, } // source word - + WordsRange sourceSpan = hypo->GetCurrSourceRange(); const InputType& input = hypo->GetManager().GetSource(); const Sentence& sourceSentence = static_cast<const Sentence&>(input); @@ -710,8 +710,8 @@ void PhraseOrientationFeature::SparseWordL2RScore(const ChartHypothesis* hypo, void PhraseOrientationFeature::SparseWordR2LScore(const ChartHypothesis* hypo, - ScoreComponentCollection* scoreBreakdown, - const std::string* o) const + ScoreComponentCollection* scoreBreakdown, + const std::string* o) const { // target word @@ -745,7 +745,7 @@ void PhraseOrientationFeature::SparseWordR2LScore(const ChartHypothesis* hypo, } // source word - + WordsRange sourceSpan = hypo->GetCurrSourceRange(); const InputType& input = hypo->GetManager().GetSource(); const Sentence& sourceSentence = static_cast<const Sentence&>(input); @@ -769,8 +769,8 @@ void PhraseOrientationFeature::SparseWordR2LScore(const ChartHypothesis* hypo, void PhraseOrientationFeature::SparseNonTerminalL2RScore(const Factor* nonTerminalSymbol, - ScoreComponentCollection* scoreBreakdown, - const std::string* o) const + ScoreComponentCollection* scoreBreakdown, + const std::string* o) const { if ( nonTerminalSymbol != m_glueTargetLHS ) { const std::string& nonTerminalString = nonTerminalSymbol->GetString().as_string(); @@ -783,8 +783,8 @@ void PhraseOrientationFeature::SparseNonTerminalL2RScore(const Factor* nonTermin void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTerminalSymbol, - ScoreComponentCollection* scoreBreakdown, - const std::string* o) const + ScoreComponentCollection* scoreBreakdown, + const std::string* o) const { if ( nonTerminalSymbol != m_glueTargetLHS ) { const std::string& nonTerminalString = nonTerminalSymbol->GetString().as_string(); diff --git a/moses/FF/PhraseOrientationFeature.h b/moses/FF/PhraseOrientationFeature.h index 9abbe9a8e..0ad566632 100644 --- a/moses/FF/PhraseOrientationFeature.h +++ b/moses/FF/PhraseOrientationFeature.h @@ -62,7 +62,7 @@ public: void SetRightBoundaryR2L(const std::vector<float> &scores, size_t heuristicScoreIndex, std::bitset<3> &possibleFutureOrientations, - const Factor* rightBoundaryNonTerminalSymbol, + const Factor* rightBoundaryNonTerminalSymbol, const PhraseOrientationFeatureState* prevState) { for (size_t i=0; i<3; ++i) { m_rightBoundaryNonTerminalR2LScores[i] = scores[i]; @@ -177,7 +177,7 @@ protected: for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) { // compare only for possible future orientations // (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks) - if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) { + if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) { if (state.m_leftBoundaryNonTerminalL2RScores[i] > otherState.m_leftBoundaryNonTerminalL2RScores[i]) { return 1; } @@ -238,7 +238,7 @@ protected: for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) { // compare only for possible future orientations // (possible future orientations of state and otherState are the same at this point due to the previous two conditional blocks) - if ( state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) { + if ( state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) { if (state.m_rightBoundaryNonTerminalR2LScores[i] > otherState.m_rightBoundaryNonTerminalR2LScores[i]) { return 1; } @@ -314,7 +314,7 @@ public: } void SetParameter(const std::string& key, const std::string& value); - + void Load(); void EvaluateInIsolation(const Phrase &source @@ -353,12 +353,12 @@ protected: void LoadWordList(const std::string& filename, boost::unordered_set<const Factor*>& list); - void LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty, - ScoreComponentCollection &scoreBreakdown, + void LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty, + ScoreComponentCollection &scoreBreakdown, bool subtract=false) const; size_t GetHeuristicScoreIndex(const std::vector<float>& scores, - size_t weightsVectorOffset, + size_t weightsVectorOffset, const std::bitset<3> possibleFutureOrientations = 0x7) const; void LeftBoundaryL2RScoreRecursive(int featureID, diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp index af8e89a3a..655b34ae2 100644 --- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp +++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp @@ -30,10 +30,30 @@ SoftSourceSyntacticConstraintsFeature::SoftSourceSyntacticConstraintsFeature(con ReadParameters(); VERBOSE(1, " Done."); VERBOSE(1, " Config:"); - VERBOSE(1, " Log probabilities"); if ( m_useLogprobs ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); } - VERBOSE(1, " Sparse scores"); if ( m_useSparse ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); } - VERBOSE(1, " Core labels"); if ( m_useCoreSourceLabels ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); } - VERBOSE(1, " No mismatches"); if ( m_noMismatches ) { VERBOSE(1, " active."); } else { VERBOSE(1, " inactive."); } + VERBOSE(1, " Log probabilities"); + if ( m_useLogprobs ) { + VERBOSE(1, " active."); + } else { + VERBOSE(1, " inactive."); + } + VERBOSE(1, " Sparse scores"); + if ( m_useSparse ) { + VERBOSE(1, " active."); + } else { + VERBOSE(1, " inactive."); + } + VERBOSE(1, " Core labels"); + if ( m_useCoreSourceLabels ) { + VERBOSE(1, " active."); + } else { + VERBOSE(1, " inactive."); + } + VERBOSE(1, " No mismatches"); + if ( m_noMismatches ) { + VERBOSE(1, " active."); + } else { + VERBOSE(1, " inactive."); + } VERBOSE(1, std::endl); } @@ -50,9 +70,9 @@ void SoftSourceSyntacticConstraintsFeature::SetParameter(const std::string& key, } else if (key == "noMismatches") { m_noMismatches = Scan<bool>(value); // for a hard constraint, allow no mismatches (also set: weights 1 0 0 0 0 0, tuneable=false) } else if (key == "logProbabilities") { - m_useLogprobs = Scan<bool>(value); + m_useLogprobs = Scan<bool>(value); } else if (key == "sparse") { - m_useSparse = Scan<bool>(value); + m_useSparse = Scan<bool>(value); } else { StatelessFeatureFunction::SetParameter(key, value); } @@ -146,8 +166,8 @@ void SoftSourceSyntacticConstraintsFeature::LoadCoreSourceLabelSet() LoadLabelSet(m_coreSourceLabelSetFile, m_coreSourceLabels); } -void SoftSourceSyntacticConstraintsFeature::LoadLabelSet(std::string &filename, - boost::unordered_set<size_t> &labelSet) +void SoftSourceSyntacticConstraintsFeature::LoadLabelSet(std::string &filename, + boost::unordered_set<size_t> &labelSet) { FEATUREVERBOSE(2, "Loading core source label set from file " << m_coreSourceLabelSetFile << std::endl); InputFileStream inFile(filename); diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.h b/moses/FF/SoftSourceSyntacticConstraintsFeature.h index e73993df1..550f432a6 100644 --- a/moses/FF/SoftSourceSyntacticConstraintsFeature.h +++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.h @@ -30,7 +30,7 @@ public: } void SetParameter(const std::string& key, const std::string& value); - + void Load(); void EvaluateInIsolation(const Phrase &source diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h index f54f3a746..c5364fd0d 100644 --- a/moses/FF/StatefulFeatureFunction.h +++ b/moses/FF/StatefulFeatureFunction.h @@ -17,9 +17,8 @@ class StatefulFeatureFunction: public FeatureFunction static std::vector<const StatefulFeatureFunction*> m_statefulFFs; public: - static const std::vector<const StatefulFeatureFunction*>& - GetStatefulFeatureFunctions() - { + static const std::vector<const StatefulFeatureFunction*>& + GetStatefulFeatureFunctions() { return m_statefulFFs; } diff --git a/moses/FF/UnalignedWordCountFeature.cpp b/moses/FF/UnalignedWordCountFeature.cpp index 83a2ac0c3..9f0fe10db 100644 --- a/moses/FF/UnalignedWordCountFeature.cpp +++ b/moses/FF/UnalignedWordCountFeature.cpp @@ -32,8 +32,7 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source std::vector<bool> alignedSource(sourceLength, false); std::vector<bool> alignedTarget(targetLength, false); - for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); ++alignmentPoint) - { + for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); ++alignmentPoint) { alignedSource[ alignmentPoint->first ] = true; alignedTarget[ alignmentPoint->second ] = true; } diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h index 9be44c8b6..8b7330440 100644 --- a/moses/FF/VW/VW.h +++ b/moses/FF/VW/VW.h @@ -157,8 +157,8 @@ public: // optionally update translation options using leave-one-out std::vector<bool> keep = (m_leaveOneOut.size() > 0) - ? LeaveOneOut(translationOptionList) - : std::vector<bool>(translationOptionList.size(), true); + ? LeaveOneOut(translationOptionList) + : std::vector<bool>(translationOptionList.size(), true); std::vector<float> losses(translationOptionList.size()); std::vector<float>::iterator iterLoss; @@ -187,7 +187,7 @@ public: const std::vector<VWFeatureBase*>& targetFeatures = VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription()); - for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin(), iterKeep = keep.begin() ; + for(iterTransOpt = translationOptionList.begin(), iterLoss = losses.begin(), iterKeep = keep.begin() ; iterTransOpt != translationOptionList.end() ; ++iterTransOpt, ++iterLoss) { if (! *iterKeep) @@ -369,7 +369,7 @@ private: float sourceRawCount = 0.0; const float ONE = 1.0001; // I don't understand floating point numbers - + std::vector<bool> keepOpt; TranslationOptionList::const_iterator iterTransOpt; @@ -426,7 +426,7 @@ private: std::string m_vwOptions; // optionally contains feature name of a phrase table where we recompute scores with leaving one out - std::string m_leaveOneOut; + std::string m_leaveOneOut; Discriminative::Normalizer *m_normalizer = NULL; TLSClassifier *m_tlsClassifier; |