diff options
author | phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230> | 2006-09-28 22:36:38 +0400 |
---|---|---|
committer | phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230> | 2006-09-28 22:36:38 +0400 |
commit | 3e7e4570b527282a12415ddf3a0908ebf2425dde (patch) | |
tree | 86b711da04e9b3c7505e9479ee06d64fedab11dd /moses | |
parent | ef42ad791e432545add95e0b8a6a7bef80f52ba8 (diff) |
bug fix to lexicalized reordering model
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@834 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rwxr-xr-x | moses/src/Hypothesis.cpp | 36 | ||||
-rwxr-xr-x | moses/src/Hypothesis.h | 18 | ||||
-rwxr-xr-x | moses/src/HypothesisCollection.cpp | 14 | ||||
-rwxr-xr-x | moses/src/HypothesisCollection.h | 12 | ||||
-rwxr-xr-x | moses/src/LexicalReordering.cpp | 268 | ||||
-rwxr-xr-x | moses/src/LexicalReordering.h | 40 | ||||
-rwxr-xr-x | moses/src/StaticData.cpp | 54 | ||||
-rwxr-xr-x | moses/src/StaticData.h | 5 | ||||
-rwxr-xr-x | moses/src/TypeDef.h | 2 | ||||
-rwxr-xr-x | moses/src/WordsBitmap.cpp | 2 | ||||
-rwxr-xr-x | moses/src/WordsBitmap.h | 2 |
11 files changed, 249 insertions, 204 deletions
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp index 0365fa61d..0e946aa5f 100755 --- a/moses/src/Hypothesis.cpp +++ b/moses/src/Hypothesis.cpp @@ -55,7 +55,7 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget) , _lmstats(0) { // used for initial seeding of trans process // initialize scores - _hash_computed = false; + //_hash_computed = false; s_HypothesesCreated = 1; ResetScore(); } @@ -85,7 +85,7 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran // that this hypothesis has already translated! assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange)); - _hash_computed = false; + //_hash_computed = false; m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true); m_wordDeleted = transOpt.IsDeletionOption(); m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown()); @@ -160,24 +160,36 @@ Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &em return new(ptr) Hypothesis(m_source, emptyTarget); } -void Hypothesis::GenerateNGramCompareHash() const -{ - _hash = quick_hash((const char*)&m_languageModelStates[0], sizeof(LanguageModelSingleFactor::State) * m_languageModelStates.size(), 0xcafe5137); - _hash_computed = true; - vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedReprentation(); - _hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash); -} - +//void Hypothesis::GenerateNGramCompareHash() const +//{ +// _hash = quick_hash((const char*)&m_languageModelStates[0], sizeof(LanguageModelSingleFactor::State) * m_languageModelStates.size(), 0xcafe5137); +// _hash_computed = true; +// vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedRepresentation(); +// _hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash); +//} + +/** check, if two hypothesis can be recombined. + this is actually a sorting function that allows us to + keep an ordered list of hypotheses. This makes recombination + much quicker. +*/ int Hypothesis::NGramCompare(const Hypothesis &compare) const { // -1 = this < compare // +1 = this > compare // 0 = this ==compare if (m_languageModelStates < compare.m_languageModelStates) return -1; if (m_languageModelStates > compare.m_languageModelStates) return 1; + if (m_sourceCompleted.GetCompressedRepresentation() < compare.m_sourceCompleted.GetCompressedRepresentation()) return -1; + if (m_sourceCompleted.GetCompressedRepresentation() > compare.m_sourceCompleted.GetCompressedRepresentation()) return 1; + if (m_currSourceWordsRange.GetEndPos() < compare.m_currSourceWordsRange.GetEndPos()) return -1; + if (m_currSourceWordsRange.GetEndPos() > compare.m_currSourceWordsRange.GetEndPos()) return 1; + if (! StaticData::Instance()->GetSourceStartPosMattersForRecombination()) return 0; + if (m_currSourceWordsRange.GetStartPos() < compare.m_currSourceWordsRange.GetStartPos()) return -1; + if (m_currSourceWordsRange.GetStartPos() > compare.m_currSourceWordsRange.GetStartPos()) return 1; return 0; } -/** - * Calculates the overall language model score by combining the scores + +/** Calculates the overall language model score by combining the scores * of language models generated for each of the factors. Because the factors * represent a variety of tag sets, and because factors with smaller tag sets * (such as POS instead of words) allow us to calculate richer statistics, we diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h index 56bc1a0fe..df1a423e0 100755 --- a/moses/src/Hypothesis.h +++ b/moses/src/Hypothesis.h @@ -83,9 +83,9 @@ protected: void CalcDistortionScore(); //TODO: add appropriate arguments to score calculator - void GenerateNGramCompareHash() const; - mutable size_t _hash; - mutable bool _hash_computed; + // void GenerateNGramCompareHash() const; + // mutable size_t _hash; + // mutable bool _hash_computed; public: static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total @@ -205,12 +205,12 @@ public: int NGramCompare(const Hypothesis &compare) const; - inline size_t hash() const - { - if (_hash_computed) return _hash; - GenerateNGramCompareHash(); - return _hash; - } + // inline size_t hash() const + // { + // if (_hash_computed) return _hash; + // GenerateNGramCompareHash(); + // return _hash; + // } void ToStream(std::ostream& out) const { diff --git a/moses/src/HypothesisCollection.cpp b/moses/src/HypothesisCollection.cpp index 4569b77e0..9012f22c6 100755 --- a/moses/src/HypothesisCollection.cpp +++ b/moses/src/HypothesisCollection.cpp @@ -102,7 +102,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo) Hypothesis *hypoExisting = *iter; if (hypo->GetTotalScore() > hypoExisting->GetTotalScore()) { // incoming hypo is better than the one we have - VERBOSE(3,"better than matching hyp, recombining, "); + VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, "); if (m_nBestIsEnabled) { hypo->AddArc(hypoExisting); Detach(iter); @@ -114,7 +114,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo) } else { // already storing the best hypo. discard current hypo - VERBOSE(3,"worse than matching hyp, recombining" << std::endl) + VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl) if (m_nBestIsEnabled) { (*iter)->AddArc(hypo); } else { @@ -183,6 +183,16 @@ void HypothesisCollection::PruneToSize(size_t newSize) } VERBOSE(3,", pruned to size " << size() << endl); + IFVERBOSE(3) { + cerr << "stack now contains: "; + for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++) + { + Hypothesis *hypo = *iter; + cerr << hypo->GetId() << " (" << hypo->GetTotalScore() << ") "; + } + cerr << endl; + } + // set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack m_worstScore = scoreThreshold; // cerr << "Heap contains " << bestScores.size() << " items" << endl; diff --git a/moses/src/HypothesisCollection.h b/moses/src/HypothesisCollection.h index efd3ab257..236ce1599 100755 --- a/moses/src/HypothesisCollection.h +++ b/moses/src/HypothesisCollection.h @@ -66,12 +66,12 @@ struct HypothesisRecombinationComparer } }; -struct HypothesisRecombinationHasher -{ - size_t operator()(const Hypothesis* hypo) const { - return hypo->hash(); - } -}; +//struct HypothesisRecombinationHasher +//{ +// size_t operator()(const Hypothesis* hypo) const { +// return hypo->hash(); +// } +//}; /** Stack for instances of Hypothesis, includes functions for pruning. */ class HypothesisCollection diff --git a/moses/src/LexicalReordering.cpp b/moses/src/LexicalReordering.cpp index 9de9ab90c..bdef84786 100755 --- a/moses/src/LexicalReordering.cpp +++ b/moses/src/LexicalReordering.cpp @@ -7,45 +7,56 @@ #include <algorithm> #include "LexicalReordering.h" #include "InputFileStream.h" -#include "DistortionOrientation.h" #include "StaticData.h" #include "Util.h" using namespace std; -/* - * Load the file pointed to by filename; set up the table according to - * the orientation and condition parameters. Direction will be used - * later for computing the score. - */ +/** Load the file pointed to by filename; set up the table according to + * the orientation and condition parameters. Direction will be used + * later for computing the score. + * \param filename file that contains the table + * \param orientation orientation as defined in DistortionOrientationType (monotone/msd) + * \param direction direction as defined in LexReorderType (forward/backward/bidirectional) + * \param condition either conditioned on foreign or foreign+english + * \param weights weight setting for this model + * \param input input factors + * \param output output factors + */ LexicalReordering::LexicalReordering(const std::string &filename, int orientation, int direction, int condition, const std::vector<float>& weights, vector<FactorType> input, vector<FactorType> output) : - m_orientation(orientation), m_condition(condition), m_numberscores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output) + m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output) { //add score producer const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this); //manage the weights by SetWeightsForScoreProducer method of static data. if(direction == LexReorderType::Bidirectional) { + m_direction.push_back(LexReorderType::Backward); // this order is important m_direction.push_back(LexReorderType::Forward); - m_direction.push_back(LexReorderType::Backward); } else { m_direction.push_back(direction); } + // set number of orientations + if( orientation == DistortionOrientationType::Monotone) { + m_numOrientationTypes = 2; + } + else if ( orientation == DistortionOrientationType::Msd) { + m_numOrientationTypes = 3; + } const_cast<StaticData*>(StaticData::Instance())->SetWeightsForScoreProducer(this, weights); // Load the file LoadFile(); -// PrintTable(); + // PrintTable(); } -/* - * Loads the file into a map. - */ +/** Loads the orientation file into a map + */ void LexicalReordering::LoadFile() { InputFileStream inFile(m_filename); @@ -73,14 +84,11 @@ void LexicalReordering::LoadFile() probs = Scan<float>(Tokenize(tokens[F_PROBS])); } - if (m_orientation == DistortionOrientationType::Monotone) - { - assert(probs.size() == MONO_NUM_PROBS); // 2 backward, 2 forward - } - else - { - assert(probs.size() == MSD_NUM_PROBS); // 3 backward, 3 forward - } + if (probs.size() != m_direction.size() * m_numOrientationTypes) { + TRACE_ERR("found " << probs.size() << " probabilities, expected " + << m_direction.size() * m_numOrientationTypes << endl); + exit(0); + } std::vector<float> scv(probs.size()); std::transform(probs.begin(),probs.end(),probs.begin(),TransformScore); m_orientation_table[key] = probs; @@ -88,9 +96,8 @@ void LexicalReordering::LoadFile() inFile.Close(); } -/* - * Print the table in a readable format. - */ +/** print the table in a readable format (not used at this point) + */ void LexicalReordering::PrintTable() { // iterate over map @@ -113,140 +120,141 @@ void LexicalReordering::PrintTable() } } -std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis) +/** compute the orientation given a hypothesis + */ +int LexicalReordering::GetOrientation(const Hypothesis *curr_hypothesis) { - std::vector<float> score(m_numberscores, 0); - vector<float> val; - for(unsigned int i=0; i < m_direction.size(); i++) + const Hypothesis *prevHypo = curr_hypothesis->GetPrevHypo(); + + const WordsRange &currSourceRange = curr_hypothesis->GetCurrSourceWordsRange(); + size_t curr_source_start = currSourceRange.GetStartPos(); + size_t curr_source_end = currSourceRange.GetEndPos(); + + //if there's no previous source... + if(prevHypo->GetId() == 0){ + if (curr_source_start == 0) + { + return ORIENTATION_MONOTONE; + } + else { + return ORIENTATION_DISCONTINUOUS; + } + } + + + const WordsRange &prevSourceRange = prevHypo->GetCurrSourceWordsRange(); + size_t prev_source_start = prevSourceRange.GetStartPos(); + size_t prev_source_end = prevSourceRange.GetEndPos(); + if(prev_source_end==curr_source_start-1) { - int direction = m_direction[i]; - int orientation = DistortionOrientation::GetOrientation(hypothesis, direction); - if(m_condition==LexReorderType::Fe) + return ORIENTATION_MONOTONE; + } + // distinguish between monotone, swap, discontinuous + else if(m_orientation==DistortionOrientationType::Msd) + { + if(prev_source_start==curr_source_end+1) { - //this key string is F+'|||'+E from the hypothesis - val=m_orientation_table[hypothesis->GetSourcePhraseStringRep(m_sourceFactors) - +"||| " - +hypothesis->GetTargetPhraseStringRep(m_targetFactors)]; + return ORIENTATION_SWAP; } else { - //this key string is F from the hypothesis - val=m_orientation_table[hypothesis->GetTargetPhraseStringRep(m_sourceFactors)]; + return ORIENTATION_DISCONTINUOUS; } - if(val.size()> 0) - { - if(m_orientation==DistortionOrientationType::Msd) - { - if(direction==LexReorderType::Backward) - { - if(orientation==DistortionOrientationType::MONO) - { - score[BACK_M] = val[BACK_M]; - } - else if(orientation==DistortionOrientationType::SWAP) - { - score[BACK_S] = val[BACK_S]; - } - else - { - score[BACK_D] = val[BACK_D]; - } - - } - else - { - //if we only have forward scores (no backward scores) in the table, - //then forward scores have no offset so we can use the indices of the backwards scores - if(orientation==DistortionOrientationType::MONO) - { - if(m_numberscores>3) - { - score[FOR_M] = val[FOR_M]; - } - else - { - score[BACK_M] = val[BACK_M]; - } - } - else if(orientation==DistortionOrientationType::SWAP) + } + // only distinguish between monotone, non monotone + else + { + return ORIENTATION_NON_MONOTONE; + } +} + +/** calculate the score(s) for a hypothesis + */ +std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis) +{ + std::vector<float> score(m_numScores, 0); + for(unsigned int i=0; i < m_direction.size(); i++) // backward, forward, or both + { + vector<float> val; // we will score the matching probability here + + // FIRST, get probability distribution + + int direction = m_direction[i]; // either backward or forward + + // no score, if we would have to compute the forward score from the initial hypothesis + if (direction == LexReorderType::Backward || hypothesis->GetPrevHypo()->GetId() != 0) { + + if (direction == LexReorderType::Backward) { + // conditioned on both foreign and English + if(m_condition==LexReorderType::Fe) { - if(m_numberscores>3) - { - score[FOR_S] = val[FOR_S]; - } - else - { - score[BACK_S] = val[BACK_S]; - } + //this key string is F+'|||'+E from the hypothesis + val=m_orientation_table[hypothesis->GetSourcePhraseStringRep(m_sourceFactors) + +"||| " + +hypothesis->GetTargetPhraseStringRep(m_targetFactors)]; } - else + // only conditioned on foreign + else { - if(m_numberscores>3) - { - score[FOR_D] = val[FOR_D]; - } - else - { - score[BACK_D] = val[BACK_D]; - } + //this key string is F from the hypothesis + val=m_orientation_table[hypothesis->GetTargetPhraseStringRep(m_sourceFactors)]; } - } } - else - { - if(direction==LexReorderType::Backward) - { - if(orientation==DistortionOrientationType::MONO) - { - score[BACK_MONO] = val[BACK_MONO]; - } - else - { - score[BACK_NONMONO] = val[BACK_NONMONO]; - } - } - else - { - //if we only have forward scores (no backward scores) in the table, - //then forward scores have no offset so we can use the indices of the backwards scores - if(orientation==DistortionOrientationType::MONO) + + // if forward looking, condition on previous phrase + else { + // conditioned on both foreign and English + if(m_condition==LexReorderType::Fe) { - if(m_numberscores>3) - { - score[FOR_MONO] = val[FOR_MONO]; - } - else - { - score[BACK_MONO] = val[BACK_MONO]; - } + //this key string is F+'|||'+E from the hypothesis + val=m_orientation_table[hypothesis->GetPrevHypo()->GetSourcePhraseStringRep(m_sourceFactors) + +"||| " + +hypothesis->GetPrevHypo()->GetTargetPhraseStringRep(m_targetFactors)]; } - else + // only conditioned on foreign + else { - if(m_numberscores>3) - { - score[FOR_NONMONO] = val[FOR_NONMONO]; - } - else - { - score[BACK_NONMONO] = val[BACK_NONMONO]; - } + //this key string is F from the hypothesis + val=m_orientation_table[hypothesis->GetPrevHypo()->GetTargetPhraseStringRep(m_sourceFactors)]; } - } } - + } + + // SECOND, look up score + + if(val.size()> 0) // valid entry + { + int orientation = GetOrientation(hypothesis); + float value = val[ orientation + i * m_numOrientationTypes ]; + // one weight per direction + if ( m_numScores < m_numOrientationTypes ) { + score[i] = value; + } + // one weight per direction and type + else { + score[ orientation + i * m_numOrientationTypes ] = value; + } + + // IFVERBOSE(3) { + // cerr << "\tdistortion type " << orientation << " =>"; + // for(unsigned int j=0;j<score.size();j++) { + // cerr << " " << score[j]; + // } + // cerr << endl; + // } } } return score; } - +/** return the number of scores produced by this model */ unsigned int LexicalReordering::GetNumScoreComponents() const { - return m_numberscores; + return m_numScores; } +/** returns description of the model */ const std::string LexicalReordering::GetScoreProducerDescription() const { return "Lexicalized reordering score, file=" + m_filename; } - diff --git a/moses/src/LexicalReordering.h b/moses/src/LexicalReordering.h index 8bab36b99..329c5ec83 100755 --- a/moses/src/LexicalReordering.h +++ b/moses/src/LexicalReordering.h @@ -37,11 +37,11 @@ class Phrase; class Hypothesis; using namespace std; -/*** - * The LexicalReordering class handles everything involved with +/** The LexicalReordering class handles everything involved with * lexical reordering. It loads a probability table P(orientation|f,e) * and computes scores in either forward, backward, or bidirectional * direction. + * This model is described in Koehn et al. [IWSLT 2005] */ class LexicalReordering : public ScoreProducer @@ -49,14 +49,9 @@ class LexicalReordering : public ScoreProducer private: - // Members + // This stores the model table typedef std::map<std::string, std::vector<float> > ORIENTATION_TABLE; - // This is the order in which the different forward/backward - // probabilities are stored in the table. - enum TableLookupMsd { BACK_M, BACK_S, BACK_D, FOR_M,FOR_S, FOR_D }; - enum TableLookupMonotone { BACK_MONO, BACK_NONMONO, FOR_MONO, FOR_NONMONO}; - // This is the order in which pieces appear in the orientation table // when conditioning on f and e. enum FEFileFormat { FE_FOREIGN, FE_ENGLISH, FE_PROBS }; @@ -67,19 +62,25 @@ private: // different numbers of probabilities for different ranges of // orientation variable - static const unsigned int MSD_NUM_PROBS = 6; - static const unsigned int MONO_NUM_PROBS = 4; + static const unsigned int MSD_NUM_PROBS = 3; + static const unsigned int MONO_NUM_PROBS = 2; + + static const unsigned int ORIENTATION_MONOTONE = 0; + static const unsigned int ORIENTATION_NON_MONOTONE = 1; + static const unsigned int ORIENTATION_SWAP = 1; + static const unsigned int ORIENTATION_DISCONTINUOUS = 2; - int m_orientation; // msd or monotone - std::vector<int> m_direction; // contains forward, backward, or both (bidirectional) - int m_condition; // fe or f - int m_numberscores; //2, 3, 4 or 6 - std::string m_filename; // probability table location - vector<FactorType> m_sourceFactors; - vector<FactorType> m_targetFactors; + int m_orientation; /**< msd or monotone */ + std::vector<int> m_direction; /**< contains forward, backward, or both (bidirectional) */ + int m_condition; /**< fe or f */ + int m_numScores; /**< 1, 2, 3, or 6 */ + int m_numOrientationTypes; /**< 2(mono) or 3(msd) */ + std::string m_filename; /**< probability table location */ + vector<FactorType> m_sourceFactors; /**< source factors to condition on */ + vector<FactorType> m_targetFactors; /**< target factors to condition on */ - ORIENTATION_TABLE m_orientation_table; // probability table + ORIENTATION_TABLE m_orientation_table; /**< probability table */ // Functions void LoadFile(void); @@ -95,6 +96,9 @@ public: // Descructor ~LexicalReordering(void) {} + // Compute Orientation + int GetOrientation(const Hypothesis *curr_hypothesis); + // Compute and return a score for a hypothesis std::vector<float> CalcScore(Hypothesis *curr_hypothesis); diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index 813c6aef2..3b70e7fa4 100755 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -186,36 +186,35 @@ bool StaticData::LoadParameters(int argc, char* argv[]) // load Lexical Reordering model const vector<string> &lrFileVector = m_parameter.GetParam("distortion-file"); + for(unsigned int i=0; i< lrFileVector.size(); i++ ) //loops for each distortion model { - - //if this went wrong, something went wrong in the parsing. - const vector<string> &lrTypeVector = m_parameter.GetParam("distortion"); + vector<string> specification = Tokenize<string>(lrFileVector[i]," "); + if (specification.size() != 4 ) + { + TRACE_ERR("ERROR: Expected format 'factors type weight-count filename' in specification of distortion file " << i << std::endl << lrFileVector[i] << std::endl); + return false; + } + //defaults, but at least one of these per model should be explicitly specified in the .ini file int orientation = DistortionOrientationType::Msd, direction = LexReorderType::Bidirectional, condition = LexReorderType::Fe; - if(lrTypeVector.size() < i) - { - std::cerr<<"ERROR: please specify one line of configuration under [distortion] per distortion model in the moses configuration file\n"; - abort(); - } - //Loop through, overriding defaults with specifications - vector<string> parameters = Tokenize<string>(lrTypeVector[i],"-"); + vector<string> parameters = Tokenize<string>(specification[1],"-"); for (size_t param=0; param<parameters.size(); param++) { string val = ToLower(parameters[param]); //orientation - if(val == "monotone") + if(val == "monotone" || val == "monotonicity") orientation = DistortionOrientationType::Monotone; - else if(val == "msd") + else if(val == "msd" || val == "orientation") orientation = DistortionOrientationType::Msd; //direction else if(val == "forward") direction = LexReorderType::Forward; - else if(val == "backward") + else if(val == "backward" || val == "unidirectional") direction = LexReorderType::Backward; else if(val == "bidirectional") direction = LexReorderType::Bidirectional; @@ -224,7 +223,10 @@ bool StaticData::LoadParameters(int argc, char* argv[]) condition = LexReorderType::F; else if(val == "fe") condition = LexReorderType::Fe; + if (orientation == DistortionOrientationType::Msd) + m_sourceStartPosMattersForRecombination = true; } + //compute the number of weights that ought to be in the table from this size_t numWeightsInTable = 0; if(orientation == DistortionOrientationType::Monotone) @@ -239,10 +241,17 @@ bool StaticData::LoadParameters(int argc, char* argv[]) { numWeightsInTable *= 2; } - - vector<string> token = Tokenize(lrFileVector[i]); - //characteristics of the phrase table - vector<string> inputfactors = Tokenize(token[0],"-"); + size_t specifiedNumWeights = Scan<size_t>(specification[2]); + if (specifiedNumWeights != numWeightsInTable) { + std::cerr << "specified number of weights (" + << specifiedNumWeights + << ") does not match correct number of weights for this type (" + << numWeightsInTable << std::endl; + abort(); + } + + //factors involved in this table + vector<string> inputfactors = Tokenize(specification[0],"-"); vector<FactorType> input,output; if(inputfactors.size() > 1) { @@ -254,11 +263,9 @@ bool StaticData::LoadParameters(int argc, char* argv[]) input.push_back(0); // default, just in case the user is actually using a bidirectional model output = Tokenize<FactorType>(inputfactors[0],","); } - size_t numWeights = Scan<size_t>(token[1]); - std::string filePath= token[2]; std::vector<float> m_lexWeights; //will store the weights for this particular distortion reorderer std::vector<float> newLexWeights; //we'll remove the weights used by this distortion reorder, leaving the weights yet to be used - if(numWeights == 1) // this is useful if the user just wants to train one weight for the model + if(specifiedNumWeights == 1) // this is useful if the user just wants to train one weight for the model { //add appropriate weight to weight vector assert(distortionModelWeights.size()> 0); //if this fails the user has not specified enough weights @@ -298,10 +305,11 @@ bool StaticData::LoadParameters(int argc, char* argv[]) // TRACE_ERR(m_lexWeights[weight] << "\t"); //} //TRACE_ERR(endl); - timer.check("Starting to load lexical reorder table..."); - TRACE_ERR(filePath << "..."); + + // loading the file + std::string filePath= specification[3]; + timer.check(("Start loading distortion table " + filePath).c_str()); m_reorderModels.push_back(new LexicalReordering(filePath, orientation, direction, condition, m_lexWeights, input, output)); - // timer.check("Finished loading lexical reorder table."); } if (m_parameter.GetParam("lmodel-file").size() > 0) diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h index 38cf1a525..d06f9cf5e 100755 --- a/moses/src/StaticData.h +++ b/moses/src/StaticData.h @@ -87,6 +87,7 @@ protected: bool m_dropUnknown; bool m_wordDeletionEnabled; + bool m_sourceStartPosMattersForRecombination; int m_inputType; unsigned m_numInputScores; @@ -156,6 +157,10 @@ public: return m_decodeStepList; } + inline bool GetSourceStartPosMattersForRecombination() const + { + return m_sourceStartPosMattersForRecombination; + } inline bool GetDropUnknown() const { return m_dropUnknown; diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h index b67724369..4a744ffda 100755 --- a/moses/src/TypeDef.h +++ b/moses/src/TypeDef.h @@ -111,8 +111,6 @@ namespace DistortionOrientationType Monotone, //distinguish only between monotone and non-monotone as possible orientations Msd //further separate non-monotone into swapped and discontinuous }; - // Possible values for orientation. - enum ORIENTATIONS { MONO, NON_MONO, SWAP, DISC }; //TODO explain values } enum IOMethod diff --git a/moses/src/WordsBitmap.cpp b/moses/src/WordsBitmap.cpp index 1b12aa487..9d0e3b09b 100755 --- a/moses/src/WordsBitmap.cpp +++ b/moses/src/WordsBitmap.cpp @@ -58,7 +58,7 @@ int WordsBitmap::GetFutureCosts(int lastPos) const } -std::vector<size_t> WordsBitmap::GetCompressedReprentation() const +std::vector<size_t> WordsBitmap::GetCompressedRepresentation() const { std::vector<size_t> res(1 + (m_size >> (sizeof(int) + 3)), 0); size_t c=0; size_t x=0; size_t ci=0; diff --git a/moses/src/WordsBitmap.h b/moses/src/WordsBitmap.h index b98df1a1b..af6429fe0 100755 --- a/moses/src/WordsBitmap.h +++ b/moses/src/WordsBitmap.h @@ -140,7 +140,7 @@ public: return m_size; } - std::vector<size_t> GetCompressedReprentation() const; + std::vector<size_t> GetCompressedRepresentation() const; inline int Compare (const WordsBitmap &compare) const { |