github.com/moses-smt/mosesdecoder.git
path: root/moses
author     phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230>  2006-09-28 22:36:38 +0400
committer  phkoehn <phkoehn@1f5c12ca-751b-0410-a591-d2e778427230>  2006-09-28 22:36:38 +0400
commit     3e7e4570b527282a12415ddf3a0908ebf2425dde (patch)
tree       86b711da04e9b3c7505e9479ee06d64fedab11dd /moses
parent     ef42ad791e432545add95e0b8a6a7bef80f52ba8 (diff)
bug fix to lexicalized reordering model
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@834 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rwxr-xr-x  moses/src/Hypothesis.cpp            36
-rwxr-xr-x  moses/src/Hypothesis.h              18
-rwxr-xr-x  moses/src/HypothesisCollection.cpp  14
-rwxr-xr-x  moses/src/HypothesisCollection.h    12
-rwxr-xr-x  moses/src/LexicalReordering.cpp    268
-rwxr-xr-x  moses/src/LexicalReordering.h       40
-rwxr-xr-x  moses/src/StaticData.cpp            54
-rwxr-xr-x  moses/src/StaticData.h               5
-rwxr-xr-x  moses/src/TypeDef.h                  2
-rwxr-xr-x  moses/src/WordsBitmap.cpp            2
-rwxr-xr-x  moses/src/WordsBitmap.h              2
11 files changed, 249 insertions(+), 204 deletions(-)
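
A minimal standalone sketch of the score lookup this commit introduces in LexicalReordering::CalcScore(): a table row now holds one probability per orientation and per direction (backward first), so the entry for a direction sits at index orientation + direction * numOrientationTypes. The helper name and parameters below are illustrative assumptions, not part of the Moses sources.

#include <cstddef>
#include <vector>

// Illustrative helper mirroring the indexing in the patched CalcScore().
// row:                 probabilities for one table entry, all directions concatenated
// orientation:         0 = monotone, 1 = swap / non-monotone, 2 = discontinuous
// numOrientationTypes: 2 for a monotone-only model, 3 for an msd model
// numScores:           number of trained feature weights for this reordering model
std::vector<float> LookupReorderingScores(const std::vector<float> &row,
                                          int orientation,
                                          std::size_t numDirections,
                                          std::size_t numOrientationTypes,
                                          std::size_t numScores)
{
    std::vector<float> score(numScores, 0.0f);
    for (std::size_t dir = 0; dir < numDirections; ++dir)
    {
        float value = row[orientation + dir * numOrientationTypes];
        if (numScores < numOrientationTypes)
            score[dir] = value;                                     // one weight per direction
        else
            score[orientation + dir * numOrientationTypes] = value; // one weight per direction and orientation type
    }
    return score;
}
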
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index 0365fa61d..0e946aa5f 100755
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -55,7 +55,7 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
, _lmstats(0)
{ // used for initial seeding of trans process
// initialize scores
- _hash_computed = false;
+ //_hash_computed = false;
s_HypothesesCreated = 1;
ResetScore();
}
@@ -85,7 +85,7 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
// that this hypothesis has already translated!
assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
- _hash_computed = false;
+ //_hash_computed = false;
m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
m_wordDeleted = transOpt.IsDeletionOption();
m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
@@ -160,24 +160,36 @@ Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &em
return new(ptr) Hypothesis(m_source, emptyTarget);
}
-void Hypothesis::GenerateNGramCompareHash() const
-{
- _hash = quick_hash((const char*)&m_languageModelStates[0], sizeof(LanguageModelSingleFactor::State) * m_languageModelStates.size(), 0xcafe5137);
- _hash_computed = true;
- vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedReprentation();
- _hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash);
-}
-
+//void Hypothesis::GenerateNGramCompareHash() const
+//{
+// _hash = quick_hash((const char*)&m_languageModelStates[0], sizeof(LanguageModelSingleFactor::State) * m_languageModelStates.size(), 0xcafe5137);
+// _hash_computed = true;
+// vector<size_t> wordCoverage = m_sourceCompleted.GetCompressedRepresentation();
+// _hash = quick_hash((const char*)&wordCoverage[0], sizeof(size_t)*wordCoverage.size(), _hash);
+//}
+
+/** check, if two hypothesis can be recombined.
+ this is actually a sorting function that allows us to
+ keep an ordered list of hypotheses. This makes recombination
+ much quicker.
+*/
int Hypothesis::NGramCompare(const Hypothesis &compare) const
{ // -1 = this < compare
// +1 = this > compare
// 0 = this ==compare
if (m_languageModelStates < compare.m_languageModelStates) return -1;
if (m_languageModelStates > compare.m_languageModelStates) return 1;
+ if (m_sourceCompleted.GetCompressedRepresentation() < compare.m_sourceCompleted.GetCompressedRepresentation()) return -1;
+ if (m_sourceCompleted.GetCompressedRepresentation() > compare.m_sourceCompleted.GetCompressedRepresentation()) return 1;
+ if (m_currSourceWordsRange.GetEndPos() < compare.m_currSourceWordsRange.GetEndPos()) return -1;
+ if (m_currSourceWordsRange.GetEndPos() > compare.m_currSourceWordsRange.GetEndPos()) return 1;
+ if (! StaticData::Instance()->GetSourceStartPosMattersForRecombination()) return 0;
+ if (m_currSourceWordsRange.GetStartPos() < compare.m_currSourceWordsRange.GetStartPos()) return -1;
+ if (m_currSourceWordsRange.GetStartPos() > compare.m_currSourceWordsRange.GetStartPos()) return 1;
return 0;
}
-/**
- * Calculates the overall language model score by combining the scores
+
+/** Calculates the overall language model score by combining the scores
* of language models generated for each of the factors. Because the factors
* represent a variety of tag sets, and because factors with smaller tag sets
* (such as POS instead of words) allow us to calculate richer statistics, we
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
index 56bc1a0fe..df1a423e0 100755
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@@ -83,9 +83,9 @@ protected:
void CalcDistortionScore();
//TODO: add appropriate arguments to score calculator
- void GenerateNGramCompareHash() const;
- mutable size_t _hash;
- mutable bool _hash_computed;
+ // void GenerateNGramCompareHash() const;
+ // mutable size_t _hash;
+ // mutable bool _hash_computed;
public:
static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
@@ -205,12 +205,12 @@ public:
int NGramCompare(const Hypothesis &compare) const;
- inline size_t hash() const
- {
- if (_hash_computed) return _hash;
- GenerateNGramCompareHash();
- return _hash;
- }
+ // inline size_t hash() const
+ // {
+ // if (_hash_computed) return _hash;
+ // GenerateNGramCompareHash();
+ // return _hash;
+ // }
void ToStream(std::ostream& out) const
{
diff --git a/moses/src/HypothesisCollection.cpp b/moses/src/HypothesisCollection.cpp
index 4569b77e0..9012f22c6 100755
--- a/moses/src/HypothesisCollection.cpp
+++ b/moses/src/HypothesisCollection.cpp
@@ -102,7 +102,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
Hypothesis *hypoExisting = *iter;
if (hypo->GetTotalScore() > hypoExisting->GetTotalScore())
{ // incoming hypo is better than the one we have
- VERBOSE(3,"better than matching hyp, recombining, ");
+ VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
if (m_nBestIsEnabled) {
hypo->AddArc(hypoExisting);
Detach(iter);
@@ -114,7 +114,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
}
else
{ // already storing the best hypo. discard current hypo
- VERBOSE(3,"worse than matching hyp, recombining" << std::endl)
+ VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
if (m_nBestIsEnabled) {
(*iter)->AddArc(hypo);
} else {
@@ -183,6 +183,16 @@ void HypothesisCollection::PruneToSize(size_t newSize)
}
VERBOSE(3,", pruned to size " << size() << endl);
+ IFVERBOSE(3) {
+ cerr << "stack now contains: ";
+ for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++)
+ {
+ Hypothesis *hypo = *iter;
+ cerr << hypo->GetId() << " (" << hypo->GetTotalScore() << ") ";
+ }
+ cerr << endl;
+ }
+
// set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack
m_worstScore = scoreThreshold;
// cerr << "Heap contains " << bestScores.size() << " items" << endl;
diff --git a/moses/src/HypothesisCollection.h b/moses/src/HypothesisCollection.h
index efd3ab257..236ce1599 100755
--- a/moses/src/HypothesisCollection.h
+++ b/moses/src/HypothesisCollection.h
@@ -66,12 +66,12 @@ struct HypothesisRecombinationComparer
}
};
-struct HypothesisRecombinationHasher
-{
- size_t operator()(const Hypothesis* hypo) const {
- return hypo->hash();
- }
-};
+//struct HypothesisRecombinationHasher
+//{
+// size_t operator()(const Hypothesis* hypo) const {
+// return hypo->hash();
+// }
+//};
/** Stack for instances of Hypothesis, includes functions for pruning. */
class HypothesisCollection
diff --git a/moses/src/LexicalReordering.cpp b/moses/src/LexicalReordering.cpp
index 9de9ab90c..bdef84786 100755
--- a/moses/src/LexicalReordering.cpp
+++ b/moses/src/LexicalReordering.cpp
@@ -7,45 +7,56 @@
#include <algorithm>
#include "LexicalReordering.h"
#include "InputFileStream.h"
-#include "DistortionOrientation.h"
#include "StaticData.h"
#include "Util.h"
using namespace std;
-/*
- * Load the file pointed to by filename; set up the table according to
- * the orientation and condition parameters. Direction will be used
- * later for computing the score.
- */
+/** Load the file pointed to by filename; set up the table according to
+ * the orientation and condition parameters. Direction will be used
+ * later for computing the score.
+ * \param filename file that contains the table
+ * \param orientation orientation as defined in DistortionOrientationType (monotone/msd)
+ * \param direction direction as defined in LexReorderType (forward/backward/bidirectional)
+ * \param condition either conditioned on foreign or foreign+english
+ * \param weights weight setting for this model
+ * \param input input factors
+ * \param output output factors
+ */
LexicalReordering::LexicalReordering(const std::string &filename,
int orientation, int direction,
int condition, const std::vector<float>& weights,
vector<FactorType> input, vector<FactorType> output) :
- m_orientation(orientation), m_condition(condition), m_numberscores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output)
+ m_orientation(orientation), m_condition(condition), m_numScores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output)
{
//add score producer
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
//manage the weights by SetWeightsForScoreProducer method of static data.
if(direction == LexReorderType::Bidirectional)
{
+ m_direction.push_back(LexReorderType::Backward); // this order is important
m_direction.push_back(LexReorderType::Forward);
- m_direction.push_back(LexReorderType::Backward);
}
else
{
m_direction.push_back(direction);
}
+ // set number of orientations
+ if( orientation == DistortionOrientationType::Monotone) {
+ m_numOrientationTypes = 2;
+ }
+ else if ( orientation == DistortionOrientationType::Msd) {
+ m_numOrientationTypes = 3;
+ }
const_cast<StaticData*>(StaticData::Instance())->SetWeightsForScoreProducer(this, weights);
// Load the file
LoadFile();
-// PrintTable();
+ // PrintTable();
}
-/*
- * Loads the file into a map.
- */
+/** Loads the orientation file into a map
+ */
void LexicalReordering::LoadFile()
{
InputFileStream inFile(m_filename);
@@ -73,14 +84,11 @@ void LexicalReordering::LoadFile()
probs = Scan<float>(Tokenize(tokens[F_PROBS]));
}
- if (m_orientation == DistortionOrientationType::Monotone)
- {
- assert(probs.size() == MONO_NUM_PROBS); // 2 backward, 2 forward
- }
- else
- {
- assert(probs.size() == MSD_NUM_PROBS); // 3 backward, 3 forward
- }
+ if (probs.size() != m_direction.size() * m_numOrientationTypes) {
+ TRACE_ERR("found " << probs.size() << " probabilities, expected "
+ << m_direction.size() * m_numOrientationTypes << endl);
+ exit(0);
+ }
std::vector<float> scv(probs.size());
std::transform(probs.begin(),probs.end(),probs.begin(),TransformScore);
m_orientation_table[key] = probs;
@@ -88,9 +96,8 @@ void LexicalReordering::LoadFile()
inFile.Close();
}
-/*
- * Print the table in a readable format.
- */
+/** print the table in a readable format (not used at this point)
+ */
void LexicalReordering::PrintTable()
{
// iterate over map
@@ -113,140 +120,141 @@ void LexicalReordering::PrintTable()
}
}
-std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis)
+/** compute the orientation given a hypothesis
+ */
+int LexicalReordering::GetOrientation(const Hypothesis *curr_hypothesis)
{
- std::vector<float> score(m_numberscores, 0);
- vector<float> val;
- for(unsigned int i=0; i < m_direction.size(); i++)
+ const Hypothesis *prevHypo = curr_hypothesis->GetPrevHypo();
+
+ const WordsRange &currSourceRange = curr_hypothesis->GetCurrSourceWordsRange();
+ size_t curr_source_start = currSourceRange.GetStartPos();
+ size_t curr_source_end = currSourceRange.GetEndPos();
+
+ //if there's no previous source...
+ if(prevHypo->GetId() == 0){
+ if (curr_source_start == 0)
+ {
+ return ORIENTATION_MONOTONE;
+ }
+ else {
+ return ORIENTATION_DISCONTINUOUS;
+ }
+ }
+
+
+ const WordsRange &prevSourceRange = prevHypo->GetCurrSourceWordsRange();
+ size_t prev_source_start = prevSourceRange.GetStartPos();
+ size_t prev_source_end = prevSourceRange.GetEndPos();
+ if(prev_source_end==curr_source_start-1)
{
- int direction = m_direction[i];
- int orientation = DistortionOrientation::GetOrientation(hypothesis, direction);
- if(m_condition==LexReorderType::Fe)
+ return ORIENTATION_MONOTONE;
+ }
+ // distinguish between monotone, swap, discontinuous
+ else if(m_orientation==DistortionOrientationType::Msd)
+ {
+ if(prev_source_start==curr_source_end+1)
{
- //this key string is F+'|||'+E from the hypothesis
- val=m_orientation_table[hypothesis->GetSourcePhraseStringRep(m_sourceFactors)
- +"||| "
- +hypothesis->GetTargetPhraseStringRep(m_targetFactors)];
+ return ORIENTATION_SWAP;
}
else
{
- //this key string is F from the hypothesis
- val=m_orientation_table[hypothesis->GetTargetPhraseStringRep(m_sourceFactors)];
+ return ORIENTATION_DISCONTINUOUS;
}
- if(val.size()> 0)
- {
- if(m_orientation==DistortionOrientationType::Msd)
- {
- if(direction==LexReorderType::Backward)
- {
- if(orientation==DistortionOrientationType::MONO)
- {
- score[BACK_M] = val[BACK_M];
- }
- else if(orientation==DistortionOrientationType::SWAP)
- {
- score[BACK_S] = val[BACK_S];
- }
- else
- {
- score[BACK_D] = val[BACK_D];
- }
-
- }
- else
- {
- //if we only have forward scores (no backward scores) in the table,
- //then forward scores have no offset so we can use the indices of the backwards scores
- if(orientation==DistortionOrientationType::MONO)
- {
- if(m_numberscores>3)
- {
- score[FOR_M] = val[FOR_M];
- }
- else
- {
- score[BACK_M] = val[BACK_M];
- }
- }
- else if(orientation==DistortionOrientationType::SWAP)
+ }
+ // only distinguish between monotone, non monotone
+ else
+ {
+ return ORIENTATION_NON_MONOTONE;
+ }
+}
+
+/** calculate the score(s) for a hypothesis
+ */
+std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis)
+{
+ std::vector<float> score(m_numScores, 0);
+ for(unsigned int i=0; i < m_direction.size(); i++) // backward, forward, or both
+ {
+ vector<float> val; // we will score the matching probability here
+
+ // FIRST, get probability distribution
+
+ int direction = m_direction[i]; // either backward or forward
+
+ // no score, if we would have to compute the forward score from the initial hypothesis
+ if (direction == LexReorderType::Backward || hypothesis->GetPrevHypo()->GetId() != 0) {
+
+ if (direction == LexReorderType::Backward) {
+ // conditioned on both foreign and English
+ if(m_condition==LexReorderType::Fe)
{
- if(m_numberscores>3)
- {
- score[FOR_S] = val[FOR_S];
- }
- else
- {
- score[BACK_S] = val[BACK_S];
- }
+ //this key string is F+'|||'+E from the hypothesis
+ val=m_orientation_table[hypothesis->GetSourcePhraseStringRep(m_sourceFactors)
+ +"||| "
+ +hypothesis->GetTargetPhraseStringRep(m_targetFactors)];
}
- else
+ // only conditioned on foreign
+ else
{
- if(m_numberscores>3)
- {
- score[FOR_D] = val[FOR_D];
- }
- else
- {
- score[BACK_D] = val[BACK_D];
- }
+ //this key string is F from the hypothesis
+ val=m_orientation_table[hypothesis->GetTargetPhraseStringRep(m_sourceFactors)];
}
- }
}
- else
- {
- if(direction==LexReorderType::Backward)
- {
- if(orientation==DistortionOrientationType::MONO)
- {
- score[BACK_MONO] = val[BACK_MONO];
- }
- else
- {
- score[BACK_NONMONO] = val[BACK_NONMONO];
- }
- }
- else
- {
- //if we only have forward scores (no backward scores) in the table,
- //then forward scores have no offset so we can use the indices of the backwards scores
- if(orientation==DistortionOrientationType::MONO)
+
+ // if forward looking, condition on previous phrase
+ else {
+ // conditioned on both foreign and English
+ if(m_condition==LexReorderType::Fe)
{
- if(m_numberscores>3)
- {
- score[FOR_MONO] = val[FOR_MONO];
- }
- else
- {
- score[BACK_MONO] = val[BACK_MONO];
- }
+ //this key string is F+'|||'+E from the hypothesis
+ val=m_orientation_table[hypothesis->GetPrevHypo()->GetSourcePhraseStringRep(m_sourceFactors)
+ +"||| "
+ +hypothesis->GetPrevHypo()->GetTargetPhraseStringRep(m_targetFactors)];
}
- else
+ // only conditioned on foreign
+ else
{
- if(m_numberscores>3)
- {
- score[FOR_NONMONO] = val[FOR_NONMONO];
- }
- else
- {
- score[BACK_NONMONO] = val[BACK_NONMONO];
- }
+ //this key string is F from the hypothesis
+ val=m_orientation_table[hypothesis->GetPrevHypo()->GetTargetPhraseStringRep(m_sourceFactors)];
}
- }
}
-
+ }
+
+ // SECOND, look up score
+
+ if(val.size()> 0) // valid entry
+ {
+ int orientation = GetOrientation(hypothesis);
+ float value = val[ orientation + i * m_numOrientationTypes ];
+ // one weight per direction
+ if ( m_numScores < m_numOrientationTypes ) {
+ score[i] = value;
+ }
+ // one weight per direction and type
+ else {
+ score[ orientation + i * m_numOrientationTypes ] = value;
+ }
+
+ // IFVERBOSE(3) {
+ // cerr << "\tdistortion type " << orientation << " =>";
+ // for(unsigned int j=0;j<score.size();j++) {
+ // cerr << " " << score[j];
+ // }
+ // cerr << endl;
+ // }
}
}
return score;
}
-
+/** return the number of scores produced by this model */
unsigned int LexicalReordering::GetNumScoreComponents() const
{
- return m_numberscores;
+ return m_numScores;
}
+/** returns description of the model */
const std::string LexicalReordering::GetScoreProducerDescription() const
{
return "Lexicalized reordering score, file=" + m_filename;
}
-
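
A minimal sketch of the span-based test performed by the new LexicalReordering::GetOrientation() above; the enum and function names are hypothetical stand-ins for the ORIENTATION_* constants added in LexicalReordering.h.

#include <cstddef>

// Hypothetical stand-ins for ORIENTATION_MONOTONE / _SWAP / _NON_MONOTONE / _DISCONTINUOUS.
enum Orientation { Monotone = 0, Swap = 1, NonMonotone = 1, Discontinuous = 2 };

// msdModel == true distinguishes monotone / swap / discontinuous,
// msdModel == false only monotone vs. non-monotone.
Orientation ClassifyOrientation(std::size_t prevStart, std::size_t prevEnd,
                                std::size_t currStart, std::size_t currEnd,
                                bool prevIsInitial, bool msdModel)
{
    if (prevIsInitial)                 // no source phrase translated yet
        return currStart == 0 ? Monotone : Discontinuous;
    if (prevEnd + 1 == currStart)      // current phrase directly follows the previous one
        return Monotone;
    if (!msdModel)
        return NonMonotone;
    if (prevStart == currEnd + 1)      // current phrase directly precedes the previous one
        return Swap;
    return Discontinuous;
}
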
diff --git a/moses/src/LexicalReordering.h b/moses/src/LexicalReordering.h
index 8bab36b99..329c5ec83 100755
--- a/moses/src/LexicalReordering.h
+++ b/moses/src/LexicalReordering.h
@@ -37,11 +37,11 @@ class Phrase;
class Hypothesis;
using namespace std;
-/***
- * The LexicalReordering class handles everything involved with
+/** The LexicalReordering class handles everything involved with
* lexical reordering. It loads a probability table P(orientation|f,e)
* and computes scores in either forward, backward, or bidirectional
* direction.
+ * This model is described in Koehn et al. [IWSLT 2005]
*/
class LexicalReordering : public ScoreProducer
@@ -49,14 +49,9 @@ class LexicalReordering : public ScoreProducer
private:
- // Members
+ // This stores the model table
typedef std::map<std::string, std::vector<float> > ORIENTATION_TABLE;
- // This is the order in which the different forward/backward
- // probabilities are stored in the table.
- enum TableLookupMsd { BACK_M, BACK_S, BACK_D, FOR_M,FOR_S, FOR_D };
- enum TableLookupMonotone { BACK_MONO, BACK_NONMONO, FOR_MONO, FOR_NONMONO};
-
// This is the order in which pieces appear in the orientation table
// when conditioning on f and e.
enum FEFileFormat { FE_FOREIGN, FE_ENGLISH, FE_PROBS };
@@ -67,19 +62,25 @@ private:
// different numbers of probabilities for different ranges of
// orientation variable
- static const unsigned int MSD_NUM_PROBS = 6;
- static const unsigned int MONO_NUM_PROBS = 4;
+ static const unsigned int MSD_NUM_PROBS = 3;
+ static const unsigned int MONO_NUM_PROBS = 2;
+
+ static const unsigned int ORIENTATION_MONOTONE = 0;
+ static const unsigned int ORIENTATION_NON_MONOTONE = 1;
+ static const unsigned int ORIENTATION_SWAP = 1;
+ static const unsigned int ORIENTATION_DISCONTINUOUS = 2;
- int m_orientation; // msd or monotone
- std::vector<int> m_direction; // contains forward, backward, or both (bidirectional)
- int m_condition; // fe or f
- int m_numberscores; //2, 3, 4 or 6
- std::string m_filename; // probability table location
- vector<FactorType> m_sourceFactors;
- vector<FactorType> m_targetFactors;
+ int m_orientation; /**< msd or monotone */
+ std::vector<int> m_direction; /**< contains forward, backward, or both (bidirectional) */
+ int m_condition; /**< fe or f */
+ int m_numScores; /**< 1, 2, 3, or 6 */
+ int m_numOrientationTypes; /**< 2(mono) or 3(msd) */
+ std::string m_filename; /**< probability table location */
+ vector<FactorType> m_sourceFactors; /**< source factors to condition on */
+ vector<FactorType> m_targetFactors; /**< target factors to condition on */
- ORIENTATION_TABLE m_orientation_table; // probability table
+ ORIENTATION_TABLE m_orientation_table; /**< probability table */
// Functions
void LoadFile(void);
@@ -95,6 +96,9 @@ public:
// Descructor
~LexicalReordering(void) {}
+ // Compute Orientation
+ int GetOrientation(const Hypothesis *curr_hypothesis);
+
// Compute and return a score for a hypothesis
std::vector<float> CalcScore(Hypothesis *curr_hypothesis);
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 813c6aef2..3b70e7fa4 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -186,36 +186,35 @@ bool StaticData::LoadParameters(int argc, char* argv[])
// load Lexical Reordering model
const vector<string> &lrFileVector =
m_parameter.GetParam("distortion-file");
+
for(unsigned int i=0; i< lrFileVector.size(); i++ ) //loops for each distortion model
{
-
- //if this went wrong, something went wrong in the parsing.
- const vector<string> &lrTypeVector = m_parameter.GetParam("distortion");
+ vector<string> specification = Tokenize<string>(lrFileVector[i]," ");
+ if (specification.size() != 4 )
+ {
+ TRACE_ERR("ERROR: Expected format 'factors type weight-count filename' in specification of distortion file " << i << std::endl << lrFileVector[i] << std::endl);
+ return false;
+ }
+
//defaults, but at least one of these per model should be explicitly specified in the .ini file
int orientation = DistortionOrientationType::Msd,
direction = LexReorderType::Bidirectional,
condition = LexReorderType::Fe;
- if(lrTypeVector.size() < i)
- {
- std::cerr<<"ERROR: please specify one line of configuration under [distortion] per distortion model in the moses configuration file\n";
- abort();
- }
-
//Loop through, overriding defaults with specifications
- vector<string> parameters = Tokenize<string>(lrTypeVector[i],"-");
+ vector<string> parameters = Tokenize<string>(specification[1],"-");
for (size_t param=0; param<parameters.size(); param++)
{
string val = ToLower(parameters[param]);
//orientation
- if(val == "monotone")
+ if(val == "monotone" || val == "monotonicity")
orientation = DistortionOrientationType::Monotone;
- else if(val == "msd")
+ else if(val == "msd" || val == "orientation")
orientation = DistortionOrientationType::Msd;
//direction
else if(val == "forward")
direction = LexReorderType::Forward;
- else if(val == "backward")
+ else if(val == "backward" || val == "unidirectional")
direction = LexReorderType::Backward;
else if(val == "bidirectional")
direction = LexReorderType::Bidirectional;
@@ -224,7 +223,10 @@ bool StaticData::LoadParameters(int argc, char* argv[])
condition = LexReorderType::F;
else if(val == "fe")
condition = LexReorderType::Fe;
+ if (orientation == DistortionOrientationType::Msd)
+ m_sourceStartPosMattersForRecombination = true;
}
+
//compute the number of weights that ought to be in the table from this
size_t numWeightsInTable = 0;
if(orientation == DistortionOrientationType::Monotone)
@@ -239,10 +241,17 @@ bool StaticData::LoadParameters(int argc, char* argv[])
{
numWeightsInTable *= 2;
}
-
- vector<string> token = Tokenize(lrFileVector[i]);
- //characteristics of the phrase table
- vector<string> inputfactors = Tokenize(token[0],"-");
+ size_t specifiedNumWeights = Scan<size_t>(specification[2]);
+ if (specifiedNumWeights != numWeightsInTable) {
+ std::cerr << "specified number of weights ("
+ << specifiedNumWeights
+ << ") does not match correct number of weights for this type ("
+ << numWeightsInTable << std::endl;
+ abort();
+ }
+
+ //factors involved in this table
+ vector<string> inputfactors = Tokenize(specification[0],"-");
vector<FactorType> input,output;
if(inputfactors.size() > 1)
{
@@ -254,11 +263,9 @@ bool StaticData::LoadParameters(int argc, char* argv[])
input.push_back(0); // default, just in case the user is actually using a bidirectional model
output = Tokenize<FactorType>(inputfactors[0],",");
}
- size_t numWeights = Scan<size_t>(token[1]);
- std::string filePath= token[2];
std::vector<float> m_lexWeights; //will store the weights for this particular distortion reorderer
std::vector<float> newLexWeights; //we'll remove the weights used by this distortion reorder, leaving the weights yet to be used
- if(numWeights == 1) // this is useful if the user just wants to train one weight for the model
+ if(specifiedNumWeights == 1) // this is useful if the user just wants to train one weight for the model
{
//add appropriate weight to weight vector
assert(distortionModelWeights.size()> 0); //if this fails the user has not specified enough weights
@@ -298,10 +305,11 @@ bool StaticData::LoadParameters(int argc, char* argv[])
// TRACE_ERR(m_lexWeights[weight] << "\t");
//}
//TRACE_ERR(endl);
- timer.check("Starting to load lexical reorder table...");
- TRACE_ERR(filePath << "...");
+
+ // loading the file
+ std::string filePath= specification[3];
+ timer.check(("Start loading distortion table " + filePath).c_str());
m_reorderModels.push_back(new LexicalReordering(filePath, orientation, direction, condition, m_lexWeights, input, output));
- // timer.check("Finished loading lexical reorder table.");
}
if (m_parameter.GetParam("lmodel-file").size() > 0)
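
A minimal, self-contained sketch of how the reworked [distortion-file] specification parsed above is structured: four whitespace-separated fields, "factors type weight-count filename", with the type field itself dash-separated into orientation, direction, and condition. The example line and file path are made up for illustration and do not come from the Moses sources.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    // hypothetical example of one distortion-file specification line
    std::string spec = "0-0 msd-bidirectional-fe 6 /some/path/reordering-table";

    // split into the four expected fields: factors, type, weight count, filename
    std::istringstream in(spec);
    std::vector<std::string> fields;
    for (std::string tok; in >> tok; )
        fields.push_back(tok);
    if (fields.size() != 4)
    {
        std::cerr << "expected 'factors type weight-count filename'" << std::endl;
        return 1;
    }

    // split the type field on '-' into orientation / direction / condition tokens
    std::vector<std::string> type;
    std::istringstream typeStream(fields[1]);
    for (std::string tok; std::getline(typeStream, tok, '-'); )
        type.push_back(tok);

    std::cout << "factors=" << fields[0]
              << " orientation=" << type[0]
              << " direction=" << type[1]
              << " condition=" << type[2]
              << " weights=" << fields[2]
              << " file=" << fields[3] << std::endl;
    return 0;
}
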
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index 38cf1a525..d06f9cf5e 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -87,6 +87,7 @@ protected:
bool m_dropUnknown;
bool m_wordDeletionEnabled;
+ bool m_sourceStartPosMattersForRecombination;
int m_inputType;
unsigned m_numInputScores;
@@ -156,6 +157,10 @@ public:
return m_decodeStepList;
}
+ inline bool GetSourceStartPosMattersForRecombination() const
+ {
+ return m_sourceStartPosMattersForRecombination;
+ }
inline bool GetDropUnknown() const
{
return m_dropUnknown;
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index b67724369..4a744ffda 100755
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -111,8 +111,6 @@ namespace DistortionOrientationType
Monotone, //distinguish only between monotone and non-monotone as possible orientations
Msd //further separate non-monotone into swapped and discontinuous
};
- // Possible values for orientation.
- enum ORIENTATIONS { MONO, NON_MONO, SWAP, DISC }; //TODO explain values
}
enum IOMethod
diff --git a/moses/src/WordsBitmap.cpp b/moses/src/WordsBitmap.cpp
index 1b12aa487..9d0e3b09b 100755
--- a/moses/src/WordsBitmap.cpp
+++ b/moses/src/WordsBitmap.cpp
@@ -58,7 +58,7 @@ int WordsBitmap::GetFutureCosts(int lastPos) const
}
-std::vector<size_t> WordsBitmap::GetCompressedReprentation() const
+std::vector<size_t> WordsBitmap::GetCompressedRepresentation() const
{
std::vector<size_t> res(1 + (m_size >> (sizeof(int) + 3)), 0);
size_t c=0; size_t x=0; size_t ci=0;
diff --git a/moses/src/WordsBitmap.h b/moses/src/WordsBitmap.h
index b98df1a1b..af6429fe0 100755
--- a/moses/src/WordsBitmap.h
+++ b/moses/src/WordsBitmap.h
@@ -140,7 +140,7 @@ public:
return m_size;
}
- std::vector<size_t> GetCompressedReprentation() const;
+ std::vector<size_t> GetCompressedRepresentation() const;
inline int Compare (const WordsBitmap &compare) const
{