Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author Eva <eva@deimos.(none)> 2012-05-28 10:03:45 +0400
committer Eva <eva@deimos.(none)> 2012-05-28 10:03:45 +0400
commit d81ebe2337c0de18aa6d510bf13c408b83a083ca (patch)
tree a37087830326bc26288d8d436ea1fba315029dbf /moses
parent b8d68b455353d008395d2c53192ab566b07e678b (diff)
Fix nbest lists for chart mira
Diffstat (limited to 'moses')
-rwxr-xr-x moses/src/BleuScoreFeature.cpp 23
-rwxr-xr-x moses/src/ChartManager.cpp 52
-rwxr-xr-x moses/src/Parameter.cpp 2
-rwxr-xr-x moses/src/StaticData.cpp 12
-rwxr-xr-x moses/src/StaticData.h 3
5 files changed, 45 insertions, 47 deletions
diff --git a/moses/src/BleuScoreFeature.cpp b/moses/src/BleuScoreFeature.cpp
index f1c888e70..dbeabe8b9 100755
--- a/moses/src/BleuScoreFeature.cpp
+++ b/moses/src/BleuScoreFeature.cpp
@@ -20,30 +20,13 @@ BleuScoreState::BleuScoreState(): m_words(1),
int BleuScoreState::Compare(const FFState& o) const
{
if (&o == this)
- return 0;
+ return 0;
const BleuScoreState& other = dynamic_cast<const BleuScoreState&>(o);
-
- /*if (m_target_length < other.m_target_length)
- return -1;
- if (m_target_length > other.m_target_length)
- return 1;*/
-
int c = m_words.Compare(other.m_words);
-
if (c != 0)
- return c;
-
- for(size_t i = 0; i < m_ngram_counts.size(); i++) {
- if (m_ngram_counts[i] < other.m_ngram_counts[i])
- return -1;
- if (m_ngram_counts[i] > other.m_ngram_counts[i])
- return 1;
- if (m_ngram_matches[i] < other.m_ngram_matches[i])
- return -1;
- if (m_ngram_matches[i] > other.m_ngram_matches[i])
- return 1;
- }
+ return c;
+
return 0;
}
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state) {
diff --git a/moses/src/ChartManager.cpp b/moses/src/ChartManager.cpp
index b2ed3a435..ad8f0dc84 100755
--- a/moses/src/ChartManager.cpp
+++ b/moses/src/ChartManager.cpp
@@ -141,7 +141,7 @@ const ChartHypothesis *ChartManager::GetBestHypothesis() const
}
}
-void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDistinct) const
+void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret, bool onlyDistinct) const
{
size_t size = m_source.GetSize();
if (count == 0 || size == 0)
@@ -158,21 +158,16 @@ void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDi
boost::shared_ptr<ChartTrellisPath> basePath(new ChartTrellisPath(*hypo));
// Add it to the n-best list.
- ret.Add(basePath);
if (count == 1) {
+ ret.Add(basePath);
return;
}
- // Record the output phrase if distinct translations are required.
- set<Phrase> distinctHyps;
- if (onlyDistinct) {
- distinctHyps.insert(basePath->GetOutputPhrase());
- }
-
// Set a limit on the number of detours to pop. If the n-best list is
// restricted to distinct translations then this limit should be bigger
// than n. The n-best factor determines how much bigger the limit should be.
- const size_t nBestFactor = StaticData::Instance().GetNBestFactor();
+ const StaticData &staticData = StaticData::Instance();
+ const size_t nBestFactor = staticData.GetNBestFactor();
size_t popLimit;
if (!onlyDistinct) {
popLimit = count-1;
@@ -188,36 +183,47 @@ void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDi
// contain no more than popLimit items.
ChartTrellisDetourQueue contenders(popLimit);
- // Create a ChartTrellisDetour for each single-point deviation from basePath
- // and add them to the queue.
- CreateDeviantPaths(basePath, contenders);
-
+ // Get all complete translations
+ Word *w = new Word();
+ w->CreateFromString(Output, staticData.GetOutputFactorOrder(), "TOP", true);
+ const HypoList topHypos = lastCell.GetSortedHypotheses(*w);
+
+ // Create a ChartTrellisDetour for each complete translation and add it to the queue
+ for (size_t i=0; i<topHypos.size(); ++i) {
+ const ChartHypothesis &hypo = *(topHypos[i]);
+ boost::shared_ptr<ChartTrellisPath> basePath(new ChartTrellisPath(hypo));
+ ChartTrellisDetour *detour = new ChartTrellisDetour(basePath, basePath->GetFinalNode(), hypo);
+ contenders.Push(detour);
+ }
+
+ // Record the output phrase if distinct translations are required.
+ set<Phrase> distinctHyps;
+
// MAIN loop
- for (size_t i = 0;
- ret.GetSize() < count && !contenders.Empty() && i < popLimit;
- ++i) {
+ for (size_t i = 0; ret.GetSize() < count && !contenders.Empty() && i < popLimit; ++i) {
// Get the best detour from the queue.
std::auto_ptr<const ChartTrellisDetour> detour(contenders.Pop());
CHECK(detour.get());
// Create a full base path from the chosen detour.
- basePath.reset(new ChartTrellisPath(*detour));
-
+ //basePath.reset(new ChartTrellisPath(*detour));
+ boost::shared_ptr<ChartTrellisPath> path(new ChartTrellisPath(*detour));
+
// Generate new detours from this base path and add them to the queue of
// contenders. The new detours deviate from the base path by a single
// replacement along the previous detour sub-path.
- CHECK(basePath->GetDeviationPoint());
- CreateDeviantPaths(basePath, *(basePath->GetDeviationPoint()), contenders);
+ CHECK(path->GetDeviationPoint());
+ CreateDeviantPaths(path, *(path->GetDeviationPoint()), contenders);
// If the n-best list is allowed to contain duplicate translations (at the
// surface level) then add the new path unconditionally, otherwise check
// whether the translation has seen before.
if (!onlyDistinct) {
- ret.Add(basePath);
+ ret.Add(path);
} else {
- Phrase tgtPhrase = basePath->GetOutputPhrase();
+ Phrase tgtPhrase = path->GetOutputPhrase();
if (distinctHyps.insert(tgtPhrase).second) {
- ret.Add(basePath);
+ ret.Add(path);
}
}
}
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
index 64b842074..a24c6033c 100755
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -48,6 +48,7 @@ Parameter::Parameter()
AddParam("dlm-model", "Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
AddParam("drop-unknown", "du", "drop unknown words instead of copying them");
AddParam("disable-discarding", "dd", "disable hypothesis discarding");
+ AddParam("distinct-nbest", "only distinct translations in nbest list");
AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
AddParam("generation-file", "location and properties of the generation table");
AddParam("global-lexical-file", "gl", "discriminatively trained global lexical translation model file");
@@ -116,6 +117,7 @@ Parameter::Parameter()
AddParam("xml-brackets", "xb", "specify strings to be used as xml tags opening and closing, e.g. \"{{ }}\" (default \"< >\"). Avoid square brackets because of configuration file format. Valid only with text input mode" );
AddParam("minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
AddParam("lminimum-bayes-risk", "lmbr", "use lattice miminum Bayes risk to determine best translation");
+ AddParam("mira", "do mira training");
AddParam("consensus-decoding", "con", "use consensus decoding (De Nero et. al. 2009)");
AddParam("mbr-size", "number of translation candidates considered in MBR decoding (default 200)");
AddParam("mbr-scale", "scaling factor to convert log linear score probability in MBR decoding (default 1.0)");
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 90418ace4..dc8c10d60 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -246,6 +246,9 @@ bool StaticData::LoadData(Parameter *parameter)
} else {
m_nBestFactor = 20;
}
+
+ // explicit setting of distinct nbest
+ SetBooleanParameter( &m_onlyDistinctNBest, "distinct-nbest", false);
//lattice samples
if (m_parameter->GetParam("lattice-samples").size() ==2 ) {
@@ -451,6 +454,9 @@ bool StaticData::LoadData(Parameter *parameter)
cerr << "Errror: Cannot use both n-best mbr and lattice mbr together" << endl;
exit(1);
}
+
+ //mira training
+ SetBooleanParameter( &m_mira, "mira", false );
if (m_useLatticeMBR) m_mbr = true;
@@ -1793,7 +1799,7 @@ bool StaticData::LoadTargetWordInsertionFeature()
return false;
}
- if (!m_UseAlignmentInfo) {
+ if (!m_UseAlignmentInfo && GetSearchAlgorithm() != ChartDecoding) {
UserMessage::Add("Target word insertion feature needs word alignments in phrase table.");
return false;
}
@@ -1832,7 +1838,7 @@ bool StaticData::LoadSourceWordDeletionFeature()
return false;
}
- if (!m_UseAlignmentInfo) {
+ if (!m_UseAlignmentInfo && GetSearchAlgorithm() != ChartDecoding) {
UserMessage::Add("Source word deletion feature needs word alignments in phrase table.");
return false;
}
@@ -1878,7 +1884,7 @@ bool StaticData::LoadWordTranslationFeature()
return false;
}
- if (!m_UseAlignmentInfo) {
+ if (!m_UseAlignmentInfo && GetSearchAlgorithm() != ChartDecoding) {
UserMessage::Add("Word translation feature needs word alignments in phrase table.");
return false;
}
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index cd0b69f4d..1b8075e25 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -184,6 +184,7 @@ protected:
bool m_mbr; //! use MBR decoder
bool m_useLatticeMBR; //! use MBR decoder
+ bool m_mira; // do mira training
bool m_useConsensusDecoding; //! Use Consensus decoding (DeNero et al 2009)
size_t m_mbrSize; //! number of translation candidates considered
float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation
@@ -454,7 +455,7 @@ public:
return m_nBestFilePath;
}
bool IsNBestEnabled() const {
- return (!m_nBestFilePath.empty()) || m_mbr || m_useLatticeMBR || m_outputSearchGraph || m_useConsensusDecoding || !m_latticeSamplesFilePath.empty()
+ return (!m_nBestFilePath.empty()) || m_mbr || m_useLatticeMBR || m_mira || m_outputSearchGraph || m_useConsensusDecoding || !m_latticeSamplesFilePath.empty()
#ifdef HAVE_PROTOBUF
|| m_outputSearchGraphPB
#endif