Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author: redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-18 23:12:42 +0400
committer: redpony <redpony@1f5c12ca-751b-0410-a591-d2e778427230> 2006-07-18 23:12:42 +0400
commit: 507b8a7df764ff2f2928335baa43ed6d56f7d042 (patch)
tree: 5d1b279189275fad10da51c9c0f9bbc78d69fd2f /moses
parent: 5c5d971895c177be55cf6b43acd3c827915f2bb4 (diff)
remove overhead for word deletion
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@177 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r--  moses/src/DeletionHypothesis.cpp  69
-rw-r--r--  moses/src/DeletionHypothesis.h    58
-rwxr-xr-x  moses/src/Hypothesis.cpp          30
-rw-r--r--  moses/src/Makefile.am              2
-rwxr-xr-x  moses/src/PhraseDictionary.cpp     9
-rwxr-xr-x  moses/src/PhraseDictionary.h       5
-rwxr-xr-x  moses/src/StaticData.cpp          13
-rwxr-xr-x  moses/src/StaticData.h            18
-rwxr-xr-x  moses/src/TypeDef.h                4
-rw-r--r--  moses/src/WordDeletionTable.cpp   54
10 files changed, 25 insertions, 237 deletions
diff --git a/moses/src/DeletionHypothesis.cpp b/moses/src/DeletionHypothesis.cpp
deleted file mode 100644
index 88bcacb99..000000000
--- a/moses/src/DeletionHypothesis.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include "DeletionHypothesis.h"
-
-
-
-/***
- * calculate the score due to source words dropped; set the appropriate elements of m_score
- */
-void DeletionHypothesis::CalcDeletionScore(const Sentence& sourceSentence, const WordsRange& sourceWordsRange, const WordDeletionTable& wordDeletionTable)
-{
- m_score[ScoreType::DeletedWords] = wordDeletionTable.GetDeletionCost(sourceSentence.GetSubString(sourceWordsRange));
-}
-
-/***
- * Set the total-score field from the various individual score parts
- * (not necessarily using all of them)
- */
-void DeletionHypothesis::SumIndividualScores(const StaticData& staticData)
-{
- m_score[ScoreType::Total] = m_score[ScoreType::PhraseTrans]
- + m_score[ScoreType::Generation]
- + m_score[ScoreType::LanguageModelScore]
- + m_score[ScoreType::Distortion] * staticData.GetWeightDistortion()
- + m_score[ScoreType::WordPenalty] * staticData.GetWeightWordPenalty()
- + m_score[ScoreType::DeletedWords]
- + m_score[ScoreType::FutureScoreEnum];
-}
-
-void DeletionHypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source)
-{
- // DISTORTION COST
- CalcDistortionScore();
-
- // LANGUAGE MODEL COST
- CalcLMScore(staticData.GetLanguageModel(Initial), staticData.GetLanguageModel(Other));
-
- // WORD PENALTY
- m_score[ScoreType::WordPenalty] = - (float) GetSize();
-
- // FUTURE COST
- CalcFutureScore(futureScore);
-
- //cost for deleting source words
-// CalcDeletionScore(source, GetCurrSourceWordsRange(), staticData.GetWordDeletionTable());
-
- //LEXICAL REORDERING COST
- CalcLexicalReorderingScore();
-
- // TOTAL COST
- SumIndividualScores(staticData);
-}
diff --git a/moses/src/DeletionHypothesis.h b/moses/src/DeletionHypothesis.h
deleted file mode 100644
index f6df1f64b..000000000
--- a/moses/src/DeletionHypothesis.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#pragma once
-
-#include "Sentence.h"
-#include "WordsBitmap.h"
-#include "WordsRange.h"
-#include "WordDeletionTable.h"
-#include "StaticData.h"
-#include "Hypothesis.h"
-#include "TranslationOption.h"
-
-/***
- * Describe a hypothesis extension that involves translating a source phrase to the empty phrase
- * (ie dropping the source words)
- */
-class DeletionHypothesis : public Hypothesis
-{
- friend class Hypothesis; //for the factory functions
-
- protected:
-
- DeletionHypothesis(const WordsBitmap &initialCoverage) : Hypothesis(Phrase(), initialCoverage) {}
- DeletionHypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt) : Hypothesis(prevHypo, transOpt) {}
- virtual ~DeletionHypothesis() {}
-
- /***
- * calculate the score due to source words dropped; set the appropriate elements of m_score
- */
- void CalcDeletionScore(const Sentence& sourceSentence, const WordsRange& sourceWordsRange, const WordDeletionTable& wordDeletionTable);
-
- /***
- * Set the total-score field from the various individual score parts
- * (not necessarily using all of them)
- */
- virtual void SumIndividualScores(const StaticData& staticData);
-
- public:
-
- virtual void CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source);
-};
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index d70800d9b..fd4327d92 100755
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -29,7 +29,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Arc.h"
#include "SquareMatrix.h"
#include "StaticData.h"
-//#include "DeletionHypothesis.h"
//TODO: add this include in when it compiles
//#include "LexicalReordering.h"
@@ -96,7 +95,7 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
m_score[ScoreType::PhraseTrans] += transOpt.GetTranslationScore();
m_score[ScoreType::FutureScoreEnum] += transOpt.GetFutureScore();
m_score[ScoreType::LanguageModelScore] += transOpt.GetNgramScore();
-// m_wordDeleted = transOpt.IsDeletionOption();
+ m_wordDeleted = transOpt.IsDeletionOption();
#ifdef N_BEST
// language model score (ngram)
@@ -461,16 +460,6 @@ void Hypothesis::CalcDistortionScore()
}
/***
- * calculate the score due to source words dropped; set the appropriate elements of m_score
- */
-void Hypothesis::CalcDeletionScore(const Sentence& sourceSentence, const WordsRange& sourceWordsRange, const WordDeletionTable& wordDeletionTable)
-{
- m_score[ScoreType::DeletedWords] =
- wordDeletionTable.GetDeletionCost(sourceSentence.GetSubString(sourceWordsRange));
-}
-
-
-/***
* calculate the logarithm of our total translation score (sum up components)
*/
void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &futureScore, const Sentence &source)
@@ -490,19 +479,12 @@ void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &fut
//LEXICAL REORDERING COST
CalcLexicalReorderingScore();
- //cost for deleting source words
- if (m_wordDeleted)
- {
- CalcDeletionScore(source, GetCurrSourceWordsRange(), staticData.GetWordDeletionTable());
- }
-
// TOTAL COST
m_score[ScoreType::Total] = m_score[ScoreType::PhraseTrans]
+ m_score[ScoreType::Generation]
+ m_score[ScoreType::LanguageModelScore]
+ m_score[ScoreType::Distortion] * staticData.GetWeightDistortion()
+ m_score[ScoreType::WordPenalty] * staticData.GetWeightWordPenalty()
- + m_score[ScoreType::DeletedWords] * staticData.GetWordDeletionWeight()
+ m_score[ScoreType::FutureScoreEnum];
}
@@ -565,11 +547,17 @@ void Hypothesis::PrintHypothesis(const Sentence &source, float weightDistortion,
cout<<" )"<<endl;
cout<<"\tbase score "<<m_prevHypo->m_score[ScoreType::Total]<<endl;
cout<<"\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "<< source.GetSubString(m_currSourceWordsRange) <<endl;
- cout<<"\ttranslated as: "<<m_targetPhrase<<" => translation cost "<<m_score[ScoreType::PhraseTrans]<<endl;
+ cout<<"\ttranslated as: "<<m_targetPhrase<<" => translation cost "<<m_score[ScoreType::PhraseTrans];
+ if (m_wordDeleted) cout <<" word_deleted";
+ cout<<endl;
cout<<"\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange()) << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
cout<<"\tlanguage model cost "<<m_score[ScoreType::LanguageModelScore]<<endl;
- cout<<"\tword penalty "<<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<< "\tdeletion cost "<<m_score[ScoreType::DeletedWords] << endl;
+ cout<<"\tword penalty "<<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
cout<<"\tscore "<<m_score[ScoreType::Total] - m_score[ScoreType::FutureScoreEnum]<<" + future cost "<<m_score[ScoreType::FutureScoreEnum]<<" = "<<m_score[ScoreType::Total]<<endl;
+ cout<<"\tscore "<<m_score[ScoreType::Total] - m_score[ScoreType::FutureScoreEnum]<<" + future cost "<<m_score[ScoreType::FutureScoreEnum]<<" = "<<m_score[ScoreType::Total]<<endl;
+#if N_BEST
+ cout<<"\tweighted feature scores: " << this->GetScoreComponent() << endl;
+#endif
//PrintLMScores();
}
diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am
index acb560f65..94f4e189c 100644
--- a/moses/src/Makefile.am
+++ b/moses/src/Makefile.am
@@ -2,7 +2,6 @@ lib_LIBRARIES = libmoses.a
libmoses_a_SOURCES = \
Arc.cpp \
ConfusionNet.cpp \
- DeletionHypothesis.cpp \
Factor.cpp \
FactorCollection.cpp \
FactorTypeSet.cpp \
@@ -32,7 +31,6 @@ libmoses_a_SOURCES = \
Util.cpp \
WeightOptimization.cpp \
Word.cpp \
- WordDeletionTable.cpp \
WordsBitmap.cpp \
WordsRange.cpp
diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp
index 2afcce5b7..caf1bde6e 100755
--- a/moses/src/PhraseDictionary.cpp
+++ b/moses/src/PhraseDictionary.cpp
@@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Word.h"
#include "Util.h"
#include "InputFileStream.h"
+#include "StaticData.h"
using namespace std;
@@ -42,7 +43,8 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
, bool filter
, const list< Phrase > &inputPhraseList
, const LMList &languageModels
- , float weightWP)
+ , float weightWP
+ , const StaticData& staticData)
{
m_maxTargetPhrase = maxTargetPhrase;
@@ -76,8 +78,9 @@ void PhraseDictionary::Load(const std::vector<FactorType> &input
TRACE_ERR("Syntax error at " << filePath << ":" << line_num);
abort(); // TODO- error handling
}
- if (tokens[1].find_first_not_of(" \t", 0) == string::npos) {
- TRACE_ERR(filePath << ":" << line_num << ": phrase contains empty target, skipping\n");
+ bool isLHSEmpty = (tokens[1].find_first_not_of(" \t", 0) == string::npos);
+ if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
+ TRACE_ERR(filePath << ":" << line_num << ": pt entry contains empty target, skipping\n");
continue;
}
if (!filter)
diff --git a/moses/src/PhraseDictionary.h b/moses/src/PhraseDictionary.h
index a0cc94e09..d899fd2e3 100755
--- a/moses/src/PhraseDictionary.h
+++ b/moses/src/PhraseDictionary.h
@@ -32,6 +32,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
typedef std::list <TargetPhrase> TargetPhraseCollection;
+class StaticData;
+
class PhraseDictionary : public Dictionary
{
friend std::ostream& operator<<(std::ostream&, const PhraseDictionary&);
@@ -67,7 +69,8 @@ public:
, bool filter
, const std::list< Phrase > &inputPhraseList
, const LMList &languageModels
- , float weightWP);
+ , float weightWP
+ , const StaticData& staticData);
size_t GetSize() const
{
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 91e02f4c5..c7ea32a28 100755
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -96,9 +96,8 @@ bool StaticData::LoadParameters(int argc, char* argv[])
}
//source word deletion
- if(m_parameter.GetParam("dtable-file").size() > 0)
+ if(m_parameter.GetParam("word-deletion").size() > 0)
{
- m_wordDeletionWeight = Scan<float>(m_parameter.GetParam("weight-e")[0]);
m_wordDeletionEnabled = true;
if (GetVerboseLevel() > 0) { std::cerr << "Word deletion enabled." << std::endl; }
} else { m_wordDeletionEnabled = false; }
@@ -471,19 +470,13 @@ void StaticData::LoadPhraseTables(bool filter
, filterPhrase
, inputPhraseList
, this->GetLanguageModel(Initial)
- , this->GetWeightWordPenalty());
+ , this->GetWeightWordPenalty()
+ , *this);
index++;
timer.check("Finished loading PhraseTable");
}
}
-/*
- //load word deletion table
- if(m_parameter.GetParam("dtable-file").size() > 0)
- {
- m_wordDeletionTable.Load(m_parameter.GetParam("dtable-file")[0], *this);
- }
-*/
timer.check("Finished loading phrase tables");
}
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index 0ab55b46a..dafc1a9ea 100755
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -34,7 +34,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "InputOutput.h"
#include "DecodeStep.h"
//#include "UnknownWordHandler.h"
-#include "WordDeletionTable.h"
class StaticData
{
@@ -42,7 +41,6 @@ protected:
FactorCollection m_factorCollection;
std::vector<PhraseDictionary*> m_phraseDictionary;
std::vector<GenerationDictionary*> m_generationDictionary;
- WordDeletionTable m_wordDeletionTable;
std::list < DecodeStep > m_decodeStepList;
Parameter m_parameter;
std::vector<FactorType> m_inputFactorOrder;
@@ -52,7 +50,7 @@ protected:
// Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed
float m_beamThreshold
- ,m_weightDistortion, m_weightWordPenalty, m_wordDeletionWeight;
+ ,m_weightDistortion, m_weightWordPenalty;
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// do it differently from old pharaoh
@@ -146,11 +144,7 @@ public:
{
return m_weightWordPenalty;
}
- float GetWordDeletionWeight() const
- {
- return m_wordDeletionWeight;
- }
- bool LittleChrisAsksWhetherWordDeletionIsEnabledAndWeAnswerHim() const
+ bool IsWordDeletionEnabled() const
{
return m_wordDeletionEnabled;
}
@@ -191,14 +185,6 @@ public:
{
return m_cachePath;
}
- /***
- * only call this if word deletion is enabled
- */
- const WordDeletionTable& GetWordDeletionTable() const
- {
- return m_wordDeletionTable;
- }
-
size_t GetVerboseLevel() const
{
return m_verboseLevel;
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index 4c62356fa..2bdfc1e4e 100755
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -110,8 +110,6 @@ namespace ScoreType {
LanguageModelScore,
Distortion,
WordPenalty,
- DeletedWords, //source words dropped from translation
- InsertedWords, //words inserted into target phrase independently of phrase translation
FutureScoreEnum,
LexicalReordering,
Total
@@ -119,7 +117,7 @@ namespace ScoreType {
}
// count of above
-const size_t NUM_SCORES = 10;
+const size_t NUM_SCORES = 8;
namespace LexReorderType
{
diff --git a/moses/src/WordDeletionTable.cpp b/moses/src/WordDeletionTable.cpp
deleted file mode 100644
index 201652d6f..000000000
--- a/moses/src/WordDeletionTable.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include <cstdlib>
-#include <iostream>
-using std::ifstream;
-#include <vector>
-using std::vector;
-#include "TypeDef.h"
-#include "StaticData.h"
-#include "WordDeletionTable.h"
-using std::string;
-
-void WordDeletionTable::Load(const string& filename, StaticData& staticData)
-{
- std::cout << "in WordDeletionTable::Load()" << std::endl;
- ifstream infile(filename.c_str());
- if(!infile)
- {
- std::cerr << "WordDeletionTable::Load(): can't open '" << filename << "' for read; exiting" << std::endl;
- exit(-1);
- }
-
- //each line is of format PHRASE ||| DELETION_COST
- string line;
- while(getline(infile, line, '\n'))
- {
- vector<string> token = TokenizeMultiCharSeparator(line, "|||");
- //parse phrase
- Phrase sourcePhrase(Input);
- const std::vector<FactorType>& input = staticData.GetInputFactorOrder();
- sourcePhrase.CreateFromString(input, token[0], staticData.GetFactorCollection());
- //parse cost
- m_deletionCosts[sourcePhrase] = Scan<float>(token[1]);
- std::cout << "dtable entry: " << sourcePhrase << " -> " << m_deletionCosts[sourcePhrase] << std::endl;
- }
- infile.close();
-}