Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/moses
diff options
context:
space:
mode:
author: Ulrich Germann <ugermann@inf.ed.ac.uk>, 2015-02-13 05:22:37 +0300
committer: Ulrich Germann <ugermann@inf.ed.ac.uk>, 2015-02-13 05:22:37 +0300
commit: fa3f82a04db309dddda1c3e89fc57e4339b19fce (patch)
tree: 991e8c6269a40a5c1b8cc6c139ea158278d142aa /moses
parent: 31da9e8a01693b2afca1048159b2d0be4cca8353 (diff)
parent: 3fb8f58b4e88e7db9dae26e5c1768a70a0845b8f (diff)
Merge branch 'master' into sampled-lexicalized-reordering
Diffstat (limited to 'moses')
-rw-r--r-- moses/ConfusionNet.cpp | 2
-rw-r--r-- moses/FF/BleuScoreFeature.cpp | 2
-rw-r--r-- moses/FF/InputFeature.cpp | 14
-rw-r--r-- moses/FF/OSM-Feature/KenOSM.h | 2
-rw-r--r-- moses/LM/Ken.cpp | 15
-rw-r--r-- moses/Parameter.cpp | 2
-rw-r--r-- moses/Sentence.cpp | 6
-rw-r--r-- moses/StaticData.cpp | 6
-rw-r--r-- moses/StaticData.h | 9
-rw-r--r-- moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp | 17
-rw-r--r-- moses/TranslationTask.cpp | 2
-rw-r--r-- moses/TypeDef.h | 5
-rw-r--r-- moses/WordLattice.h | 4
13 files changed, 49 insertions, 37 deletions
diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp
index ce0d5e1c0..f6d7b4168 100644
--- a/moses/ConfusionNet.cpp
+++ b/moses/ConfusionNet.cpp
@@ -67,7 +67,7 @@ ConfusionNet()
stats.createOne();
const StaticData& staticData = StaticData::Instance();
- if (staticData.IsChart()) {
+ if (staticData.IsSyntax()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp
index 5be3b0b6b..24887c373 100644
--- a/moses/FF/BleuScoreFeature.cpp
+++ b/moses/FF/BleuScoreFeature.cpp
@@ -27,7 +27,7 @@ int BleuScoreState::Compare(const FFState& o) const
if (&o == this)
return 0;
- if (StaticData::Instance().IsChart())
+ if (StaticData::Instance().IsSyntax())
return 0;
const BleuScoreState& other = dynamic_cast<const BleuScoreState&>(o);
diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp
index 39535f58f..10e5347e4 100644
--- a/moses/FF/InputFeature.cpp
+++ b/moses/FF/InputFeature.cpp
@@ -52,15 +52,15 @@ void InputFeature::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedFutureScore) const
{
if (m_legacy) {
- //binary phrase-table does input feature itself
- return;
+ //binary phrase-table does input feature itself
+ return;
}
- /*
- const ScorePair *scores = inputPath.GetInputScore();
- if (scores) {
- scoreBreakdown.PlusEquals(this, *scores);
+ else if (input.GetType() == WordLatticeInput){
+ const ScorePair *scores = inputPath.GetInputScore();
+ if (scores) {
+ scoreBreakdown.PlusEquals(this, *scores);
+ }
}
- */
}
} // namespace
diff --git a/moses/FF/OSM-Feature/KenOSM.h b/moses/FF/OSM-Feature/KenOSM.h
index a50589edc..03deead07 100644
--- a/moses/FF/OSM-Feature/KenOSM.h
+++ b/moses/FF/OSM-Feature/KenOSM.h
@@ -10,6 +10,8 @@ namespace Moses
class KenOSMBase
{
public:
+ virtual ~KenOSMBase() {}
+
virtual float Score(const lm::ngram::State&, const std::string&,
lm::ngram::State&) const = 0;
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index 71f300481..e7267f66c 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -146,6 +146,8 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri
:LanguageModel(line)
,m_factorType(factorType)
{
+ ReadParameters();
+
lm::ngram::Config config;
IFVERBOSE(1) {
config.messages = &std::cerr;
@@ -441,15 +443,18 @@ bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const
return ret;
}
-LanguageModel *ConstructKenLM(const std::string &line)
+LanguageModel *ConstructKenLM(const std::string &lineOrig)
{
FactorType factorType = 0;
string filePath;
bool lazy = false;
- util::TokenIter<util::SingleCharacter, true> argument(line, ' ');
+ util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
++argument; // KENLM
+ stringstream line;
+ line << "KENLM";
+
for (; argument; ++argument) {
const char *equals = std::find(argument->data(), argument->data() + argument->size(), '=');
UTIL_THROW_IF2(equals == argument->data() + argument->size(),
@@ -465,12 +470,12 @@ LanguageModel *ConstructKenLM(const std::string &line)
} else if (name == "lazyken") {
lazy = boost::lexical_cast<bool>(value);
} else {
- // that's ok. do nothing, passes onto LM constructor
- //UTIL_THROW2("Unknown KenLM argument " << name);
+ // pass to base class to interpret
+ line << " " << name << "=" << value;
}
}
- return ConstructKenLM(line, filePath, factorType, lazy);
+ return ConstructKenLM(line.str(), filePath, factorType, lazy);
}
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index c5677b73b..6052624cc 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -978,7 +978,7 @@ void Parameter::WeightOverwrite()
for (size_t i = 0; i < toks.size(); ++i) {
const string &tok = toks[i];
- if (starts_with(tok, "=")) {
+ if (ends_with(tok, "=")) {
// start of new feature
if (name != "") {
diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp
index 58d650aa3..a937f21e3 100644
--- a/moses/Sentence.cpp
+++ b/moses/Sentence.cpp
@@ -43,7 +43,7 @@ Sentence::Sentence()
, InputType()
{
const StaticData& staticData = StaticData::Instance();
- if (staticData.IsChart()) {
+ if (staticData.IsSyntax()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
}
}
@@ -168,7 +168,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
if (staticData.GetXmlInputType() != XmlPassThrough) {
int offset = 0;
- if (staticData.IsChart()) {
+ if (staticData.IsSyntax()) {
offset = 1;
}
@@ -188,7 +188,7 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
// placeholders
ProcessPlaceholders(placeholders);
- if (staticData.IsChart()) {
+ if (staticData.IsSyntax()) {
InitStartEndWord();
}
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 2fad752da..8709d758f 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -112,7 +112,7 @@ bool StaticData::LoadData(Parameter *parameter)
// to cube or not to cube
m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
- if (IsChart())
+ if (IsSyntax())
LoadChartDecodingParameters();
// input type has to be specified BEFORE loading the phrase tables!
@@ -698,7 +698,7 @@ void StaticData::LoadDecodeGraphsOld(const vector<string> &mappingVector, const
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
DecodeGraph *decodeGraph;
- if (IsChart()) {
+ if (IsSyntax()) {
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
@@ -765,7 +765,7 @@ void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVect
UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
DecodeGraph *decodeGraph;
- if (IsChart()) {
+ if (IsSyntax()) {
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
diff --git a/moses/StaticData.h b/moses/StaticData.h
index 193f79aad..d9a96aaa3 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -436,8 +436,13 @@ public:
SearchAlgorithm GetSearchAlgorithm() const {
return m_searchAlgorithm;
}
- bool IsChart() const {
- return m_searchAlgorithm == CYKPlus || m_searchAlgorithm == ChartIncremental;
+ bool IsSyntax() const {
+ return m_searchAlgorithm == CYKPlus ||
+ m_searchAlgorithm == ChartIncremental ||
+ m_searchAlgorithm == SyntaxS2T ||
+ m_searchAlgorithm == SyntaxT2S ||
+ m_searchAlgorithm == SyntaxT2S_SCFG ||
+ m_searchAlgorithm == SyntaxF2S;
}
const ScoreComponentCollection& GetAllWeights() const {
diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
index 90d5575a1..9c3f6b513 100644
--- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
+++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp
@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <queue>
#include <algorithm>
#include <sys/stat.h>
+#include <boost/algorithm/string/predicate.hpp>
#include "PhraseDictionaryCompact.h"
#include "moses/FactorCollection.h"
@@ -37,6 +38,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "util/exception.hh"
using namespace std;
+using namespace boost::algorithm;
namespace Moses
{
@@ -63,18 +65,9 @@ void PhraseDictionaryCompact::Load()
std::string tFilePath = m_filePath;
std::string suffix = ".minphr";
- if(tFilePath.substr(tFilePath.length() - suffix.length(), suffix.length()) == suffix) {
- if(!FileExists(tFilePath)) {
- throw runtime_error("Error: File " + tFilePath + " does not exit.");
- exit(1);
- }
- } else {
- if(FileExists(tFilePath + suffix)) {
- tFilePath += suffix;
- } else {
- throw runtime_error("Error: File " + tFilePath + ".minphr does not exit.");
- }
- }
+ if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
+ if (!FileExists(tFilePath))
+ throw runtime_error("Error: File " + tFilePath + " does not exist.");
m_phraseDecoder = new PhraseDecoder(*this, &m_input, &m_output,
m_numScoreComponents, &m_weight);
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index 7c629db7f..eff0588b6 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -61,7 +61,7 @@ void TranslationTask::Run()
// which manager
BaseManager *manager;
- if (!staticData.IsChart()) {
+ if (!staticData.IsSyntax()) {
// phrase-based
manager = new Manager(*m_source);
} else if (staticData.GetSearchAlgorithm() == SyntaxF2S ||
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index 0a1e1ad9b..a619639bc 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -122,7 +122,7 @@ enum InputTypeEnum {
,ConfusionNetworkInput = 1
,WordLatticeInput = 2
,TreeInputType = 3
- ,WordLatticeInput2 = 4
+ //,WordLatticeInput2 = 4
, TabbedSentenceInput = 5
,ForestInputType = 6
};
@@ -140,6 +140,9 @@ enum DictionaryFind {
,All = 1
};
+// Note: StaticData uses SearchAlgorithm to determine whether the translation
+// model is phrase-based or syntax-based. If you add a syntax-based search
+// algorithm here then you should also update StaticData::IsSyntax().
enum SearchAlgorithm {
Normal = 0
,CubePruning = 1
diff --git a/moses/WordLattice.h b/moses/WordLattice.h
index 325271234..dc1582d78 100644
--- a/moses/WordLattice.h
+++ b/moses/WordLattice.h
@@ -21,6 +21,10 @@ private:
public:
WordLattice();
+
+ InputTypeEnum GetType() const
+ { return WordLatticeInput; }
+
size_t GetColumnIncrement(size_t ic, size_t j) const;
void Print(std::ostream&) const;
/** Get shortest path between two nodes