Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authorUlrich Germann <ugermann@inf.ed.ac.uk>2017-05-25 01:22:04 +0300
committerUlrich Germann <ugermann@inf.ed.ac.uk>2017-05-25 01:22:04 +0300
commit0eec9270f2f70226e43409fef93860f9142f0d22 (patch)
tree076aec75d4dee0006cb747c837ea296ec1ee7d78 /moses
parent66bd0452308f9a99c13652f6d5b400b908ee714b (diff)
parentb8de7c352840a9786af832774831c51c3863ec60 (diff)
Merge branch 'master' of https://github.com/moses-smt/mosesdecoder
Diffstat (limited to 'moses')
-rw-r--r--moses/FF/ExampleStatefulFF.cpp (renamed from moses/FF/SkeletonStatefulFF.cpp)20
-rw-r--r--moses/FF/ExampleStatefulFF.h (renamed from moses/FF/SkeletonStatefulFF.h)12
-rw-r--r--moses/FF/ExampleStatelessFF.cpp (renamed from moses/FF/SkeletonStatelessFF.cpp)16
-rw-r--r--moses/FF/ExampleStatelessFF.h (renamed from moses/FF/SkeletonStatelessFF.h)4
-rw-r--r--moses/FF/ExampleTranslationOptionListFeature.h (renamed from moses/FF/SkeletonTranslationOptionListFeature.h)4
-rw-r--r--moses/FF/Factory.cpp26
-rw-r--r--moses/GenerationDictionary.cpp9
-rw-r--r--moses/Jamfile4
-rw-r--r--moses/LM/ExampleLM.cpp (renamed from moses/LM/SkeletonLM.cpp)8
-rw-r--r--moses/LM/ExampleLM.h (renamed from moses/LM/SkeletonLM.h)6
-rw-r--r--moses/LM/Jamfile2
-rw-r--r--moses/StaticData.cpp9
-rw-r--r--moses/TrainingTask.h12
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp (renamed from moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp)20
-rw-r--r--moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h (renamed from moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h)14
-rw-r--r--moses/TranslationModel/ExamplePT.cpp (renamed from moses/TranslationModel/SkeletonPT.cpp)26
-rw-r--r--moses/TranslationModel/ExamplePT.h (renamed from moses/TranslationModel/SkeletonPT.h)6
-rw-r--r--moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp2
-rw-r--r--moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp1
-rw-r--r--moses/TranslationModel/PhraseDictionaryTransliteration.cpp1
-rw-r--r--moses/TranslationModel/ProbingPT.cpp (renamed from moses/TranslationModel/ProbingPT/ProbingPT.cpp)33
-rw-r--r--moses/TranslationModel/ProbingPT.h (renamed from moses/TranslationModel/ProbingPT/ProbingPT.h)15
-rw-r--r--moses/TranslationModel/ProbingPT/Jamfile8
-rw-r--r--moses/TranslationModel/ProbingPT/StoreTarget.cpp264
-rw-r--r--moses/TranslationModel/ProbingPT/StoreTarget.h51
-rw-r--r--moses/TranslationModel/ProbingPT/StoreVocab.cpp13
-rw-r--r--moses/TranslationModel/ProbingPT/StoreVocab.h60
-rw-r--r--moses/TranslationModel/ProbingPT/hash.cpp44
-rw-r--r--moses/TranslationModel/ProbingPT/hash.hh17
-rw-r--r--moses/TranslationModel/ProbingPT/line_splitter.cpp103
-rw-r--r--moses/TranslationModel/ProbingPT/line_splitter.hh57
-rw-r--r--moses/TranslationModel/ProbingPT/probing_hash_utils.cpp50
-rw-r--r--moses/TranslationModel/ProbingPT/probing_hash_utils.hh51
-rw-r--r--moses/TranslationModel/ProbingPT/querying.cpp141
-rw-r--r--moses/TranslationModel/ProbingPT/querying.hh66
-rw-r--r--moses/TranslationModel/ProbingPT/storing.cpp298
-rw-r--r--moses/TranslationModel/ProbingPT/storing.hh92
-rw-r--r--moses/TranslationModel/ProbingPT/vocabid.cpp59
-rw-r--r--moses/TranslationModel/ProbingPT/vocabid.hh29
-rw-r--r--moses/parameters/AllOptions.cpp5
-rw-r--r--moses/parameters/CubePruningOptions.cpp5
-rw-r--r--moses/parameters/InputOptions.cpp5
-rw-r--r--moses/parameters/NBestOptions.cpp5
-rw-r--r--moses/parameters/OOVHandlingOptions.cpp5
-rw-r--r--moses/parameters/OptionsBaseClass.cpp5
-rw-r--r--moses/parameters/ReportingOptions.cpp14
-rw-r--r--moses/parameters/SearchOptions.cpp5
-rw-r--r--moses/parameters/SyntaxOptions.cpp5
-rw-r--r--moses/server/TranslationRequest.cpp19
-rw-r--r--moses/server/TranslationRequest.h4
50 files changed, 216 insertions, 1514 deletions
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/ExampleStatefulFF.cpp
index 2acaf2d2e..5a53c4f87 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/ExampleStatefulFF.cpp
@@ -1,5 +1,5 @@
#include <vector>
-#include "SkeletonStatefulFF.h"
+#include "ExampleStatefulFF.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Hypothesis.h"
@@ -9,7 +9,7 @@ namespace Moses
{
////////////////////////////////////////////////////////////////
-SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
+ExampleStatefulFF::ExampleStatefulFF(const std::string &line)
:StatefulFeatureFunction(3, line)
{
ReadParameters();
@@ -19,7 +19,7 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file to reduce code clutter.
-void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
+void ExampleStatefulFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
@@ -28,7 +28,7 @@ void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file to reduce code clutter.
-void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
+void ExampleStatefulFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -39,11 +39,11 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
// An empty implementation of this function is provided by StatefulFeatureFunction.
// Unless you are actually implementing this, please remove it from your
// implementation (and the declaration in the header file to reduce code clutter.
-void SkeletonStatefulFF::EvaluateTranslationOptionListWithSourceContext
+void ExampleStatefulFF::EvaluateTranslationOptionListWithSourceContext
(const InputType &input, const TranslationOptionList &translationOptionList) const
{}
-FFState* SkeletonStatefulFF::EvaluateWhenApplied(
+FFState* ExampleStatefulFF::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
@@ -59,18 +59,18 @@ FFState* SkeletonStatefulFF::EvaluateWhenApplied(
accumulator->PlusEquals(this, "sparse-name", 2.4);
// int targetLen = cur_hypo.GetCurrTargetPhrase().GetSize(); // ??? [UG]
- return new SkeletonState(0);
+ return new ExampleState(0);
}
-FFState* SkeletonStatefulFF::EvaluateWhenApplied(
+FFState* ExampleStatefulFF::EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
- return new SkeletonState(0);
+ return new ExampleState(0);
}
-void SkeletonStatefulFF::SetParameter(const std::string& key, const std::string& value)
+void ExampleStatefulFF::SetParameter(const std::string& key, const std::string& value)
{
if (key == "arg") {
// set value here
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/ExampleStatefulFF.h
index 7544ddd30..d66274295 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/ExampleStatefulFF.h
@@ -7,11 +7,11 @@
namespace Moses
{
-class SkeletonState : public FFState
+class ExampleState : public FFState
{
int m_targetLen;
public:
- SkeletonState(int targetLen)
+ ExampleState(int targetLen)
:m_targetLen(targetLen) {
}
@@ -19,22 +19,22 @@ public:
return (size_t) m_targetLen;
}
virtual bool operator==(const FFState& o) const {
- const SkeletonState& other = static_cast<const SkeletonState&>(o);
+ const ExampleState& other = static_cast<const ExampleState&>(o);
return m_targetLen == other.m_targetLen;
}
};
-class SkeletonStatefulFF : public StatefulFeatureFunction
+class ExampleStatefulFF : public StatefulFeatureFunction
{
public:
- SkeletonStatefulFF(const std::string &line);
+ ExampleStatefulFF(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;
}
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
- return new SkeletonState(0);
+ return new ExampleState(0);
}
// An empty implementation of this function is provided by StatefulFeatureFunction.
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/ExampleStatelessFF.cpp
index 8474efe76..0e62ad0ad 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/ExampleStatelessFF.cpp
@@ -1,5 +1,5 @@
#include <vector>
-#include "SkeletonStatelessFF.h"
+#include "ExampleStatelessFF.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TargetPhrase.h"
@@ -7,13 +7,13 @@ using namespace std;
namespace Moses
{
-SkeletonStatelessFF::SkeletonStatelessFF(const std::string &line)
+ExampleStatelessFF::ExampleStatelessFF(const std::string &line)
:StatelessFeatureFunction(2, line)
{
ReadParameters();
}
-void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
+void ExampleStatelessFF::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
@@ -29,7 +29,7 @@ void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
}
-void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
+void ExampleStatelessFF::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
@@ -43,20 +43,20 @@ void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
}
}
-void SkeletonStatelessFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
+void ExampleStatelessFF::EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
-void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
+void ExampleStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
-void SkeletonStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
+void ExampleStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
-void SkeletonStatelessFF::SetParameter(const std::string& key, const std::string& value)
+void ExampleStatelessFF::SetParameter(const std::string& key, const std::string& value)
{
if (key == "arg") {
// set value here
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/ExampleStatelessFF.h
index 0dc46e214..e1f007d21 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/ExampleStatelessFF.h
@@ -6,10 +6,10 @@
namespace Moses
{
-class SkeletonStatelessFF : public StatelessFeatureFunction
+class ExampleStatelessFF : public StatelessFeatureFunction
{
public:
- SkeletonStatelessFF(const std::string &line);
+ ExampleStatelessFF(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;
diff --git a/moses/FF/SkeletonTranslationOptionListFeature.h b/moses/FF/ExampleTranslationOptionListFeature.h
index e47e691aa..7686eb3ff 100644
--- a/moses/FF/SkeletonTranslationOptionListFeature.h
+++ b/moses/FF/ExampleTranslationOptionListFeature.h
@@ -6,10 +6,10 @@
namespace Moses
{
-class SkeletonTranslationOptionListFeature : public StatelessFeatureFunction
+class ExampleTranslationOptionListFeature : public StatelessFeatureFunction
{
public:
- SkeletonTranslationOptionListFeature(const std::string &line)
+ ExampleTranslationOptionListFeature(const std::string &line)
:StatelessFeatureFunction(1, line) {
ReadParameters();
}
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 9ae145504..398d6593c 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -14,7 +14,7 @@
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
-#include "moses/TranslationModel/ProbingPT/ProbingPT.h"
+#include "moses/TranslationModel/ProbingPT.h"
#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h"
#include "moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h"
@@ -65,16 +65,16 @@
#include "SyntaxRHS.h"
#include "DeleteRules.h"
-#include "moses/FF/SkeletonStatelessFF.h"
-#include "moses/FF/SkeletonStatefulFF.h"
-#include "moses/LM/SkeletonLM.h"
-#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
-#include "moses/FF/SkeletonTranslationOptionListFeature.h"
+#include "moses/FF/ExampleStatelessFF.h"
+#include "moses/FF/ExampleStatefulFF.h"
+#include "moses/LM/ExampleLM.h"
+#include "moses/FF/ExampleTranslationOptionListFeature.h"
#include "moses/LM/BilingualLM.h"
-#include "moses/TranslationModel/SkeletonPT.h"
+#include "moses/TranslationModel/ExamplePT.h"
#include "moses/Syntax/InputWeightFF.h"
#include "moses/Syntax/RuleTableFF.h"
+#include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
#include "moses/FF/EditOps.h"
#include "moses/FF/CorrectionPattern.h"
@@ -297,13 +297,13 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(UnalignedWordCountFeature);
MOSES_FNAME(DeleteRules);
- MOSES_FNAME(SkeletonStatelessFF);
- MOSES_FNAME(SkeletonStatefulFF);
- MOSES_FNAME(SkeletonLM);
- MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
- MOSES_FNAME(SkeletonTranslationOptionListFeature);
- MOSES_FNAME(SkeletonPT);
+ MOSES_FNAME(ExampleStatelessFF);
+ MOSES_FNAME(ExampleStatefulFF);
+ MOSES_FNAME(ExampleLM);
+ MOSES_FNAME(ExampleTranslationOptionListFeature);
+ MOSES_FNAME(ExamplePT);
+ MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
MOSES_FNAME(EditOps);
MOSES_FNAME(CorrectionPattern);
diff --git a/moses/GenerationDictionary.cpp b/moses/GenerationDictionary.cpp
index 29a4fa2b3..35546e62c 100644
--- a/moses/GenerationDictionary.cpp
+++ b/moses/GenerationDictionary.cpp
@@ -120,7 +120,14 @@ const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) con
{
const OutputWordCollection *ret;
- Collection::const_iterator iter = m_collection.find(&word);
+ Word wordInput;
+ const std::vector<FactorType> &inputFactors = GetInput();
+ for (size_t i = 0; i < inputFactors.size(); ++i) {
+ FactorType factorType = inputFactors[i];
+ wordInput[factorType] = word[factorType];
+ }
+
+ Collection::const_iterator iter = m_collection.find(&wordInput);
if (iter == m_collection.end()) {
// can't find source phrase
ret = NULL;
diff --git a/moses/Jamfile b/moses/Jamfile
index 49aab9025..5200029fb 100644
--- a/moses/Jamfile
+++ b/moses/Jamfile
@@ -122,10 +122,10 @@ vwfiles synlm mmlib mserver headers
FF_Factory.o
LM//LM
TranslationModel/CompactPT//CompactPT
-TranslationModel/ProbingPT//ProbingPT
ThreadPool
..//search
../util/double-conversion//double-conversion
+../probingpt//probingpt
..//z
../OnDiskPt//OnDiskPt
$(TOP)//boost_filesystem
@@ -139,5 +139,5 @@ alias headers-to-install : [ glob-tree *.h ] ;
import testing ;
-unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] ..//boost_filesystem moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ;
+unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] ..//boost_filesystem moses headers ..//z ../OnDiskPt//OnDiskPt ../probingpt//probingpt ..//boost_unit_test_framework ;
diff --git a/moses/LM/SkeletonLM.cpp b/moses/LM/ExampleLM.cpp
index f944de23a..034afef2e 100644
--- a/moses/LM/SkeletonLM.cpp
+++ b/moses/LM/ExampleLM.cpp
@@ -1,12 +1,12 @@
-#include "SkeletonLM.h"
+#include "ExampleLM.h"
#include "moses/FactorCollection.h"
using namespace std;
namespace Moses
{
-SkeletonLM::SkeletonLM(const std::string &line)
+ExampleLM::ExampleLM(const std::string &line)
:LanguageModelSingleFactor(line)
{
ReadParameters();
@@ -24,11 +24,11 @@ SkeletonLM::SkeletonLM(const std::string &line)
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
}
-SkeletonLM::~SkeletonLM()
+ExampleLM::~ExampleLM()
{
}
-LMResult SkeletonLM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
+LMResult ExampleLM::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
{
LMResult ret;
ret.score = contextFactor.size();
diff --git a/moses/LM/SkeletonLM.h b/moses/LM/ExampleLM.h
index 988c9def9..292462917 100644
--- a/moses/LM/SkeletonLM.h
+++ b/moses/LM/ExampleLM.h
@@ -7,13 +7,13 @@
namespace Moses
{
-class SkeletonLM : public LanguageModelSingleFactor
+class ExampleLM : public LanguageModelSingleFactor
{
protected:
public:
- SkeletonLM(const std::string &line);
- ~SkeletonLM();
+ ExampleLM(const std::string &line);
+ ~ExampleLM();
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const;
};
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 4eafbd632..0c152d555 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -138,7 +138,7 @@ if $(with-dalm) {
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
-alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
+alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp InMemoryPerSentenceOnDemandLM.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ExampleLM.cpp
../../lm//kenlm ..//headers $(dependencies) ;
alias macros : : : : <define>$(lmmacros) ;
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index 4d3e96000..b65c22eb4 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -423,8 +423,13 @@ LoadDecodeGraphsOld(const vector<string> &mappingVector,
if (m_decodeGraphs.size() < decodeGraphInd + 1) {
DecodeGraph *decodeGraph;
if (is_syntax(m_options->search.algo)) {
- size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
- VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
+ size_t maxChartSpan;
+ if (decodeGraphInd < maxChartSpans.size()) {
+ maxChartSpan = maxChartSpans[decodeGraphInd];
+ VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
+ } else {
+ maxChartSpan = DEFAULT_MAX_CHART_SPAN;
+ }
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
} else {
decodeGraph = new DecodeGraph(m_decodeGraphs.size());
diff --git a/moses/TrainingTask.h b/moses/TrainingTask.h
index 4d2152920..83933691d 100644
--- a/moses/TrainingTask.h
+++ b/moses/TrainingTask.h
@@ -39,6 +39,18 @@ public:
boost::shared_ptr<IOWrapper> const& ioWrapper) {
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper));
ret->m_self = ret;
+ ret->m_scope.reset(new ContextScope);
+ return ret;
+ }
+
+ // factory function
+ static boost::shared_ptr<TrainingTask>
+ create(boost::shared_ptr<InputType> const& source,
+ boost::shared_ptr<IOWrapper> const& ioWrapper,
+ boost::shared_ptr<ContextScope> const& scope) {
+ boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper));
+ ret->m_self = ret;
+ ret->m_scope = scope;
return ret;
}
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp
index ca219f249..6c80e30af 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.cpp
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.cpp
@@ -18,7 +18,7 @@
***********************************************************************/
#include <iostream>
-#include "ChartRuleLookupManagerSkeleton.h"
+#include "ChartRuleLookupManagerExample.h"
#include "DotChartInMemory.h"
#include "moses/Util.h"
@@ -29,29 +29,29 @@
#include "moses/NonTerminal.h"
#include "moses/ChartCellCollection.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
-#include "moses/TranslationModel/SkeletonPT.h"
+#include "moses/TranslationModel/ExamplePT.h"
using namespace std;
namespace Moses
{
-ChartRuleLookupManagerSkeleton::ChartRuleLookupManagerSkeleton(
+ChartRuleLookupManagerExample::ChartRuleLookupManagerExample(
const ChartParser &parser,
const ChartCellCollectionBase &cellColl,
- const SkeletonPT &skeletonPt)
+ const ExamplePT &skeletonPt)
: ChartRuleLookupManager(parser, cellColl)
, m_skeletonPT(skeletonPt)
{
- cerr << "starting ChartRuleLookupManagerSkeleton" << endl;
+ cerr << "starting ChartRuleLookupManagerExample" << endl;
}
-ChartRuleLookupManagerSkeleton::~ChartRuleLookupManagerSkeleton()
+ChartRuleLookupManagerExample::~ChartRuleLookupManagerExample()
{
// RemoveAllInColl(m_tpColl);
}
-void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
+void ChartRuleLookupManagerExample::GetChartRuleCollection(
const InputPath &inputPath,
size_t last,
ChartParserCallback &outColl)
@@ -74,12 +74,12 @@ void ChartRuleLookupManagerSkeleton::GetChartRuleCollection(
}
TargetPhrase *
-ChartRuleLookupManagerSkeleton::
+ChartRuleLookupManagerExample::
CreateTargetPhrase(const Word &sourceWord) const
{
- // create a target phrase from the 1st word of the source, prefix with 'ChartManagerSkeleton:'
+ // create a target phrase from the 1st word of the source, prefix with 'ChartManagerExample:'
string str = sourceWord.GetFactor(0)->GetString().as_string();
- str = "ChartManagerSkeleton:" + str;
+ str = "ChartManagerExample:" + str;
TargetPhrase *tp = new TargetPhrase(&m_skeletonPT);
Word &word = tp->AddWord();
diff --git a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h
index d01f3b9bd..3b3f59ace 100644
--- a/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h
+++ b/moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h
@@ -29,16 +29,16 @@ class TargetPhraseCollection;
class ChartParserCallback;
class DottedRuleColl;
class Range;
-class SkeletonPT;
+class ExamplePT;
-class ChartRuleLookupManagerSkeleton : public ChartRuleLookupManager
+class ChartRuleLookupManagerExample : public ChartRuleLookupManager
{
public:
- ChartRuleLookupManagerSkeleton(const ChartParser &parser,
- const ChartCellCollectionBase &cellColl,
- const SkeletonPT &skeletonPt);
+ ChartRuleLookupManagerExample(const ChartParser &parser,
+ const ChartCellCollectionBase &cellColl,
+ const ExamplePT &skeletonPt);
- ~ChartRuleLookupManagerSkeleton();
+ ~ChartRuleLookupManagerExample();
virtual void GetChartRuleCollection(
const InputPath &inputPath,
@@ -50,7 +50,7 @@ private:
StackVec m_stackVec;
std::vector<TargetPhraseCollection::shared_ptr > m_tpColl;
- const SkeletonPT &m_skeletonPT;
+ const ExamplePT &m_skeletonPT;
};
} // namespace Moses
diff --git a/moses/TranslationModel/SkeletonPT.cpp b/moses/TranslationModel/ExamplePT.cpp
index 6b42212f9..198ce2814 100644
--- a/moses/TranslationModel/SkeletonPT.cpp
+++ b/moses/TranslationModel/ExamplePT.cpp
@@ -1,29 +1,29 @@
// vim:tabstop=2
-#include "SkeletonPT.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
+#include "ExamplePT.h"
+#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h"
using namespace std;
namespace Moses
{
-SkeletonPT::SkeletonPT(const std::string &line)
+ExamplePT::ExamplePT(const std::string &line)
: PhraseDictionary(line, true)
{
ReadParameters();
}
-void SkeletonPT::Load(AllOptions::ptr const& opts)
+void ExamplePT::Load(AllOptions::ptr const& opts)
{
m_options = opts;
SetFeaturesToApply();
}
-void SkeletonPT::InitializeForInput(ttasksptr const& ttask)
+void ExamplePT::InitializeForInput(ttasksptr const& ttask)
{
ReduceCache();
}
-void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
+void ExamplePT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
CacheColl &cache = GetCache();
@@ -46,14 +46,14 @@ void SkeletonPT::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQu
}
}
-TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
+TargetPhrase *ExamplePT::CreateTargetPhrase(const Phrase &sourcePhrase) const
{
- // create a target phrase from the 1st word of the source, prefix with 'SkeletonPT:'
+ // create a target phrase from the 1st word of the source, prefix with 'ExamplePT:'
assert(sourcePhrase.GetSize());
assert(m_output.size() == 1);
string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string();
- str = "SkeletonPT:" + str;
+ str = "ExamplePT:" + str;
TargetPhrase *tp = new TargetPhrase(this);
Word &word = tp->AddWord();
@@ -69,17 +69,17 @@ TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
return tp;
}
-ChartRuleLookupManager* SkeletonPT::CreateRuleLookupManager(const ChartParser &parser,
+ChartRuleLookupManager* ExamplePT::CreateRuleLookupManager(const ChartParser &parser,
const ChartCellCollectionBase &cellCollection,
std::size_t /*maxChartSpan*/)
{
- return new ChartRuleLookupManagerSkeleton(parser, cellCollection, *this);
+ return new ChartRuleLookupManagerExample(parser, cellCollection, *this);
}
-TO_STRING_BODY(SkeletonPT);
+TO_STRING_BODY(ExamplePT);
// friend
-ostream& operator<<(ostream& out, const SkeletonPT& phraseDict)
+ostream& operator<<(ostream& out, const ExamplePT& phraseDict)
{
return out;
}
diff --git a/moses/TranslationModel/SkeletonPT.h b/moses/TranslationModel/ExamplePT.h
index 443f1cc8e..6ec7764c9 100644
--- a/moses/TranslationModel/SkeletonPT.h
+++ b/moses/TranslationModel/ExamplePT.h
@@ -9,12 +9,12 @@ class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
-class SkeletonPT : public PhraseDictionary
+class ExamplePT : public PhraseDictionary
{
- friend std::ostream& operator<<(std::ostream&, const SkeletonPT&);
+ friend std::ostream& operator<<(std::ostream&, const ExamplePT&);
public:
- SkeletonPT(const std::string &line);
+ ExamplePT(const std::string &line);
void Load(AllOptions::ptr const& opts);
diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp
index 36a28089b..fc62f0679 100644
--- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentence.cpp
@@ -1,6 +1,6 @@
// vim:tabstop=2
#include "PhraseDictionaryMemoryPerSentence.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
+#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerExample.h"
using namespace std;
diff --git a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
index 072e482de..acf834cbd 100644
--- a/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.cpp
@@ -1,6 +1,5 @@
// vim:tabstop=2
#include "PhraseDictionaryMemoryPerSentenceOnDemand.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
#include <sstream>
using namespace std;
diff --git a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
index 3d1664822..2ffe880c7 100644
--- a/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
+++ b/moses/TranslationModel/PhraseDictionaryTransliteration.cpp
@@ -2,7 +2,6 @@
#include <cstdlib>
#include "PhraseDictionaryTransliteration.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
#include "moses/DecodeGraph.h"
#include "moses/DecodeStep.h"
#include "util/tempfile.hh"
diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.cpp b/moses/TranslationModel/ProbingPT.cpp
index 1ae0c67c3..dca7835f5 100644
--- a/moses/TranslationModel/ProbingPT/ProbingPT.cpp
+++ b/moses/TranslationModel/ProbingPT.cpp
@@ -4,8 +4,8 @@
#include "moses/FactorCollection.h"
#include "moses/TargetPhraseCollection.h"
#include "moses/InputFileStream.h"
-#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
-#include "querying.hh"
+#include "probingpt/querying.h"
+#include "probingpt/probing_hash_utils.h"
using namespace std;
@@ -14,6 +14,7 @@ namespace Moses
ProbingPT::ProbingPT(const std::string &line)
: PhraseDictionary(line,true)
,m_engine(NULL)
+ ,load_method(util::POPULATE_OR_READ)
{
ReadParameters();
@@ -31,7 +32,7 @@ void ProbingPT::Load(AllOptions::ptr const& opts)
m_options = opts;
SetFeaturesToApply();
- m_engine = new QueryEngine(m_filePath.c_str());
+ m_engine = new probingpt::QueryEngine(m_filePath.c_str(), load_method);
m_unkId = 456456546456;
@@ -116,6 +117,28 @@ void ProbingPT::CreateAlignmentMap(const std::string path)
}
}
+void ProbingPT::SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "load") {
+ if (value == "lazy") {
+ load_method = util::LAZY;
+ } else if (value == "populate_or_lazy") {
+ load_method = util::POPULATE_OR_LAZY;
+ } else if (value == "populate_or_read" || value == "populate") {
+ load_method = util::POPULATE_OR_READ;
+ } else if (value == "read") {
+ load_method = util::READ;
+ } else if (value == "parallel_read") {
+ load_method = util::PARALLEL_READ;
+ } else {
+ UTIL_THROW2("load method not supported" << value);
+ }
+ } else {
+ PhraseDictionary::SetParameter(key, value);
+ }
+
+}
+
void ProbingPT::InitializeForInput(ttasksptr const& ttask)
{
@@ -256,12 +279,12 @@ TargetPhraseCollection *ProbingPT::CreateTargetPhrases(
TargetPhrase *ProbingPT::CreateTargetPhrase(
const char *&offset) const
{
- TargetPhraseInfo *tpInfo = (TargetPhraseInfo*) offset;
+ probingpt::TargetPhraseInfo *tpInfo = (probingpt::TargetPhraseInfo*) offset;
size_t numRealWords = tpInfo->numWords / m_output.size();
TargetPhrase *tp = new TargetPhrase(this);
- offset += sizeof(TargetPhraseInfo);
+ offset += sizeof(probingpt::TargetPhraseInfo);
// scores
float *scores = (float*) offset;
diff --git a/moses/TranslationModel/ProbingPT/ProbingPT.h b/moses/TranslationModel/ProbingPT.h
index 953a2dc2f..1c996f5fa 100644
--- a/moses/TranslationModel/ProbingPT/ProbingPT.h
+++ b/moses/TranslationModel/ProbingPT.h
@@ -3,16 +3,20 @@
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/bimap.hpp>
#include <boost/unordered_map.hpp>
-#include "../PhraseDictionary.h"
+#include "PhraseDictionary.h"
+#include "util/mmap.hh"
+namespace probingpt
+{
+class QueryEngine;
+class target_text;
+}
namespace Moses
{
class ChartParser;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
-class QueryEngine;
-class target_text;
class ProbingPT : public PhraseDictionary
{
@@ -26,6 +30,8 @@ public:
void InitializeForInput(ttasksptr const& ttask);
+ void SetParameter(const std::string& key, const std::string& value);
+
// for phrase-based model
void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
@@ -39,12 +45,13 @@ public:
protected:
- QueryEngine *m_engine;
+ probingpt::QueryEngine *m_engine;
uint64_t m_unkId;
std::vector<uint64_t> m_sourceVocab; // factor id -> pt id
std::vector<const Factor*> m_targetVocab; // pt id -> factor*
std::vector<const AlignmentInfo*> m_aligns;
+ util::LoadMethod load_method;
boost::iostreams::mapped_file_source file;
const char *data;
diff --git a/moses/TranslationModel/ProbingPT/Jamfile b/moses/TranslationModel/ProbingPT/Jamfile
deleted file mode 100644
index 29c6ec41d..000000000
--- a/moses/TranslationModel/ProbingPT/Jamfile
+++ /dev/null
@@ -1,8 +0,0 @@
-local current = "" ;
-local includes = ;
-
-fakelib ProbingPT : [ glob *.cpp ] ../..//headers : $(includes) <dependency>$(PT-LOG) : : $(includes) ;
-
-path-constant PT-LOG : bin/pt.log ;
-update-if-changed $(PT-LOG) $(current) ;
-
diff --git a/moses/TranslationModel/ProbingPT/StoreTarget.cpp b/moses/TranslationModel/ProbingPT/StoreTarget.cpp
deleted file mode 100644
index f586a26b9..000000000
--- a/moses/TranslationModel/ProbingPT/StoreTarget.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * StoreTarget.cpp
- *
- * Created on: 19 Jan 2016
- * Author: hieu
- */
-#include <boost/foreach.hpp>
-#include "StoreTarget.h"
-#include "line_splitter.hh"
-#include "probing_hash_utils.hh"
-#include "moses/OutputFileStream.h"
-#include "moses/Util.h"
-
-using namespace std;
-
-namespace Moses
-{
-
-StoreTarget::StoreTarget(const std::string &basepath)
- :m_basePath(basepath)
- ,m_vocab(basepath + "/TargetVocab.dat")
-{
- std::string path = basepath + "/TargetColl.dat";
- m_fileTargetColl.open(path.c_str(),
- std::ios::out | std::ios::binary | std::ios::ate | std::ios::trunc);
- if (!m_fileTargetColl.is_open()) {
- throw "can't create file ";
- }
-
-}
-
-StoreTarget::~StoreTarget()
-{
- assert(m_coll.empty());
- m_fileTargetColl.close();
-
- // vocab
- m_vocab.Save();
-}
-
-uint64_t StoreTarget::Save()
-{
- uint64_t ret = m_fileTargetColl.tellp();
-
- // save to disk
- uint64_t numTP = m_coll.size();
- m_fileTargetColl.write((char*) &numTP, sizeof(uint64_t));
-
- for (size_t i = 0; i < m_coll.size(); ++i) {
- Save(*m_coll[i]);
- }
-
- // clear coll
- RemoveAllInColl(m_coll);
- m_coll.clear();
-
- // starting position of coll
- return ret;
-}
-
-void StoreTarget::Save(const target_text &rule)
-{
- // metadata for each tp
- TargetPhraseInfo tpInfo;
- tpInfo.alignTerm = GetAlignId(rule.word_align_term);
- tpInfo.alignNonTerm = GetAlignId(rule.word_align_non_term);
- tpInfo.numWords = rule.target_phrase.size();
- tpInfo.propLength = rule.property.size();
-
- //cerr << "TPInfo=" << sizeof(TPInfo);
- m_fileTargetColl.write((char*) &tpInfo, sizeof(TargetPhraseInfo));
-
- // scores
- for (size_t i = 0; i < rule.prob.size(); ++i) {
- float prob = rule.prob[i];
- m_fileTargetColl.write((char*) &prob, sizeof(prob));
- }
-
- // tp
- for (size_t i = 0; i < rule.target_phrase.size(); ++i) {
- uint32_t vocabId = rule.target_phrase[i];
- m_fileTargetColl.write((char*) &vocabId, sizeof(vocabId));
- }
-
- // prop TODO
-
-}
-
-void StoreTarget::SaveAlignment()
-{
- std::string path = m_basePath + "/Alignments.dat";
- OutputFileStream file(path);
-
- BOOST_FOREACH(Alignments::value_type &valPair, m_aligns) {
- file << valPair.second << "\t";
-
- const std::vector<size_t> &aligns = valPair.first;
- BOOST_FOREACH(size_t align, aligns) {
- file << align << " ";
- }
- file << endl;
- }
-
-}
-
-void StoreTarget::Append(const line_text &line, bool log_prob, bool scfg)
-{
- target_text *rule = new target_text;
- //cerr << "line.target_phrase=" << line.target_phrase << endl;
-
- // target_phrase
- vector<bool> nonTerms;
- util::TokenIter<util::SingleCharacter> it;
- it = util::TokenIter<util::SingleCharacter>(line.target_phrase,
- util::SingleCharacter(' '));
- while (it) {
- StringPiece word = *it;
- //cerr << "word=" << word << endl;
-
- bool nonTerm = false;
- if (scfg) {
- // not really sure how to handle factored SCFG and NT
- if (scfg && word[0] == '[' && word[word.size() - 1] == ']') {
- //cerr << "NON-TERM=" << tok << " " << nonTerms.size() << endl;
- nonTerm = true;
- }
- nonTerms.push_back(nonTerm);
- }
-
- util::TokenIter<util::SingleCharacter> itFactor;
- itFactor = util::TokenIter<util::SingleCharacter>(word,
- util::SingleCharacter('|'));
- while (itFactor) {
- StringPiece factor = *itFactor;
-
- string factorStr = factor.as_string();
- uint32_t vocabId = m_vocab.GetVocabId(factorStr);
-
- rule->target_phrase.push_back(vocabId);
-
- itFactor++;
- }
-
- it++;
- }
-
- // probs
- it = util::TokenIter<util::SingleCharacter>(line.prob,
- util::SingleCharacter(' '));
- while (it) {
- string tok = it->as_string();
- float prob = Scan<float>(tok);
-
- if (log_prob) {
- prob = FloorScore(log(prob));
- if (prob == 0.0f) prob = 0.0000000001;
- }
-
- rule->prob.push_back(prob);
- it++;
- }
-
- /*
- cerr << "nonTerms=";
- for (size_t i = 0; i < nonTerms.size(); ++i) {
- cerr << nonTerms[i] << " ";
- }
- cerr << endl;
- */
-
- // alignment
- it = util::TokenIter<util::SingleCharacter>(line.word_align,
- util::SingleCharacter(' '));
- while (it) {
- string tokPair = Trim(it->as_string());
- if (tokPair.empty()) {
- break;
- }
-
- vector<size_t> alignPair = Tokenize<size_t>(tokPair, "-");
- assert(alignPair.size() == 2);
-
- bool nonTerm = false;
- size_t sourcePos = alignPair[0];
- size_t targetPos = alignPair[1];
- if (scfg) {
- nonTerm = nonTerms[targetPos];
- }
-
- //cerr << targetPos << "=" << nonTerm << endl;
-
- if (nonTerm) {
- rule->word_align_non_term.push_back(sourcePos);
- rule->word_align_non_term.push_back(targetPos);
- //cerr << (int) rule->word_all1.back() << " ";
- } else {
- rule->word_align_term.push_back(sourcePos);
- rule->word_align_term.push_back(targetPos);
- }
-
- it++;
- }
-
- // extra scores
- string prop = line.property.as_string();
- AppendLexRO(prop, rule->prob, log_prob);
-
- //cerr << "line.property=" << line.property << endl;
- //cerr << "prop=" << prop << endl;
-
- // properties
- /*
- for (size_t i = 0; i < prop.size(); ++i) {
- rule->property.push_back(prop[i]);
- }
- */
- m_coll.push_back(rule);
-}
-
-uint32_t StoreTarget::GetAlignId(const std::vector<size_t> &align)
-{
- boost::unordered_map<std::vector<size_t>, uint32_t>::iterator iter =
- m_aligns.find(align);
- if (iter == m_aligns.end()) {
- uint32_t ind = m_aligns.size();
- m_aligns[align] = ind;
- return ind;
- } else {
- return iter->second;
- }
-}
-
-void StoreTarget::AppendLexRO(std::string &prop, std::vector<float> &retvector,
- bool log_prob) const
-{
- size_t startPos = prop.find("{{LexRO ");
-
- if (startPos != string::npos) {
- size_t endPos = prop.find("}}", startPos + 8);
- string lexProb = prop.substr(startPos + 8, endPos - startPos - 8);
- //cerr << "lexProb=" << lexProb << endl;
-
- // append lex probs to pt probs
- vector<float> scores = Tokenize<float>(lexProb);
-
- if (log_prob) {
- for (size_t i = 0; i < scores.size(); ++i) {
- scores[i] = FloorScore(log(scores[i]));
- if (scores[i] == 0.0f) scores[i] = 0.0000000001;
- }
- }
-
- for (size_t i = 0; i < scores.size(); ++i) {
- retvector.push_back(scores[i]);
- }
-
- // exclude LexRO property from property column
- prop = prop.substr(0, startPos)
- + prop.substr(endPos + 2, prop.size() - endPos - 2);
- //cerr << "line.property_to_be_binarized=" << line.property_to_be_binarized << "AAAA" << endl;
- }
-}
-
-} /* namespace Moses2 */
diff --git a/moses/TranslationModel/ProbingPT/StoreTarget.h b/moses/TranslationModel/ProbingPT/StoreTarget.h
deleted file mode 100644
index 331c197b3..000000000
--- a/moses/TranslationModel/ProbingPT/StoreTarget.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * StoreTarget.h
- *
- * Created on: 19 Jan 2016
- * Author: hieu
- */
-#pragma once
-#include <string>
-#include <fstream>
-#include <vector>
-#include <inttypes.h>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
-#include "StoreVocab.h"
-
-namespace Moses
-{
-
-class line_text;
-class target_text;
-
-class StoreTarget
-{
-public:
- StoreTarget(const std::string &basepath);
- virtual ~StoreTarget();
-
- uint64_t Save();
- void SaveAlignment();
-
- void Append(const line_text &line, bool log_prob, bool scfg);
-protected:
- std::string m_basePath;
- std::fstream m_fileTargetColl;
- StoreVocab<uint32_t> m_vocab;
-
- typedef boost::unordered_map<std::vector<size_t>, uint32_t> Alignments;
- Alignments m_aligns;
-
- std::vector<target_text*> m_coll;
-
- uint32_t GetAlignId(const std::vector<size_t> &align);
- void Save(const target_text &rule);
-
- void AppendLexRO(std::string &prop, std::vector<float> &retvector,
- bool log_prob) const;
-
-};
-
-} /* namespace Moses2 */
-
diff --git a/moses/TranslationModel/ProbingPT/StoreVocab.cpp b/moses/TranslationModel/ProbingPT/StoreVocab.cpp
deleted file mode 100644
index 6515bac63..000000000
--- a/moses/TranslationModel/ProbingPT/StoreVocab.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * StoreVocab.cpp
- *
- * Created on: 15 Jun 2016
- * Author: hieu
- */
-#include <fstream>
-#include "StoreVocab.h"
-
-namespace Moses
-{
-
-} /* namespace Moses2 */
diff --git a/moses/TranslationModel/ProbingPT/StoreVocab.h b/moses/TranslationModel/ProbingPT/StoreVocab.h
deleted file mode 100644
index 806dcebf4..000000000
--- a/moses/TranslationModel/ProbingPT/StoreVocab.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * StoreVocab.h
- *
- * Created on: 15 Jun 2016
- * Author: hieu
- */
-#pragma once
-#include <string>
-#include <boost/unordered_map.hpp>
-#include "moses/OutputFileStream.h"
-#include "moses/Util.h"
-
-namespace Moses
-{
-
-template<typename VOCABID>
-class StoreVocab
-{
-protected:
- std::string m_path;
-
- typedef boost::unordered_map<std::string, VOCABID> Coll;
- Coll m_vocab;
-
-public:
- StoreVocab(const std::string &path)
- :m_path(path)
- {}
-
- virtual ~StoreVocab() {}
-
- VOCABID GetVocabId(const std::string &word) {
- typename Coll::iterator iter = m_vocab.find(word);
- if (iter == m_vocab.end()) {
- VOCABID ind = m_vocab.size() + 1;
- m_vocab[word] = ind;
- return ind;
- } else {
- return iter->second;
- }
- }
-
- void Insert(VOCABID id, const std::string &word) {
- m_vocab[word] = id;
- }
-
- void Save() {
- OutputFileStream strme(m_path);
-
- typename Coll::const_iterator iter;
- for (iter = m_vocab.begin(); iter != m_vocab.end(); ++iter) {
- strme << iter->first << "\t" << iter->second << std::endl;
- }
-
- strme.Close();
- }
-};
-
-} /* namespace Moses2 */
-
diff --git a/moses/TranslationModel/ProbingPT/hash.cpp b/moses/TranslationModel/ProbingPT/hash.cpp
deleted file mode 100644
index 47242e25d..000000000
--- a/moses/TranslationModel/ProbingPT/hash.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-#include <iostream>
-#include "hash.hh"
-
-using namespace std;
-
-namespace Moses
-{
-
-uint64_t getHash(StringPiece text)
-{
- std::size_t len = text.size();
- uint64_t key = util::MurmurHashNative(text.data(), len);
- return key;
-}
-
-std::vector<uint64_t> getVocabIDs(const StringPiece &textin)
-{
- //Tokenize
- std::vector<uint64_t> output;
-
- util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
-
- while (itWord) {
- StringPiece word = *itWord;
- uint64_t id = 0;
-
- util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
- while (itFactor) {
- StringPiece factor = *itFactor;
- //cerr << "factor=" << factor << endl;
-
- id += getHash(factor);
- itFactor++;
- }
-
- output.push_back(id);
- itWord++;
- }
-
- return output;
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/hash.hh b/moses/TranslationModel/ProbingPT/hash.hh
deleted file mode 100644
index f218ad9da..000000000
--- a/moses/TranslationModel/ProbingPT/hash.hh
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-#include "util/string_piece.hh"
-#include "util/murmur_hash.hh"
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-#include <vector>
-
-namespace Moses
-{
-
-//Gets the MurmurmurHash for give string
-uint64_t getHash(StringPiece text);
-
-std::vector<uint64_t> getVocabIDs(const StringPiece &textin);
-
-}
diff --git a/moses/TranslationModel/ProbingPT/line_splitter.cpp b/moses/TranslationModel/ProbingPT/line_splitter.cpp
deleted file mode 100644
index cb9e47fec..000000000
--- a/moses/TranslationModel/ProbingPT/line_splitter.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-#include "line_splitter.hh"
-
-namespace Moses
-{
-
-line_text splitLine(const StringPiece &textin, bool scfg)
-{
- const char delim[] = "|||";
- line_text output;
-
- //Tokenize
- util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
- //Get source phrase
- output.source_phrase = Trim(*it);
- //std::cerr << "output.source_phrase=" << output.source_phrase << "AAAA" << std::endl;
-
- //Get target_phrase
- it++;
- output.target_phrase = Trim(*it);
- //std::cerr << "output.target_phrase=" << output.target_phrase << "AAAA" << std::endl;
-
- if (scfg) {
- /*
- std::cerr << "output.source_phrase=" << output.source_phrase << std::endl;
- std::cerr << "output.target_phrase=" << output.target_phrase << std::endl;
- reformatSCFG(output);
- std::cerr << "output.source_phrase=" << output.source_phrase << std::endl;
- std::cerr << "output.target_phrase=" << output.target_phrase << std::endl;
- */
- }
-
- //Get probabilities
- it++;
- output.prob = Trim(*it);
- //std::cerr << "output.prob=" << output.prob << "AAAA" << std::endl;
-
- //Get WordAllignment
- it++;
- if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
- output.word_align = Trim(*it);
- //std::cerr << "output.word_align=" << output.word_align << "AAAA" << std::endl;
-
- //Get count
- it++;
- if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
- output.counts = Trim(*it);
- //std::cerr << "output.counts=" << output.counts << "AAAA" << std::endl;
-
- //Get sparse_score
- it++;
- if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
- output.sparse_score = Trim(*it);
- //std::cerr << "output.sparse_score=" << output.sparse_score << "AAAA" << std::endl;
-
- //Get property
- it++;
- if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
- output.property = Trim(*it);
- //std::cerr << "output.property=" << output.property << "AAAA" << std::endl;
-
- return output;
-}
-
-std::vector<unsigned char> splitWordAll1(const StringPiece &textin)
-{
- const char delim[] = " ";
- const char delim2[] = "-";
- std::vector<unsigned char> output;
-
- //Case with no word alignments.
- if (textin.size() == 0) {
- return output;
- }
-
- //Split on space
- util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
-
- //For each int
- while (it) {
- //Split on dash (-)
- util::TokenIter<util::MultiCharacter> itInner(*it,
- util::MultiCharacter(delim2));
-
- //Insert the two entries in the vector. User will read entry 0 and 1 to get the first,
- //2 and 3 for second etc. Use unsigned char instead of int to save space, as
- //word allignments are all very small numbers that fit in a single byte
- output.push_back((unsigned char) (atoi(itInner->data())));
- itInner++;
- output.push_back((unsigned char) (atoi(itInner->data())));
- it++;
- }
-
- return output;
-
-}
-
-void reformatSCFG(line_text &output)
-{
-
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/line_splitter.hh b/moses/TranslationModel/ProbingPT/line_splitter.hh
deleted file mode 100644
index 01b86fc9b..000000000
--- a/moses/TranslationModel/ProbingPT/line_splitter.hh
+++ /dev/null
@@ -1,57 +0,0 @@
-#pragma once
-
-#include "util/string_piece.hh"
-#include "util/tokenize_piece.hh"
-#include "util/file_piece.hh"
-#include <vector>
-#include <cstdlib> //atof
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-#include <vector>
-
-namespace Moses
-{
-
-//Struct for holding processed line
-struct line_text {
- StringPiece source_phrase;
- StringPiece target_phrase;
- StringPiece prob;
- StringPiece word_align;
- StringPiece counts;
- StringPiece sparse_score;
- StringPiece property;
- std::string property_to_be_binarized;
-};
-
-//Struct for holding processed line
-struct target_text {
- std::vector<unsigned int> target_phrase;
- std::vector<float> prob;
- std::vector<size_t> word_align_term;
- std::vector<size_t> word_align_non_term;
- std::vector<char> counts;
- std::vector<char> sparse_score;
- std::vector<char> property;
-
- /*
- void Reset()
- {
- target_phrase.clear();
- prob.clear();
- word_all1.clear();
- counts.clear();
- sparse_score.clear();
- property.clear();
- }
- */
-};
-
-//Ask if it's better to have it receive a pointer to a line_text struct
-line_text splitLine(const StringPiece &textin, bool scfg);
-void reformatSCFG(line_text &output);
-
-std::vector<unsigned char> splitWordAll1(const StringPiece &textin);
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp b/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
deleted file mode 100644
index f23f57d66..000000000
--- a/moses/TranslationModel/ProbingPT/probing_hash_utils.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#include "probing_hash_utils.hh"
-
-namespace Moses
-{
-
-//Read table from disk, return memory map location
-char * readTable(const char * filename, size_t size)
-{
- //Initial position of the file is the end of the file, thus we know the size
- int fd;
- char * map;
-
- fd = open(filename, O_RDONLY);
- if (fd == -1) {
- perror("Error opening file for reading");
- exit(EXIT_FAILURE);
- }
-
- map = (char *) mmap(0, size, PROT_READ, MAP_SHARED, fd, 0);
-
- if (map == MAP_FAILED) {
- close(fd);
- perror("Error mmapping the file");
- exit(EXIT_FAILURE);
- }
-
- return map;
-}
-
-void serialize_table(char *mem, size_t size, const std::string &filename)
-{
- std::ofstream os(filename.c_str(), std::ios::binary);
- os.write((const char*) &mem[0], size);
- os.close();
-
-}
-
-uint64_t getKey(const uint64_t source_phrase[], size_t size)
-{
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
- uint64_t key = 0;
- for (size_t i = 0; i < size; i++) {
- key += (source_phrase[i] << i);
- }
- return key;
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/probing_hash_utils.hh b/moses/TranslationModel/ProbingPT/probing_hash_utils.hh
deleted file mode 100644
index 998686b2e..000000000
--- a/moses/TranslationModel/ProbingPT/probing_hash_utils.hh
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-
-#include "util/probing_hash_table.hh"
-
-#include <sys/mman.h>
-#include <boost/functional/hash.hpp>
-#include <fcntl.h>
-#include <fstream>
-
-namespace Moses
-{
-
-#define API_VERSION 15
-
-//Hash table entry
-struct Entry {
- typedef uint64_t Key;
- Key key;
-
- Key GetKey() const {
- return key;
- }
-
- void SetKey(Key to) {
- key = to;
- }
-
- uint64_t value;
-};
-
-#define NONE std::numeric_limits<uint64_t>::max()
-
-//Define table
-typedef util::ProbingHashTable<Entry, boost::hash<uint64_t> > Table;
-
-void serialize_table(char *mem, size_t size, const std::string &filename);
-
-char * readTable(const char * filename, size_t size);
-
-uint64_t getKey(const uint64_t source_phrase[], size_t size);
-
-struct TargetPhraseInfo {
- uint32_t alignTerm;
- uint32_t alignNonTerm;
- uint16_t numWords;
- uint16_t propLength;
- uint16_t filler;
-};
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/querying.cpp b/moses/TranslationModel/ProbingPT/querying.cpp
deleted file mode 100644
index 10c35e361..000000000
--- a/moses/TranslationModel/ProbingPT/querying.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-#include "querying.hh"
-#include "util/exception.hh"
-
-using namespace std;
-
-namespace Moses
-{
-
-QueryEngine::QueryEngine(const char * filepath)
-{
-
- //Create filepaths
- std::string basepath(filepath);
- std::string path_to_config = basepath + "/config";
- std::string path_to_hashtable = basepath + "/probing_hash.dat";
- std::string path_to_source_vocabid = basepath + "/source_vocabids";
- std::string alignPath = basepath + "/Alignments.dat";
-
- if (!FileExists(path_to_config)) {
- UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config);
- }
-
- ///Source phrase vocabids
- read_map(source_vocabids, path_to_source_vocabid.c_str());
-
- // alignments
- read_alignments(alignPath);
-
- //Read config file
- boost::unordered_map<std::string, std::string> keyValue;
-
- std::ifstream config(path_to_config.c_str());
- std::string line;
- while (getline(config, line)) {
- std::vector<std::string> toks = Tokenize(line, "\t");
- UTIL_THROW_IF2(toks.size() != 2, "Wrong config format:" << line);
- keyValue[ toks[0] ] = toks[1];
- }
-
- bool found;
- //Check API version:
- int version;
- found = Get(keyValue, "API_VERSION", version);
- if (!found) {
- std::cerr << "Old or corrupted version of ProbingPT. Please rebinarize your phrase tables." << std::endl;
- } else if (version != API_VERSION) {
- std::cerr << "The ProbingPT API has changed. " << version << "!="
- << API_VERSION << " Please rebinarize your phrase tables." << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //Get tablesize.
- int tablesize;
- found = Get(keyValue, "uniq_entries", tablesize);
- if (!found) {
- std::cerr << "uniq_entries not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //Number of scores
- found = Get(keyValue, "num_scores", num_scores);
- if (!found) {
- std::cerr << "num_scores not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- //How may scores from lex reordering models
- found = Get(keyValue, "num_lex_scores", num_lex_scores);
- if (!found) {
- std::cerr << "num_lex_scores not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- // have the scores been log() and FloorScore()?
- found = Get(keyValue, "log_prob", logProb);
- if (!found) {
- std::cerr << "logProb not found" << std::endl;
- exit(EXIT_FAILURE);
- }
-
- config.close();
-
- //Read hashtable
- table_filesize = Table::Size(tablesize, 1.2);
- mem = readTable(path_to_hashtable.c_str(), table_filesize);
- Table table_init(mem, table_filesize);
- table = table_init;
-
- std::cerr << "Initialized successfully! " << std::endl;
-}
-
-QueryEngine::~QueryEngine()
-{
- //Clear mmap content from memory.
- munmap(mem, table_filesize);
-
-}
-
-uint64_t QueryEngine::getKey(uint64_t source_phrase[], size_t size) const
-{
- //TOO SLOW
- //uint64_t key = util::MurmurHashNative(&source_phrase[0], source_phrase.size());
- return Moses::getKey(source_phrase, size);
-}
-
-std::pair<bool, uint64_t> QueryEngine::query(uint64_t key)
-{
- std::pair<bool, uint64_t> ret;
-
- const Entry * entry;
- ret.first = table.Find(key, entry);
- if (ret.first) {
- ret.second = entry->value;
- }
- return ret;
-}
-
-void QueryEngine::read_alignments(const std::string &alignPath)
-{
- std::ifstream strm(alignPath.c_str());
-
- string line;
- while (getline(strm, line)) {
- vector<string> toks = Tokenize(line, "\t ");
- UTIL_THROW_IF2(toks.size() == 0, "Corrupt alignment file");
-
- uint32_t alignInd = Scan<uint32_t>(toks[0]);
- if (alignInd >= alignColl.size()) {
- alignColl.resize(alignInd + 1);
- }
-
- Alignments &aligns = alignColl[alignInd];
- for (size_t i = 1; i < toks.size(); ++i) {
- size_t pos = Scan<size_t>(toks[i]);
- aligns.push_back(pos);
- }
- }
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/querying.hh b/moses/TranslationModel/ProbingPT/querying.hh
deleted file mode 100644
index 915bc4806..000000000
--- a/moses/TranslationModel/ProbingPT/querying.hh
+++ /dev/null
@@ -1,66 +0,0 @@
-#pragma once
-
-#include <boost/unordered_map.hpp>
-#include <sys/stat.h> //For finding size of file
-#include "vocabid.hh"
-#include <algorithm> //toLower
-#include <deque>
-#include "probing_hash_utils.hh"
-#include "hash.hh" //Includes line splitter
-#include "line_splitter.hh"
-#include "moses//Util.h"
-
-namespace Moses
-{
-
-class QueryEngine
-{
- std::map<uint64_t, std::string> source_vocabids;
-
- typedef std::vector<unsigned char> Alignments;
- std::vector<Alignments> alignColl;
-
- Table table;
- char *mem; //Memory for the table, necessary so that we can correctly destroy the object
-
- size_t table_filesize;
- bool is_reordering;
-
- void read_alignments(const std::string &alignPath);
-
-public:
- int num_scores;
- int num_lex_scores;
- bool logProb;
-
- QueryEngine(const char *);
- ~QueryEngine();
-
- std::pair<bool, uint64_t> query(uint64_t key);
-
- const std::map<uint64_t, std::string> &getSourceVocab() const {
- return source_vocabids;
- }
-
- const std::vector<Alignments> &getAlignments() const {
- return alignColl;
- }
-
- uint64_t getKey(uint64_t source_phrase[], size_t size) const;
-
- template<typename T>
- inline bool Get(const boost::unordered_map<std::string, std::string> &keyValue, const std::string &sought, T &found) const {
- boost::unordered_map<std::string, std::string>::const_iterator iter = keyValue.find(sought);
- if (iter == keyValue.end()) {
- return false;
- }
-
- const std::string &foundStr = iter->second;
- found = Scan<T>(foundStr);
- return true;
- }
-
-};
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/storing.cpp b/moses/TranslationModel/ProbingPT/storing.cpp
deleted file mode 100644
index baf6ae91e..000000000
--- a/moses/TranslationModel/ProbingPT/storing.cpp
+++ /dev/null
@@ -1,298 +0,0 @@
-#include <sys/stat.h>
-#include <boost/foreach.hpp>
-#include "line_splitter.hh"
-#include "storing.hh"
-#include "StoreTarget.h"
-#include "StoreVocab.h"
-#include "moses/Util.h"
-#include "moses/InputFileStream.h"
-
-using namespace std;
-
-namespace Moses
-{
-
-///////////////////////////////////////////////////////////////////////
-void Node::Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos)
-{
- if (pos < sourcePhrase.size()) {
- uint64_t vocabId = sourcePhrase[pos];
-
- Node *child;
- Children::iterator iter = m_children.find(vocabId);
- if (iter == m_children.end()) {
- // New node. Write other children then discard them
- BOOST_FOREACH(Children::value_type &valPair, m_children) {
- Node &otherChild = valPair.second;
- otherChild.Write(table);
- }
- m_children.clear();
-
- // create new node
- child = &m_children[vocabId];
- assert(!child->done);
- child->key = key + (vocabId << pos);
- } else {
- child = &iter->second;
- }
-
- child->Add(table, sourcePhrase, pos + 1);
- } else {
- // this node was written previously 'cos it has rules
- done = true;
- }
-}
-
-void Node::Write(Table &table)
-{
- //cerr << "START write " << done << " " << key << endl;
- BOOST_FOREACH(Children::value_type &valPair, m_children) {
- Node &child = valPair.second;
- child.Write(table);
- }
-
- if (!done) {
- // save
- Entry sourceEntry;
- sourceEntry.value = NONE;
- sourceEntry.key = key;
-
- //Put into table
- table.Insert(sourceEntry);
- }
-}
-
-///////////////////////////////////////////////////////////////////////
-void createProbingPT(const std::string &phrasetable_path,
- const std::string &basepath, int num_scores, int num_lex_scores,
- bool log_prob, int max_cache_size, bool scfg)
-{
- std::cerr << "Starting..." << std::endl;
-
- //Get basepath and create directory if missing
- mkdir(basepath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
-
- StoreTarget storeTarget(basepath);
-
- //Get uniq lines:
- unsigned long uniq_entries = countUniqueSource(phrasetable_path);
-
- //Source phrase vocabids
- StoreVocab<uint64_t> sourceVocab(basepath + "/source_vocabids");
-
- //Read the file
- util::FilePiece filein(phrasetable_path.c_str());
-
- //Init the probing hash table
- size_t size = Table::Size(uniq_entries, 1.2);
- char * mem = new char[size];
- memset(mem, 0, size);
- Table sourceEntries(mem, size);
-
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> cache;
- float totalSourceCount = 0;
-
- //Keep track of the size of each group of target phrases
- size_t line_num = 0;
-
- //Read everything and processs
- std::string prevSource;
-
- Node sourcePhrases;
- sourcePhrases.done = true;
- sourcePhrases.key = 0;
-
- while (true) {
- try {
- //Process line read
- line_text line;
- line = splitLine(filein.ReadLine(), scfg);
- //cerr << "line=" << line.source_phrase << endl;
-
- ++line_num;
- if (line_num % 1000000 == 0) {
- std::cerr << line_num << " " << std::flush;
- }
-
- //Add source phrases to vocabularyIDs
- add_to_map(sourceVocab, line.source_phrase);
-
- if (prevSource.empty()) {
- // 1st line
- prevSource = line.source_phrase.as_string();
- storeTarget.Append(line, log_prob, scfg);
- } else if (prevSource == line.source_phrase) {
- //If we still have the same line, just append to it:
- storeTarget.Append(line, log_prob, scfg);
- } else {
- assert(prevSource != line.source_phrase);
-
- //Create a new entry even
-
- // save
- uint64_t targetInd = storeTarget.Save();
-
- // next line
- storeTarget.Append(line, log_prob, scfg);
-
- //Create an entry for the previous source phrase:
- Entry sourceEntry;
- sourceEntry.value = targetInd;
- //The key is the sum of hashes of individual words bitshifted by their position in the phrase.
- //Probably not entirerly correct, but fast and seems to work fine in practise.
- std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
- if (scfg) {
- // storing prefixes?
- sourcePhrases.Add(sourceEntries, vocabid_source);
- }
- sourceEntry.key = getKey(vocabid_source);
-
- /*
- cerr << "prevSource=" << prevSource << flush
- << " vocabids=" << Debug(vocabid_source) << flush
- << " key=" << sourceEntry.key << endl;
- */
- //Put into table
- sourceEntries.Insert(sourceEntry);
-
- // update cache - CURRENT source phrase, not prev
- if (max_cache_size) {
- std::string countStr = line.counts.as_string();
- countStr = Trim(countStr);
- if (!countStr.empty()) {
- std::vector<float> toks = Tokenize<float>(countStr);
- //cerr << "CACHE:" << line.source_phrase << " " << countStr << " " << toks[1] << endl;
-
- if (toks.size() >= 2) {
- totalSourceCount += toks[1];
-
- // compute key for CURRENT source
- std::vector<uint64_t> currVocabidSource = getVocabIDs(line.source_phrase.as_string());
- uint64_t currKey = getKey(currVocabidSource);
-
- CacheItem *item = new CacheItem(
- Trim(line.source_phrase.as_string()),
- currKey,
- toks[1]);
- cache.push(item);
-
- if (max_cache_size > 0 && cache.size() > max_cache_size) {
- cache.pop();
- }
- }
- }
- }
-
- //Set prevLine
- prevSource = line.source_phrase.as_string();
- }
-
- } catch (util::EndOfFileException e) {
- std::cerr
- << "Reading phrase table finished, writing remaining files to disk."
- << std::endl;
-
- //After the final entry is constructed we need to add it to the phrase_table
- //Create an entry for the previous source phrase:
- uint64_t targetInd = storeTarget.Save();
-
- Entry sourceEntry;
- sourceEntry.value = targetInd;
-
- //The key is the sum of hashes of individual words. Probably not entirerly correct, but fast
- std::vector<uint64_t> vocabid_source = getVocabIDs(prevSource);
- sourceEntry.key = getKey(vocabid_source);
-
- //Put into table
- sourceEntries.Insert(sourceEntry);
-
- break;
- }
- }
-
- sourcePhrases.Write(sourceEntries);
-
- storeTarget.SaveAlignment();
-
- serialize_table(mem, size, (basepath + "/probing_hash.dat"));
-
- sourceVocab.Save();
-
- serialize_cache(cache, (basepath + "/cache"), totalSourceCount);
-
- delete[] mem;
-
- //Write configfile
- std::ofstream configfile;
- configfile.open((basepath + "/config").c_str());
- configfile << "API_VERSION\t" << API_VERSION << '\n';
- configfile << "uniq_entries\t" << uniq_entries << '\n';
- configfile << "num_scores\t" << num_scores << '\n';
- configfile << "num_lex_scores\t" << num_lex_scores << '\n';
- configfile << "log_prob\t" << log_prob << '\n';
- configfile.close();
-}
-
-size_t countUniqueSource(const std::string &path)
-{
- size_t ret = 0;
- InputFileStream strme(path);
-
- std::string line, prevSource;
- while (std::getline(strme, line)) {
- std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
- assert(toks.size() != 0);
-
- if (prevSource != toks[0]) {
- prevSource = toks[0];
- ++ret;
- }
- }
-
- return ret;
-}
-
-void serialize_cache(
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
- const std::string &path, float totalSourceCount)
-{
- std::vector<const CacheItem*> vec(cache.size());
-
- size_t ind = cache.size() - 1;
- while (!cache.empty()) {
- const CacheItem *item = cache.top();
- vec[ind] = item;
- cache.pop();
- --ind;
- }
-
- std::ofstream os(path.c_str());
-
- os << totalSourceCount << std::endl;
- for (size_t i = 0; i < vec.size(); ++i) {
- const CacheItem *item = vec[i];
- os << item->count << "\t" << item->sourceKey << "\t" << item->source << std::endl;
- delete item;
- }
-
- os.close();
-}
-
-uint64_t getKey(const std::vector<uint64_t> &vocabid_source)
-{
- return getKey(vocabid_source.data(), vocabid_source.size());
-}
-
-std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos)
-{
- assert(endPos < vocabid_source.size());
-
- std::vector<uint64_t> ret(endPos + 1);
- for (size_t i = 0; i <= endPos; ++i) {
- ret[i] = vocabid_source[i];
- }
- return ret;
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/storing.hh b/moses/TranslationModel/ProbingPT/storing.hh
deleted file mode 100644
index 994067515..000000000
--- a/moses/TranslationModel/ProbingPT/storing.hh
+++ /dev/null
@@ -1,92 +0,0 @@
-#pragma once
-
-#include <boost/unordered_set.hpp>
-#include <boost/unordered_map.hpp>
-#include <cstdio>
-#include <sstream>
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <queue>
-#include <sys/stat.h> //mkdir
-
-#include "hash.hh" //Includes line_splitter
-#include "probing_hash_utils.hh"
-
-#include "util/file_piece.hh"
-#include "util/file.hh"
-#include "vocabid.hh"
-
-namespace Moses
-{
-typedef std::vector<uint64_t> SourcePhrase;
-
-
-class Node
-{
- typedef boost::unordered_map<uint64_t, Node> Children;
- Children m_children;
-
-public:
- uint64_t key;
- bool done;
-
- Node()
- :done(false)
- {}
-
- void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0);
- void Write(Table &table);
-};
-
-
-void createProbingPT(const std::string &phrasetable_path,
- const std::string &basepath, int num_scores, int num_lex_scores,
- bool log_prob, int max_cache_size, bool scfg);
-uint64_t getKey(const std::vector<uint64_t> &source_phrase);
-
-std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos);
-
-template<typename T>
-std::string Debug(const std::vector<T> &vec)
-{
- std::stringstream strm;
- for (size_t i = 0; i < vec.size(); ++i) {
- strm << vec[i] << " ";
- }
- return strm.str();
-}
-
-size_t countUniqueSource(const std::string &path);
-
-class CacheItem
-{
-public:
- std::string source;
- uint64_t sourceKey;
- float count;
- CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
- :source(vSource)
- ,sourceKey(vSourceKey)
- ,count(vCount) {
- }
-
- bool operator<(const CacheItem &other) const {
- return count > other.count;
- }
-};
-
-class CacheItemOrderer
-{
-public:
- bool operator()(const CacheItem* a, const CacheItem* b) const {
- return (*a) < (*b);
- }
-};
-
-void serialize_cache(
- std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
- const std::string &path, float totalSourceCount);
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/vocabid.cpp b/moses/TranslationModel/ProbingPT/vocabid.cpp
deleted file mode 100644
index d6f442323..000000000
--- a/moses/TranslationModel/ProbingPT/vocabid.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <boost/foreach.hpp>
-#include "vocabid.hh"
-#include "StoreVocab.h"
-#include "moses/Util.h"
-
-namespace Moses
-{
-
-void add_to_map(StoreVocab<uint64_t> &sourceVocab,
- const StringPiece &textin)
-{
- //Tokenize
- util::TokenIter<util::SingleCharacter> itWord(textin, util::SingleCharacter(' '));
-
- while (itWord) {
- StringPiece word = *itWord;
-
- util::TokenIter<util::SingleCharacter> itFactor(word, util::SingleCharacter('|'));
- while (itFactor) {
- StringPiece factor = *itFactor;
-
- sourceVocab.Insert(getHash(factor), factor.as_string());
- itFactor++;
- }
- itWord++;
- }
-}
-
-void serialize_map(const std::map<uint64_t, std::string> &karta,
- const std::string &filename)
-{
- std::ofstream os(filename.c_str());
-
- std::map<uint64_t, std::string>::const_iterator iter;
- for (iter = karta.begin(); iter != karta.end(); ++iter) {
- os << iter->first << '\t' << iter->second << std::endl;
- }
-
- os.close();
-}
-
-void read_map(std::map<uint64_t, std::string> &karta, const char* filename)
-{
- std::ifstream is(filename);
-
- std::string line;
- while (getline(is, line)) {
- std::vector<std::string> toks = Tokenize(line, "\t");
- assert(toks.size() == 2);
- uint64_t ind = Scan<uint64_t>(toks[1]);
- karta[ind] = toks[0];
- }
-
- //Close the stream after we are done.
- is.close();
-}
-
-}
-
diff --git a/moses/TranslationModel/ProbingPT/vocabid.hh b/moses/TranslationModel/ProbingPT/vocabid.hh
deleted file mode 100644
index 7e1390874..000000000
--- a/moses/TranslationModel/ProbingPT/vocabid.hh
+++ /dev/null
@@ -1,29 +0,0 @@
-//Serialization
-#include <boost/serialization/serialization.hpp>
-#include <boost/serialization/map.hpp>
-#include <boost/archive/text_iarchive.hpp>
-#include <boost/archive/text_oarchive.hpp>
-#include <fstream>
-#include <iostream>
-#include <vector>
-
-#include <map> //Container
-#include "hash.hh" //Hash of elements
-
-#include "util/string_piece.hh" //Tokenization and work with StringPiece
-#include "util/tokenize_piece.hh"
-
-namespace Moses
-{
-template<typename VOCABID>
-class StoreVocab;
-
-void add_to_map(StoreVocab<uint64_t> &sourceVocab,
- const StringPiece &textin);
-
-void serialize_map(const std::map<uint64_t, std::string> &karta,
- const std::string &filename);
-
-void read_map(std::map<uint64_t, std::string> &karta, const char* filename);
-
-}
diff --git a/moses/parameters/AllOptions.cpp b/moses/parameters/AllOptions.cpp
index 868b8e4fb..019e1b0c7 100644
--- a/moses/parameters/AllOptions.cpp
+++ b/moses/parameters/AllOptions.cpp
@@ -101,6 +101,11 @@ namespace Moses
if (!syntax.update(param)) return false;
return sanity_check();
}
+#else
+ bool
+ AllOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& param)
+ { return true; } // #else stub: no-op, but a bool function must return (falling off the end is UB)
#endif
bool
diff --git a/moses/parameters/CubePruningOptions.cpp b/moses/parameters/CubePruningOptions.cpp
index 793ae2db6..847a421f8 100644
--- a/moses/parameters/CubePruningOptions.cpp
+++ b/moses/parameters/CubePruningOptions.cpp
@@ -72,6 +72,11 @@ namespace Moses
return true;
}
+#else
+ bool
+ CubePruningOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& params)
+ { return true; } // #else stub: no-op, but a bool function must return (falling off the end is UB)
#endif
diff --git a/moses/parameters/InputOptions.cpp b/moses/parameters/InputOptions.cpp
index dbdb2f60e..55bc49885 100644
--- a/moses/parameters/InputOptions.cpp
+++ b/moses/parameters/InputOptions.cpp
@@ -95,6 +95,11 @@ namespace Moses {
xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
return true;
}
+#else
+ bool
+ InputOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& param)
+ { return true; } // #else stub: no-op, but a bool function must return (falling off the end is UB)
#endif
}
diff --git a/moses/parameters/NBestOptions.cpp b/moses/parameters/NBestOptions.cpp
index 3000f49df..afae0ed6c 100644
--- a/moses/parameters/NBestOptions.cpp
+++ b/moses/parameters/NBestOptions.cpp
@@ -62,6 +62,11 @@ update(std::map<std::string,xmlrpc_c::value>const& param)
enabled = (nbest_size > 0);
return true;
}
+#else
+bool
+NBestOptions::
+update(std::map<std::string,xmlrpc_c::value>const& param)
+{ return true; } // #else stub: no-op, but a bool function must return (falling off the end is UB)
#endif
diff --git a/moses/parameters/OOVHandlingOptions.cpp b/moses/parameters/OOVHandlingOptions.cpp
index 154074664..a55026b98 100644
--- a/moses/parameters/OOVHandlingOptions.cpp
+++ b/moses/parameters/OOVHandlingOptions.cpp
@@ -43,6 +43,11 @@ namespace Moses {
// xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
return true;
}
+#else
+ bool
+ OOVHandlingOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& param)
+ { return true; } // #else stub: no-op, but a bool function must return (falling off the end is UB)
#endif
}
diff --git a/moses/parameters/OptionsBaseClass.cpp b/moses/parameters/OptionsBaseClass.cpp
index a19aaf7a6..324359e93 100644
--- a/moses/parameters/OptionsBaseClass.cpp
+++ b/moses/parameters/OptionsBaseClass.cpp
@@ -24,5 +24,10 @@ namespace Moses {
if (m == param.end()) return dfltval;
return Scan<bool>(xmlrpc_c::value_string(m->second));
}
+#else
+ bool
+ check(std::map<std::string, xmlrpc_c::value> const& param,
+ std::string const key, bool dfltval)
+ { return dfltval; } // #else stub: nothing is looked up, so hand back the caller's default (empty body was UB)
#endif
}
diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp
index 210950a3c..fe56a7356 100644
--- a/moses/parameters/ReportingOptions.cpp
+++ b/moses/parameters/ReportingOptions.cpp
@@ -128,11 +128,17 @@ namespace Moses {
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
}
+
+ m = param.find("no-ReportSegmentation");
+ if (m == param.end() || !Scan<bool>(xmlrpc_c::value_string(m->second))) {
+
+ // If we are reporting alignment info, turn on ReportSegmentation, unless XML request explicitly says not to
+ m = param.find("align");
+ if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
+ ReportSegmentation = 1;
- m = param.find("align");
- if (m != param.end() && Scan<bool>(xmlrpc_c::value_string(m->second)))
- ReportSegmentation = 1;
-
+ }
+
PrintAlignmentInfo = check(param,"word-align",PrintAlignmentInfo);
m = param.find("factor-delimiter");
diff --git a/moses/parameters/SearchOptions.cpp b/moses/parameters/SearchOptions.cpp
index 958569e94..35028b8fb 100644
--- a/moses/parameters/SearchOptions.cpp
+++ b/moses/parameters/SearchOptions.cpp
@@ -102,6 +102,11 @@ namespace Moses
return true;
}
+#else
+ bool
+ SearchOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& params)
+ {}
#endif
}
diff --git a/moses/parameters/SyntaxOptions.cpp b/moses/parameters/SyntaxOptions.cpp
index f76c187ec..e977dd4c0 100644
--- a/moses/parameters/SyntaxOptions.cpp
+++ b/moses/parameters/SyntaxOptions.cpp
@@ -77,6 +77,11 @@ namespace Moses {
// xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
return true;
}
+#else
+ bool
+ SyntaxOptions::
+ update(std::map<std::string,xmlrpc_c::value>const& param)
+ { return true; } // #else stub: no-op, but a bool function must return (falling off the end is UB)
#endif
}
diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp
index e2580fe2f..767358e5c 100644
--- a/moses/server/TranslationRequest.cpp
+++ b/moses/server/TranslationRequest.cpp
@@ -85,13 +85,14 @@ void
TranslationRequest::
add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
{
- // if (!m_withAlignInfo) return;
- if (!options()->output.ReportSegmentation) return;
+ if (!m_withAlignInfo) return;
+ // if (!options()->output.ReportSegmentation) return;
Range const& trg = h.GetCurrTargetWordsRange();
Range const& src = h.GetCurrSourceWordsRange();
std::map<std::string, xmlrpc_c::value> pAlnInfo;
pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos());
+ pAlnInfo["tgt-end"] = xmlrpc_c::value_int(trg.GetEndPos());
pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos());
pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos());
aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo));
@@ -356,6 +357,12 @@ parse_request(std::map<std::string, xmlrpc_c::value> const& params)
}
}
+ // Report alignment info if Moses config says to or if XML request says to
+ m_withAlignInfo = options()->output.ReportSegmentation || check(params, "align");
+
+ // Report word alignment info if Moses config says to or if XML request says to
+ m_withWordAlignInfo = options()->output.PrintAlignmentInfo || check(params, "word-align");
+
si = params.find("weights");
if (si != params.end())
{
@@ -465,8 +472,8 @@ pack_hypothesis(const Moses::Manager& manager,
<< std::endl);
dest[key] = xmlrpc_c::value_string(target.str());
- // if (m_withAlignInfo) {
- if (options()->output.ReportSegmentation) {
+ if (m_withAlignInfo) {
+ // if (options()->output.ReportSegmentation) {
// phrase alignment, if requested
vector<xmlrpc_c::value> p_aln;
@@ -475,8 +482,8 @@ pack_hypothesis(const Moses::Manager& manager,
dest["align"] = xmlrpc_c::value_array(p_aln);
}
- // if (m_withWordAlignInfo) {
- if (options()->output.PrintAlignmentInfo) {
+ if (m_withWordAlignInfo) {
+ //if (options()->output.PrintAlignmentInfo) {
// word alignment, if requested
vector<xmlrpc_c::value> w_aln;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h
index 2554e5544..3463c72c8 100644
--- a/moses/server/TranslationRequest.h
+++ b/moses/server/TranslationRequest.h
@@ -38,8 +38,8 @@ TranslationRequest : public virtual Moses::TranslationTask
Translator* m_translator;
std::string m_source_string, m_target_string;
- // bool m_withAlignInfo;
- // bool m_withWordAlignInfo;
+ bool m_withAlignInfo;
+ bool m_withWordAlignInfo;
bool m_withGraphInfo;
bool m_withTopts;
bool m_withScoreBreakdown;