Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Rico Sennrich <rico.sennrich@gmx.ch> 2014-03-21 14:53:15 +0400
committer Rico Sennrich <rico.sennrich@gmx.ch> 2014-03-21 15:12:24 +0400
commit 45630a5851fad3bdd6953e88727e4b7c8d0c4c18 (patch)
tree cd95a3175eb7fc4bb1027cf92d2fa880112b3fee /moses/Incremental.cpp
parent 1c6061e78174d09ea4a7a8125ee0df7f41d88ae5 (diff)
various optimizations to make CYK+ parser several times faster and eat less memory.
speed-up of decoding depends on how much time is spent in parser: 10-50% speed-up for string-to-tree systems observed (more on long sentences and with high max-chart-span). if you only use hiero or string-to-tree models (but none with source syntax), use compile-option --unlabelled-source for (small) efficiency gains.
Diffstat (limited to 'moses/Incremental.cpp')
-rw-r--r-- moses/Incremental.cpp 28
1 files changed, 24 insertions, 4 deletions
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp
index 5cce4614b..2ee98aade 100644
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@@ -50,7 +50,10 @@ public:
void FinishedSearch() {
for (ChartCellLabelSet::iterator i(out_.mutable_begin()); i != out_.mutable_end(); ++i) {
- ChartCellLabel::Stack &stack = i->second.MutableStack();
+ if ((*i) == NULL) {
+ continue;
+ }
+ ChartCellLabel::Stack &stack = (*i)->MutableStack();
Gen *gen = static_cast<Gen*>(stack.incr_generator);
gen->FinishedSearch();
stack.incr = &gen->Generating();
@@ -80,6 +83,8 @@ public:
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range);
+ float CalcEstimateOfBestScore(const TargetPhraseCollection & tpc, const StackVec & stackVec) const;
+
bool Empty() const {
return edges_.Empty();
}
@@ -112,7 +117,7 @@ private:
const search::Score oov_weight_;
};
-template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &)
+template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &range)
{
std::vector<search::PartialVertex> vertices;
vertices.reserve(nts.size());
@@ -173,6 +178,17 @@ template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std:
edges_.AddEdge(edge);
}
+// for early pruning
+template <class Model> float Fill<Model>::CalcEstimateOfBestScore(const TargetPhraseCollection &targets, const StackVec &nts) const
+{
+ float below_score = 0.0;
+ for (StackVec::const_iterator i = nts.begin(); i != nts.end(); ++i) {
+ below_score += (*i)->GetStack().incr->RootAlternate().Bound();
+ }
+ const TargetPhrase &targetPhrase = **(targets.begin());
+ return targetPhrase.GetFutureScore() + below_score;
+}
+
// TODO: factors (but chart doesn't seem to support factors anyway).
template <class Model> lm::WordIndex Fill<Model>::Convert(const Word &word) const
{
@@ -209,8 +225,12 @@ template <class Model, class Best> search::History Manager::PopulateBest(const M
size_t size = source_.GetSize();
boost::object_pool<search::Vertex> vertex_pool(std::max<size_t>(size * size / 2, 32));
- for (size_t width = 1; width < size; ++width) {
- for (size_t startPos = 0; startPos <= size-width; ++startPos) {
+ for (int startPos = size-1; startPos >= 0; --startPos) {
+ for (size_t width = 1; width <= size-startPos; ++width) {
+ // full range uses RootSearch
+ if (startPos == 0 && startPos + width == size) {
+ break;
+ }
WordsRange range(startPos, startPos + width - 1);
Fill<Model> filler(context, words, oov_weight);
parser_.Create(range, filler);