Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rico Sennrich <rico.sennrich@gmx.ch> 2014-03-21 14:53:15 +0400
committer: Rico Sennrich <rico.sennrich@gmx.ch> 2014-03-21 15:12:24 +0400
commit: 45630a5851fad3bdd6953e88727e4b7c8d0c4c18 (patch)
tree: cd95a3175eb7fc4bb1027cf92d2fa880112b3fee /moses/ChartCellLabelSet.h
parent: 1c6061e78174d09ea4a7a8125ee0df7f41d88ae5 (diff)
Various optimizations to make the CYK+ parser several times faster and use less memory.
The decoding speed-up depends on how much time is spent in the parser: a 10-50% speed-up was observed for string-to-tree systems (more on long sentences and with a high max-chart-span). If you only use hiero or string-to-tree models (but none with source syntax), use the compile option --unlabelled-source for (small) efficiency gains.
Diffstat (limited to 'moses/ChartCellLabelSet.h')
-rw-r--r-- moses/ChartCellLabelSet.h 74
1 files changed, 57 insertions, 17 deletions
diff --git a/moses/ChartCellLabelSet.h b/moses/ChartCellLabelSet.h
index 68c8b4263..2b497b957 100644
--- a/moses/ChartCellLabelSet.h
+++ b/moses/ChartCellLabelSet.h
@@ -21,6 +21,7 @@
#include "ChartCellLabel.h"
#include "NonTerminal.h"
+#include "moses/FactorCollection.h"
#include <boost/functional/hash.hpp>
#include <boost/unordered_map.hpp>
@@ -36,20 +37,23 @@ class ChartHypothesisCollection;
class ChartCellLabelSet
{
private:
-#if defined(BOOST_VERSION) && (BOOST_VERSION >= 104200)
- typedef boost::unordered_map<Word, ChartCellLabel,
- NonTerminalHasher, NonTerminalEqualityPred
- > MapType;
-#else
- typedef std::map<Word, ChartCellLabel> MapType;
-#endif
+
+ typedef std::vector<ChartCellLabel*> MapType;
public:
typedef MapType::const_iterator const_iterator;
typedef MapType::iterator iterator;
- ChartCellLabelSet(const WordsRange &coverage) : m_coverage(coverage) {}
+ ChartCellLabelSet(const WordsRange &coverage)
+ : m_coverage(coverage)
+ , m_map(FactorCollection::Instance().GetNumNonTerminals(), NULL)
+ , m_size(0) { }
+
+ ~ChartCellLabelSet() {
+ RemoveAllInColl(m_map);
+ }
+ // TODO: skip empty elements when iterating, or deprecate this
const_iterator begin() const {
return m_map.begin();
}
@@ -65,36 +69,72 @@ public:
}
void AddWord(const Word &w) {
- m_map.insert(std::make_pair(w, ChartCellLabel(m_coverage, w)));
+ size_t idx = w[0]->GetId();
+ if (! ChartCellExists(idx)) {
+ m_size++;
+ m_map[idx] = new ChartCellLabel(m_coverage, w);
+ }
}
// Stack is a HypoList or whatever the search algorithm uses.
void AddConstituent(const Word &w, const HypoList *stack) {
- ChartCellLabel::Stack s;
- s.cube = stack;
- m_map.insert(std::make_pair(w, ChartCellLabel(m_coverage, w, s)));
+ size_t idx = w[0]->GetId();
+ if (ChartCellExists(idx)) {
+ ChartCellLabel::Stack & s = m_map[idx]->MutableStack();
+ s.cube = stack;
+ }
+ else {
+ ChartCellLabel::Stack s;
+ s.cube = stack;
+ m_size++;
+ m_map[idx] = new ChartCellLabel(m_coverage, w, s);
+ }
+ }
+
+ // grow vector if necessary
+ bool ChartCellExists(size_t idx) {
+ try {
+ if (m_map.at(idx) != NULL) {
+ return true;
+ }
+ }
+ catch (const std::out_of_range& oor) {
+ m_map.resize(FactorCollection::Instance().GetNumNonTerminals(), NULL);
+ }
+ return false;
}
bool Empty() const {
- return m_map.empty();
+ return m_size == 0;
}
size_t GetSize() const {
- return m_map.size();
+ return m_size;
}
const ChartCellLabel *Find(const Word &w) const {
- MapType::const_iterator p = m_map.find(w);
- return p == m_map.end() ? 0 : &(p->second);
+ size_t idx = w[0]->GetId();
+ try {
+ return m_map.at(idx);
+ }
+ catch (const std::out_of_range& oor) {
+ return NULL;
+ }
}
ChartCellLabel::Stack &FindOrInsert(const Word &w) {
- return m_map.insert(std::make_pair(w, ChartCellLabel(m_coverage, w))).first->second.MutableStack();
+ size_t idx = w[0]->GetId();
+ if (! ChartCellExists(idx)) {
+ m_size++;
+ m_map[idx] = new ChartCellLabel(m_coverage, w);
+ }
+ return m_map[idx]->MutableStack();
}
private:
const WordsRange &m_coverage;
MapType m_map;
+ size_t m_size;
};
}