diff options
author | Rico Sennrich <rico.sennrich@gmx.ch> | 2014-03-21 14:53:15 +0400 |
---|---|---|
committer | Rico Sennrich <rico.sennrich@gmx.ch> | 2014-03-21 15:12:24 +0400 |
commit | 45630a5851fad3bdd6953e88727e4b7c8d0c4c18 (patch) | |
tree | cd95a3175eb7fc4bb1027cf92d2fa880112b3fee /moses/FactorCollection.cpp | |
parent | 1c6061e78174d09ea4a7a8125ee0df7f41d88ae5 (diff) |
Various optimizations to make the CYK+ parser several times faster and use less memory.
The decoding speed-up depends on how much time is spent in the parser:
a 10-50% speed-up was observed for string-to-tree systems (more on long sentences and with a high max-chart-span).
If you only use hiero or string-to-tree models (but none with source syntax), use the compile option --unlabelled-source for (small) efficiency gains.
Diffstat (limited to 'moses/FactorCollection.cpp')
-rw-r--r-- | moses/FactorCollection.cpp | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/moses/FactorCollection.cpp b/moses/FactorCollection.cpp index 5d6eb1c53..5013da417 100644 --- a/moses/FactorCollection.cpp +++ b/moses/FactorCollection.cpp @@ -35,27 +35,34 @@ namespace Moses { FactorCollection FactorCollection::s_instance; -const Factor *FactorCollection::AddFactor(const StringPiece &factorString) +const Factor *FactorCollection::AddFactor(const StringPiece &factorString, bool isNonTerminal) { FactorFriend to_ins; to_ins.in.m_string = factorString; - to_ins.in.m_id = m_factorId; + to_ins.in.m_id = (isNonTerminal) ? m_factorIdNonTerminal : m_factorId; + Set & set = (isNonTerminal) ? m_set : m_setNonTerminal; // If we're threaded, hope a read-only lock is sufficient. #ifdef WITH_THREADS { // read=lock scope boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock); - Set::const_iterator i = m_set.find(to_ins); - if (i != m_set.end()) return &i->in; + Set::const_iterator i = set.find(to_ins); + if (i != set.end()) return &i->in; } boost::unique_lock<boost::shared_mutex> lock(m_accessLock); #endif // WITH_THREADS - std::pair<Set::iterator, bool> ret(m_set.insert(to_ins)); + std::pair<Set::iterator, bool> ret(set.insert(to_ins)); if (ret.second) { ret.first->in.m_string.set( memcpy(m_string_backing.Allocate(factorString.size()), factorString.data(), factorString.size()), factorString.size()); - m_factorId++; + if (isNonTerminal) { + m_factorIdNonTerminal++; + UTIL_THROW_IF2(m_factorIdNonTerminal >= moses_MaxNumNonterminals, "Number of non-terminals exceeds maximum size reserved. Adjust parameter moses_MaxNumNonterminals, then recompile"); + } + else { + m_factorId++; + } } return &ret.first->in; } |