diff options
author | Kenneth Heafield <github@kheafield.com> | 2014-02-24 11:46:38 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2014-02-24 11:46:38 +0400 |
commit | 2879764b0890cce5a39b88e253b59d7684c0c7d9 (patch) | |
tree | 89e3eb83771a34f8b9438f6cc2c1de029bcb4d33 | |
parent | 5c868f8294729bdf68991a93c37b2544d5624631 (diff) |
Switch to MurmurHash for some hash tables
-rw-r--r-- | lm/filter/phrase_table_vocab_main.cc | 16 |
1 files changed, 12 insertions, 4 deletions
```diff
diff --git a/lm/filter/phrase_table_vocab_main.cc b/lm/filter/phrase_table_vocab_main.cc
index d3a737b..4dacaf0 100644
--- a/lm/filter/phrase_table_vocab_main.cc
+++ b/lm/filter/phrase_table_vocab_main.cc
@@ -45,7 +45,15 @@ class InternString {
     boost::unordered_set<MutablePiece> strs_;
 };
 
+struct MurmurChar : public std::unary_function<const char *, std::size_t> {
+  std::size_t operator()(const char *value) const {
+    return util::MurmurHashNative(&value, sizeof(const char*));
+  }
+};
+
 class TargetWords {
+  private:
+    typedef boost::unordered_set<const char *, MurmurChar> Map;
   public:
     void Introduce(StringPiece source) {
       vocab_.resize(vocab_.size() + 1);
@@ -61,7 +69,7 @@ class TargetWords {
         interns_.push_back(intern_.Add(nopipe));
       }
       for (std::vector<unsigned int>::const_iterator i(sentences.begin()); i != sentences.end(); ++i) {
-        boost::unordered_set<const char *> &vocab = vocab_[*i];
+        Map &vocab = vocab_[*i];
         for (std::vector<const char *>::const_iterator j = interns_.begin(); j != interns_.end(); ++j) {
           vocab.insert(*j);
         }
@@ -70,8 +78,8 @@ class TargetWords {
 
     void Print() const {
       util::FakeOFStream out(1);
-      for (std::vector<boost::unordered_set<const char *> >::const_iterator i = vocab_.begin(); i != vocab_.end(); ++i) {
-        for (boost::unordered_set<const char *>::const_iterator j = i->begin(); j != i->end(); ++j) {
+      for (std::vector<Map>::const_iterator i = vocab_.begin(); i != vocab_.end(); ++i) {
+        for (Map::const_iterator j = i->begin(); j != i->end(); ++j) {
           out << *j << ' ';
         }
         out << '\n';
@@ -81,7 +89,7 @@ class TargetWords {
   private:
     InternString intern_;
 
-    std::vector<boost::unordered_set<const char *> > vocab_;
+    std::vector<Map> vocab_;
 
     // Temporary in Add.
     std::vector<const char *> interns_;
```