Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/kpu/kenlm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kenneth Heafield <github@kheafield.com> 2014-02-24 11:46:38 +0400
committer: Kenneth Heafield <github@kheafield.com> 2014-02-24 11:46:38 +0400
commit: 2879764b0890cce5a39b88e253b59d7684c0c7d9 (patch)
tree: 89e3eb83771a34f8b9438f6cc2c1de029bcb4d33
parent: 5c868f8294729bdf68991a93c37b2544d5624631 (diff)
Switch to MurmurHash for some hash tables
-rw-r--r-- lm/filter/phrase_table_vocab_main.cc 16
1 files changed, 12 insertions, 4 deletions
diff --git a/lm/filter/phrase_table_vocab_main.cc b/lm/filter/phrase_table_vocab_main.cc
index d3a737b..4dacaf0 100644
--- a/lm/filter/phrase_table_vocab_main.cc
+++ b/lm/filter/phrase_table_vocab_main.cc
@@ -45,7 +45,15 @@ class InternString {
boost::unordered_set<MutablePiece> strs_;
};
+struct MurmurChar : public std::unary_function<const char *, std::size_t> {
+ std::size_t operator()(const char *value) const {
+ return util::MurmurHashNative(&value, sizeof(const char*));
+ }
+};
+
class TargetWords {
+ private:
+ typedef boost::unordered_set<const char *, MurmurChar> Map;
public:
void Introduce(StringPiece source) {
vocab_.resize(vocab_.size() + 1);
@@ -61,7 +69,7 @@ class TargetWords {
interns_.push_back(intern_.Add(nopipe));
}
for (std::vector<unsigned int>::const_iterator i(sentences.begin()); i != sentences.end(); ++i) {
- boost::unordered_set<const char *> &vocab = vocab_[*i];
+ Map &vocab = vocab_[*i];
for (std::vector<const char *>::const_iterator j = interns_.begin(); j != interns_.end(); ++j) {
vocab.insert(*j);
}
@@ -70,8 +78,8 @@ class TargetWords {
void Print() const {
util::FakeOFStream out(1);
- for (std::vector<boost::unordered_set<const char *> >::const_iterator i = vocab_.begin(); i != vocab_.end(); ++i) {
- for (boost::unordered_set<const char *>::const_iterator j = i->begin(); j != i->end(); ++j) {
+ for (std::vector<Map>::const_iterator i = vocab_.begin(); i != vocab_.end(); ++i) {
+ for (Map::const_iterator j = i->begin(); j != i->end(); ++j) {
out << *j << ' ';
}
out << '\n';
@@ -81,7 +89,7 @@ class TargetWords {
private:
InternString intern_;
- std::vector<boost::unordered_set<const char *> > vocab_;
+ std::vector<Map> vocab_;
// Temporary in Add.
std::vector<const char *> interns_;