diff options
author | Vaclav Petricek <vpetricek@eharmony.com> | 2013-11-01 09:43:50 +0400 |
---|---|---|
committer | Vaclav Petricek <vpetricek@eharmony.com> | 2013-11-01 09:43:50 +0400 |
commit | 09477630f7d8a809d7e6dcdf142e8877e476379e (patch) | |
tree | 20f27fedf08682b43061df6041ddd10269bd84c3 /library | |
parent | 9379deff7595fb147b55646065d02b3ddaaf0fac (diff) |
use murmurhash
Diffstat (limited to 'library')
-rw-r--r-- | library/recommend.cc | 27 |
1 files changed, 15 insertions, 12 deletions
```diff
diff --git a/library/recommend.cc b/library/recommend.cc
index f4b7909d..1dd2b36d 100644
--- a/library/recommend.cc
+++ b/library/recommend.cc
@@ -38,22 +38,25 @@ void progress()
   fprintf(stderr, "%12d %8d %8d %8d %12d %s %s\n", pairs, users, items, recs, skipped, userfilename.c_str(), itemfilename.c_str());
 }
 
-unsigned hash_ber(char *in, size_t len) {
-  unsigned hashv = 0;
-  while (len--) hashv = ((hashv) * 33) + *in++;
-  return hashv;
-}
-unsigned hash_fnv(char *in, size_t len) {
-  unsigned hashv = 2166136261UL;
-  while(len--) hashv = (hashv * 16777619) ^ *in++;
-  return hashv;
-}
+// return uniform_hash((unsigned char *)s.begin, s.end - s.begin, h);
+
+
+//unsigned hash_ber(char *in, size_t len) {
+//  unsigned hashv = 0;
+//  while (len--) hashv = ((hashv) * 33) + *in++;
+//  return hashv;
+//}
+//unsigned hash_fnv(char *in, size_t len) {
+//  unsigned hashv = 2166136261UL;
+//  while(len--) hashv = (hashv * 16777619) ^ *in++;
+//  return hashv;
+//}
 #define MASK(u,b) ( u & ((1UL << b) - 1))
 #define NUM_HASHES 2
 void get_hashv(char *in, size_t len, unsigned *out) {
   assert(NUM_HASHES==2);
-  out[0] = MASK(hash_ber(in,len),b);
-  out[1] = MASK(hash_fnv(in,len),b);
+  out[0] = MASK(uniform_hash(in,len,1),b);
+  out[1] = MASK(uniform_hash(in,len,2),b);
 }
 
 #define BIT_TEST(c,i) (c[i/8] & (1 << (i % 8)))
```