diff options
author | Brian Muller <brian.muller@livingsocial.com> | 2012-01-03 23:56:51 +0400 |
---|---|---|
committer | Brian Muller <brian.muller@livingsocial.com> | 2012-01-03 23:56:51 +0400 |
commit | 9f457bf70dcab12082ec07cc6018e3f1a75899a2 (patch) | |
tree | a30f2028788bf291a71200ff5644a3c72a842f27 /vowpalwabbit/hash.cc | |
parent | 5c267f9404ffbb733c88b759605386aad51e6baf (diff) |
now creating a linkable library
Diffstat (limited to 'vowpalwabbit/hash.cc')
-rw-r--r-- | vowpalwabbit/hash.cc | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/vowpalwabbit/hash.cc b/vowpalwabbit/hash.cc new file mode 100644 index 00000000..429ea187 --- /dev/null +++ b/vowpalwabbit/hash.cc @@ -0,0 +1,69 @@ +// Tweaked for VW and contributed by Ariel Faigon. +// Original at: http://murmurhash.googlepages.com/ +// +// Based on MurmurHash2, by Austin Appleby +// +// Note - This code makes a few assumptions about how your machine behaves: +// +// 1. We can read a 4-byte value from any address without crashing +// (i.e non aligned access is supported) - this is not a problem +// on Intel/x86/AMD64 machines (including new Macs) +// 2. sizeof(int) == 4 +// +// And it has a few limitations - +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and +// big-endian machines. +// +#include <stdint.h> /* defines uint32_t etc */ +#include <sys/types.h> /* defines size_t */ + +#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + +uint32_t uniform_hash( const void *key, size_t len, uint32_t seed) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const unsigned int m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + unsigned int h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while (len >= 4) { + unsigned int k = *(unsigned int *)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + switch (len) { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + |