Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorariel faigon <ariel.git@yendor.com>2012-08-14 01:49:02 +0400
committerariel faigon <ariel.git@yendor.com>2012-08-14 01:49:02 +0400
commitf62ddd3312e6483a1575ef76b6d7fdb70ac11887 (patch)
tree057e96a9b7da0450c8d6f5614fef36fb74e52209 /vowpalwabbit/hash.cc
parenta4300090c4ef9f019dfb7efe2f2b25dc96b5e6aa (diff)
Replace murmur2 by murmur3 hash, switch seed to 0, revamp comments
Diffstat (limited to 'vowpalwabbit/hash.cc')
-rw-r--r--vowpalwabbit/hash.cc100
1 files changed, 12 insertions, 88 deletions
diff --git a/vowpalwabbit/hash.cc b/vowpalwabbit/hash.cc
index 360137c9..690186b1 100644
--- a/vowpalwabbit/hash.cc
+++ b/vowpalwabbit/hash.cc
@@ -1,20 +1,17 @@
-// Tweaked for VW and contributed by Ariel Faigon.
-// Original at: http://murmurhash.googlepages.com/
//
-// Incorporates MurmurHash2, by Austin Appleby
-// Incorporates MurmurHash3, by Austin Appleby
+// MurmurHash3, by Austin Appleby
//
-// Note - This code makes a few assumptions about how your machine behaves:
+// Originals at:
+// http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+// http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.h
//
-// 1. We can read a 4-byte value from any address without crashing
-// (i.e non aligned access is supported) - this is not a problem
-// on Intel/x86/AMD64 machines (including new Macs)
-// 2. sizeof(int) == 4
+// Notes:
+// 1) this code assumes we can read a 4-byte value from any address
+// without crashing (i.e non aligned access is supported). This is
+// not a problem on Intel/x86/AMD64 machines (including new Macs)
+// 2) It produces different results on little-endian and big-endian machines.
//
-// And it has a few limitations -
-// 1. It will not work incrementally.
-// 2. It will not produce the same results on little-endian and
-// big-endian machines.
+// Adopted for VW and contributed by Ariel Faigon.
//
// Platform-specific functions and macros
@@ -30,8 +27,6 @@
#include <sys/types.h> /* defines size_t */
-#ifdef MURMUR3 /* compile w/ -D MURMUR3 to default to this hash */
-
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
@@ -53,7 +48,6 @@
#include <stdlib.h>
#define ROTL32(x,y) _rotl(x,y)
-#define ROTL64(x,y) _rotl64(x,y)
#define BIG_CONSTANT(x) (x)
@@ -68,13 +62,7 @@ inline uint32_t rotl32 ( uint32_t x, int8_t r )
return (x << r) | (x >> (32 - r));
}
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
- return (x << r) | (x >> (64 - r));
-}
-
#define ROTL32(x,y) rotl32(x,y)
-#define ROTL64(x,y) rotl64(x,y)
#define BIG_CONSTANT(x) (x##LLU)
@@ -89,11 +77,6 @@ FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
return p[i];
}
-FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
-{
- return p[i];
-}
-
//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
@@ -108,11 +91,7 @@ FORCE_INLINE uint32_t fmix ( uint32_t h )
return h;
}
-//
-// ariel: changed the interface to match vw
-// removed all the functions we don't need (leave only the 32-bit hash)
-// renamed MurmurHash3_x86_32 -> uniform_hash
-//
+//-----------------------------------------------------------------------------
uint32_t uniform_hash (const void * key, size_t len, uint32_t seed)
{
const uint8_t * data = (const uint8_t*)key;
@@ -164,60 +143,5 @@ uint32_t uniform_hash (const void * key, size_t len, uint32_t seed)
h1 = fmix(h1);
return h1;
-}
-
-//-----------------------------------------------------------------------------
-
-#else
-
-#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
-uint32_t uniform_hash( const void *key, size_t len, uint32_t seed)
-{
- // 'm' and 'r' are mixing constants generated offline.
- // They're not really 'magic', they just happen to work well.
-
- const unsigned int m = 0x5bd1e995;
- const int r = 24;
-
- // Initialize the hash to a 'random' value
-
- unsigned int h = seed ^ len;
-
- // Mix 4 bytes at a time into the hash
-
- const unsigned char * data = (const unsigned char *)key;
-
- while (len >= 4) {
- unsigned int k = *(unsigned int *)data;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h *= m;
- h ^= k;
-
- data += 4;
- len -= 4;
- }
-
- // Handle the last few bytes of the input array
- switch (len) {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0];
- h *= m;
- };
-
- // Do a few final mixes of the hash to ensure the last few
- // bytes are well-incorporated.
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
-}
-
-#endif /* MURMUR2 or MURMUR3 */
+}