Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Muller <brian.muller@livingsocial.com>2012-01-03 23:56:51 +0400
committerBrian Muller <brian.muller@livingsocial.com>2012-01-03 23:56:51 +0400
commit9f457bf70dcab12082ec07cc6018e3f1a75899a2 (patch)
treea30f2028788bf291a71200ff5644a3c72a842f27 /vowpalwabbit/hash.cc
parent5c267f9404ffbb733c88b759605386aad51e6baf (diff)
now creating a linkable library
Diffstat (limited to 'vowpalwabbit/hash.cc')
-rw-r--r--vowpalwabbit/hash.cc69
1 files changed, 69 insertions, 0 deletions
diff --git a/vowpalwabbit/hash.cc b/vowpalwabbit/hash.cc
new file mode 100644
index 00000000..429ea187
--- /dev/null
+++ b/vowpalwabbit/hash.cc
@@ -0,0 +1,69 @@
+// Tweaked for VW and contributed by Ariel Faigon.
+// Original at: http://murmurhash.googlepages.com/
+//
+// Based on MurmurHash2, by Austin Appleby
+//
+// Note - This code makes a few assumptions about how your machine behaves:
+//
+// 1. We can read a 4-byte value from any address without crashing
+// (i.e non aligned access is supported) - this is not a problem
+// on Intel/x86/AMD64 machines (including new Macs)
+// 2. sizeof(int) == 4
+//
+// And it has a few limitations -
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and
+// big-endian machines.
+//
+#include <stdint.h> /* defines uint32_t etc */
+#include <sys/types.h> /* defines size_t */
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+uint32_t uniform_hash( const void *key, size_t len, uint32_t seed)
+{
+ // 'm' and 'r' are mixing constants generated offline.
+ // They're not really 'magic', they just happen to work well.
+
+ const unsigned int m = 0x5bd1e995;
+ const int r = 24;
+
+ // Initialize the hash to a 'random' value
+
+ unsigned int h = seed ^ len;
+
+ // Mix 4 bytes at a time into the hash
+
+ const unsigned char * data = (const unsigned char *)key;
+
+ while (len >= 4) {
+ unsigned int k = *(unsigned int *)data;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+
+ data += 4;
+ len -= 4;
+ }
+
+ // Handle the last few bytes of the input array
+ switch (len) {
+ case 3: h ^= data[2] << 16;
+ case 2: h ^= data[1] << 8;
+ case 1: h ^= data[0];
+ h *= m;
+ };
+
+ // Do a few final mixes of the hash to ensure the last few
+ // bytes are well-incorporated.
+ h ^= h >> 13;
+ h *= m;
+ h ^= h >> 15;
+
+ return h;
+}
+