Welcome to mirror list, hosted at ThFree Co, Russian Federation.

hash.cc - github.com/moses-smt/vowpal_wabbit.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 429ea18793404ac2198809c3d3a14e57ea1a2b1f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Tweaked for VW and contributed by Ariel Faigon.
// Original at: http://murmurhash.googlepages.com/
//
// Based on MurmurHash2, by Austin Appleby
//
// Note - This code makes a few assumptions about how your machine behaves:
//
// 1. We can read a 4-byte value from any address without crashing
//    (i.e non aligned access is supported) - this is not a problem
//    on Intel/x86/AMD64 machines (including new Macs)
// 2. sizeof(int) == 4
//
// And it has a few limitations -
//  1. It will not work incrementally.
//  2. It will not produce the same results on little-endian and
//     big-endian machines.
//
#include <stdint.h>	/* defines uint32_t etc */
#include <sys/types.h>	/* defines size_t */

#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

uint32_t uniform_hash( const void *key, size_t len, uint32_t seed)
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.

    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value

    unsigned int h = seed ^ len;

    // Mix 4 bytes at a time into the hash

    const unsigned char * data = (const unsigned char *)key;

    while (len >= 4) {
	unsigned int k = *(unsigned int *)data;

	k *= m;
	k ^= k >> r;
	k *= m;

	h *= m;
	h ^= k;

	data += 4;
	len -= 4;
    }

    // Handle the last few bytes of the input array
    switch (len) {
	case 3: h ^= data[2] << 16;
	case 2: h ^= data[1] << 8;
	case 1: h ^= data[0];
		h *= m;
    };

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}