Welcome to mirror list, hosted at ThFree Co, Russian Federation.

BLI_hash.hh « blenlib « blender « source - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 693d4a8a71cd85dfd90cd8d42605f7ef60a8ac4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
/* SPDX-License-Identifier: GPL-2.0-or-later */

#pragma once

/** \file
 * \ingroup bli
 *
 * A specialization of `blender::DefaultHash<T>` provides a hash function for values of type T.
 * This hash function is used by default in hash table implementations in blenlib.
 *
 * The actual hash function is in the `operator()` method of `DefaultHash<T>`. The following code
 * computes the hash of some value using DefaultHash.
 *
 *   T value = ...;
 *   DefaultHash<T> hash_function;
 *   uint32_t hash = hash_function(value);
 *
 * Hash table implementations like blender::Set support heterogeneous key lookups. That means that
 * one can do a lookup with a key of type A in a hash table that stores keys of type B. This is
 * commonly done when B is std::string, because the conversion from e.g. a #StringRef to
 * std::string can be costly and is unnecessary. To make this work, values of type A and B that
 * compare equal have to have the same hash value. This is achieved by defining potentially
 * multiple `operator()` in a specialization of #DefaultHash. All those methods have to compute the
 * same hash for values that compare equal.
 *
 * The computed hash is an unsigned 64 bit integer. Ideally, the hash function would generate
 * uniformly random hash values for a set of keys. However, in many cases trivial hash functions
 * are faster and produce a good enough distribution. In general it is better when more information
 * is in the lower bits of the hash. By choosing a good probing strategy, the effects of a bad hash
 * function are less noticeable though. In this context a good probing strategy is one that takes
 * all bits of the hash into account eventually. One has to check on a case by case basis to see if
 * a better but more expensive or trivial hash function works better.
 *
 * There are three main ways to provide a hash table implementation with a custom hash function.
 *
 * - When you want to provide a default hash function for your own custom type: Add a `hash`
 *   member function to it. The function should return `uint64_t` and take no arguments. This
 *   method will be called by the default implementation of #DefaultHash. It will automatically be
 *   used by hash table implementations.
 *
 * - When you want to provide a default hash function for a type that you cannot modify: Add a new
 *   specialization to the #DefaultHash struct. This can be done by writing code like below in
 *   either global or BLI namespace.
 *
 *     template<> struct blender::DefaultHash<TheType> {
 *       uint64_t operator()(const TheType &value) const {
 *         return ...;
 *       }
 *     };
 *
 * - When you want to provide a different hash function for a type that already has a default hash
 *   function: Implement a struct like the one below and pass it as template parameter to the hash
 *   table explicitly.
 *
 *     struct MyCustomHash {
 *       uint64_t operator()(const TheType &value) const {
 *         return ...;
 *       }
 *     };
 */

#include <functional>
#include <memory>
#include <string>
#include <utility>

#include "BLI_math_base.h"
#include "BLI_string_ref.hh"
#include "BLI_utildefines.h"

namespace blender {

/**
 * If there is no other specialization of #DefaultHash for a given type, look for a hash function
 * on the type itself. Implementing a `hash()` method on a type is often significantly easier than
 * specializing #DefaultHash.
 *
 * To support heterogeneous lookup, a type can also implement a static `hash_as(const OtherType &)`
 * function.
 *
 * In the case of an enum type, the default hash is just to cast the enum value to an integer.
 */
template<typename T> struct DefaultHash {
  uint64_t operator()(const T &value) const
  {
    if constexpr (std::is_enum_v<T>) {
      /* For enums use the value as hash directly. */
      return uint64_t(value);
    }
    else {
      /* Try to call the `hash()` function on the value. */
      /* If this results in a compiler error, no hash function for the type has been found. */
      return value.hash();
    }
  }

  template<typename U> uint64_t operator()(const U &value) const
  {
    /* Try calling the static `T::hash_as(value)` function with the given value. The returned hash
     * should be "compatible" with `T::hash()`. Usually that means that if `value` is converted to
     * `T` its hash does not change. */
    /* If this results in a compiler error, no hash function for the heterogeneous lookup has been
     * found. */
    return T::hash_as(value);
  }
};

/**
 * Use the same hash function for const and non const variants of a type.
 */
template<typename T> struct DefaultHash<const T> {
  uint64_t operator()(const T &value) const
  {
    return DefaultHash<T>{}(value);
  }
};

#define TRIVIAL_DEFAULT_INT_HASH(TYPE) \
  template<> struct DefaultHash<TYPE> { \
    uint64_t operator()(TYPE value) const \
    { \
      return uint64_t(value); \
    } \
  }

/**
 * We cannot make any assumptions about the distribution of keys, so use a trivial hash function by
 * default. The default probing strategy is designed to take all bits of the hash into account
 * to avoid worst case behavior when the lower bits are all zero. Special hash functions can be
 * implemented when more knowledge about a specific key distribution is available.
 */
TRIVIAL_DEFAULT_INT_HASH(int8_t);
TRIVIAL_DEFAULT_INT_HASH(uint8_t);
TRIVIAL_DEFAULT_INT_HASH(int16_t);
TRIVIAL_DEFAULT_INT_HASH(uint16_t);
TRIVIAL_DEFAULT_INT_HASH(int32_t);
TRIVIAL_DEFAULT_INT_HASH(uint32_t);
TRIVIAL_DEFAULT_INT_HASH(int64_t);
TRIVIAL_DEFAULT_INT_HASH(uint64_t);

/**
 * One should try to avoid using floats as keys in hash tables, but sometimes it is convenient.
 */
template<> struct DefaultHash<float> {
  uint64_t operator()(float value) const
  {
    return *reinterpret_cast<uint32_t *>(&value);
  }
};

template<> struct DefaultHash<double> {
  uint64_t operator()(double value) const
  {
    return *reinterpret_cast<uint64_t *>(&value);
  }
};

template<> struct DefaultHash<bool> {
  uint64_t operator()(bool value) const
  {
    return uint64_t((value != false) * 1298191);
  }
};

inline uint64_t hash_string(StringRef str)
{
  uint64_t hash = 5381;
  for (char c : str) {
    hash = hash * 33 + c;
  }
  return hash;
}

template<> struct DefaultHash<std::string> {
  /**
   * Take a #StringRef as parameter to support heterogeneous lookups in hash table implementations
   * when std::string is used as key.
   */
  uint64_t operator()(StringRef value) const
  {
    return hash_string(value);
  }
};

template<> struct DefaultHash<StringRef> {
  uint64_t operator()(StringRef value) const
  {
    return hash_string(value);
  }
};

template<> struct DefaultHash<StringRefNull> {
  uint64_t operator()(StringRef value) const
  {
    return hash_string(value);
  }
};

template<> struct DefaultHash<std::string_view> {
  uint64_t operator()(StringRef value) const
  {
    return hash_string(value);
  }
};

/**
 * While we cannot guarantee that the lower 4 bits of a pointer are zero, it is often the case.
 */
template<typename T> struct DefaultHash<T *> {
  uint64_t operator()(const T *value) const
  {
    uintptr_t ptr = uintptr_t(value);
    uint64_t hash = uint64_t(ptr >> 4);
    return hash;
  }
};

template<typename T> uint64_t get_default_hash(const T &v)
{
  return DefaultHash<T>{}(v);
}

template<typename T1, typename T2> uint64_t get_default_hash_2(const T1 &v1, const T2 &v2)
{
  const uint64_t h1 = get_default_hash(v1);
  const uint64_t h2 = get_default_hash(v2);
  return h1 ^ (h2 * 19349669);
}

template<typename T1, typename T2, typename T3>
uint64_t get_default_hash_3(const T1 &v1, const T2 &v2, const T3 &v3)
{
  const uint64_t h1 = get_default_hash(v1);
  const uint64_t h2 = get_default_hash(v2);
  const uint64_t h3 = get_default_hash(v3);
  return h1 ^ (h2 * 19349669) ^ (h3 * 83492791);
}

template<typename T1, typename T2, typename T3, typename T4>
uint64_t get_default_hash_4(const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4)
{
  const uint64_t h1 = get_default_hash(v1);
  const uint64_t h2 = get_default_hash(v2);
  const uint64_t h3 = get_default_hash(v3);
  const uint64_t h4 = get_default_hash(v4);
  return h1 ^ (h2 * 19349669) ^ (h3 * 83492791) ^ (h4 * 3632623);
}

template<typename T> struct DefaultHash<std::unique_ptr<T>> {
  uint64_t operator()(const std::unique_ptr<T> &value) const
  {
    return get_default_hash(value.get());
  }
};

template<typename T> struct DefaultHash<std::shared_ptr<T>> {
  uint64_t operator()(const std::shared_ptr<T> &value) const
  {
    return get_default_hash(value.get());
  }
};

template<typename T> struct DefaultHash<std::reference_wrapper<T>> {
  uint64_t operator()(const std::reference_wrapper<T> &value) const
  {
    return get_default_hash(value.get());
  }
};

template<typename T1, typename T2> struct DefaultHash<std::pair<T1, T2>> {
  uint64_t operator()(const std::pair<T1, T2> &value) const
  {
    return get_default_hash_2(value.first, value.second);
  }
};

}  // namespace blender