Welcome to mirror list, hosted at ThFree Co, Russian Federation.

BLI_hash.hh « blenlib « blender « source - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 2e3212cc83beede9173019c5529357e4182020d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#pragma once

/** \file
 * \ingroup bli
 *
 * A specialization of `blender::DefaultHash<T>` provides a hash function for values of type T.
 * This hash function is used by default in hash table implementations in blenlib.
 *
 * The actual hash function is in the `operator()` method of `DefaultHash<T>`. The following code
 * computes the hash of some value using DefaultHash.
 *
 *   T value = ...;
 *   DefaultHash<T> hash_function;
 *   uint32_t hash = hash_function(value);
 *
 * Hash table implementations like blender::Set support heterogeneous key lookups. That means that
 * one can do a lookup with a key of type A in a hash table that stores keys of type B. This is
 * commonly done when B is std::string, because the conversion from e.g. a #StringRef to
 * std::string can be costly and is unnecessary. To make this work, values of type A and B that
 * compare equal have to have the same hash value. This is achieved by defining potentially
 * multiple `operator()` in a specialization of #DefaultHash. All those methods have to compute the
 * same hash for values that compare equal.
 *
 * The computed hash is an unsigned 64 bit integer. Ideally, the hash function would generate
 * uniformly random hash values for a set of keys. However, in many cases trivial hash functions
 * are faster and produce a good enough distribution. In general it is better when more information
 * is in the lower bits of the hash. By choosing a good probing strategy, the effects of a bad hash
 * function are less noticeable though. In this context a good probing strategy is one that takes
 * all bits of the hash into account eventually. One has to check on a case by case basis to see if
 * a better but more expensive or trivial hash function works better.
 *
 * There are three main ways to provide a hash table implementation with a custom hash function.
 *
 * - When you want to provide a default hash function for your own custom type: Add a `hash`
 *   member function to it. The function should return `uint64_t` and take no arguments. This
 *   method will be called by the default implementation of #DefaultHash. It will automatically be
 *   used by hash table implementations.
 *
 * - When you want to provide a default hash function for a type that you cannot modify: Add a new
 *   specialization to the #DefaultHash struct. This can be done by writing code like below in
 *   either global or BLI namespace.
 *
 *     template<> struct blender::DefaultHash<TheType> {
 *       uint64_t operator()(const TheType &value) const {
 *         return ...;
 *       }
 *     };
 *
 * - When you want to provide a different hash function for a type that already has a default hash
 *   function: Implement a struct like the one below and pass it as template parameter to the hash
 *   table explicitly.
 *
 *     struct MyCustomHash {
 *       uint64_t operator()(const TheType &value) const {
 *         return ...;
 *       }
 *     };
 */

#include <functional>
#include <memory>
#include <string>
#include <utility>

#include "BLI_math_base.h"
#include "BLI_string_ref.hh"
#include "BLI_utildefines.h"

namespace blender {

/**
 * If there is no other specialization of #DefaultHash for a given type, try to call `hash()` on
 * the value. If there is no such method, this will result in a compiler error. Usually that means
 * that you have to implement a hash function using one of three strategies listed above.
 */
template<typename T> struct DefaultHash {
  uint64_t operator()(const T &value) const
  {
    return value.hash();
  }
};

/**
 * Use the same hash function for const and non const variants of a type.
 */
template<typename T> struct DefaultHash<const T> {
  uint64_t operator()(const T &value) const
  {
    return DefaultHash<T>{}(value);
  }
};

#define TRIVIAL_DEFAULT_INT_HASH(TYPE) \
  template<> struct DefaultHash<TYPE> { \
    uint64_t operator()(TYPE value) const \
    { \
      return static_cast<uint64_t>(value); \
    } \
  }

/**
 * We cannot make any assumptions about the distribution of keys, so use a trivial hash function by
 * default. The default probing strategy is designed to take all bits of the hash into account
 * to avoid worst case behavior when the lower bits are all zero. Special hash functions can be
 * implemented when more knowledge about a specific key distribution is available.
 */
TRIVIAL_DEFAULT_INT_HASH(int8_t);
TRIVIAL_DEFAULT_INT_HASH(uint8_t);
TRIVIAL_DEFAULT_INT_HASH(int16_t);
TRIVIAL_DEFAULT_INT_HASH(uint16_t);
TRIVIAL_DEFAULT_INT_HASH(int32_t);
TRIVIAL_DEFAULT_INT_HASH(uint32_t);
TRIVIAL_DEFAULT_INT_HASH(int64_t);
TRIVIAL_DEFAULT_INT_HASH(uint64_t);

/**
 * One should try to avoid using floats as keys in hash tables, but sometimes it is convenient.
 */
template<> struct DefaultHash<float> {
  uint64_t operator()(float value) const
  {
    return *reinterpret_cast<uint32_t *>(&value);
  }
};

template<> struct DefaultHash<bool> {
  uint64_t operator()(bool value) const
  {
    return static_cast<uint64_t>((value != false) * 1298191);
  }
};

inline uint64_t hash_string(StringRef str)
{
  uint64_t hash = 5381;
  for (char c : str) {
    hash = hash * 33 + c;
  }
  return hash;
}

template<> struct DefaultHash<std::string> {
  /**
   * Take a #StringRef as parameter to support heterogeneous lookups in hash table implementations
   * when std::string is used as key.
   */
  uint64_t operator()(StringRef value) const
  {
    return hash_string(value);
  }
};

template<> struct DefaultHash<StringRef> {
  uint64_t operator()(StringRef value) const
  {
    return hash_string(value);
  }
};

template<> struct DefaultHash<StringRefNull> {
  uint64_t operator()(StringRef value) const
  {
    return hash_string(value);
  }
};

template<> struct DefaultHash<std::string_view> {
  uint64_t operator()(StringRef value) const
  {
    return hash_string(value);
  }
};

/**
 * While we cannot guarantee that the lower 4 bits of a pointer are zero, it is often the case.
 */
template<typename T> struct DefaultHash<T *> {
  uint64_t operator()(const T *value) const
  {
    uintptr_t ptr = reinterpret_cast<uintptr_t>(value);
    uint64_t hash = static_cast<uint64_t>(ptr >> 4);
    return hash;
  }
};

template<typename T> struct DefaultHash<std::unique_ptr<T>> {
  uint64_t operator()(const std::unique_ptr<T> &value) const
  {
    return DefaultHash<T *>{}(value.get());
  }
};

template<typename T1, typename T2> struct DefaultHash<std::pair<T1, T2>> {
  uint64_t operator()(const std::pair<T1, T2> &value) const
  {
    uint64_t hash1 = DefaultHash<T1>{}(value.first);
    uint64_t hash2 = DefaultHash<T2>{}(value.second);
    return hash1 ^ (hash2 * 33);
  }
};

}  // namespace blender