Welcome to mirror list, hosted at ThFree Co, Russian Federation.

BLI_devirtualize_parameters.hh « blenlib « blender « source - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 77b6223f310e8e0a3ac1eae9aa4175d6c53e4615 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
/* SPDX-License-Identifier: GPL-2.0-or-later */

#pragma once

/** \file
 * \ingroup bli
 *
 * In geometry nodes, many functions accept fields as inputs. For the implementation that means
 * that the inputs are virtual arrays. Usually those are backed by actual arrays or single values
 * but sometimes virtual arrays are used to compute values on demand or convert between data
 * formats.
 *
 * Using virtual arrays has the downside that individual elements are accessed through a virtual
 * method call, which has some overhead compared to normal array access. Whether this overhead is
 * negligible depends on the context. For very small functions (e.g. a single addition), the
 * overhead can make the function many times slower. Furthermore, it prevents the compiler from
 * doing some optimizations (e.g. loop unrolling and inserting SIMD instructions).
 *
 * The solution is to "devirtualize" the virtual arrays in cases when the overhead cannot be
 * ignored. That means that the function is instantiated multiple times at compile time for the
 * different cases. For example, there can be an optimized function that adds a span and a single
 * value, and another function that adds a span and another span. At run-time there is a dynamic
 * dispatch that executes the best function given the specific virtual arrays.
 *
 * The problem with this devirtualization is that it can result in exponentially increasing compile
 * times and binary sizes, depending on the number of parameters that are devirtualized separately.
 * So there is always a trade-off between run-time performance and compile-time/binary-size.
 *
 * This file provides a utility to devirtualize array parameters to a function using a high level
 * API. This makes it easy to experiment with different extremes of the mentioned trade-off and
 * allows finding a good compromise for each function.
 */

#include "BLI_parameter_pack_utils.hh"
#include "BLI_virtual_array.hh"

namespace blender::devirtualize_parameters {

/**
 * Bit flag that specifies how an individual parameter is or can be devirtualized.
 *
 * Every mode is a distinct bit, so that several modes can be combined (e.g.
 * `DeviMode::Single | DeviMode::Span`) to describe the set of modes allowed for one parameter.
 */
enum class DeviMode {
  /* This is used as zero-value to compare to, to avoid casting to int. */
  None = 0,
  /* Don't use devirtualization for that parameter, just pass it along. */
  Keep = (1 << 0),
  /* Devirtualize #VArray as #Span. For an #IndexMask this mode passes the mask along unchanged. */
  Span = (1 << 1),
  /* Devirtualize #VArray as #SingleAsSpan. */
  Single = (1 << 2),
  /* Devirtualize #IndexMask as #IndexRange. */
  Range = (1 << 3),
};
/* Presumably defines the bitwise operators (`|`, `&`, ...) that this file uses on #DeviMode.
 * #DeviMode::Range is passed because it is the highest flag. */
ENUM_OPERATORS(DeviMode, DeviMode::Range);

/**
 * Utility to encode multiple #DeviMode in a type. Every element belongs to one parameter and may
 * be a combination of several #DeviMode flags.
 */
template<DeviMode... Mode> using DeviModeSequence = ValueSequence<DeviMode, Mode...>;

/**
 * Main class that performs the devirtualization.
 *
 * It stores a callable together with pointers to the source parameters. Calling
 * #try_execute_devirtualized checks at run-time which of the requested devirtualizations apply
 * and invokes the callable at most once with the matching parameter types. Whether the callable
 * has been invoked can be queried with #executed.
 *
 * \tparam Fn: Type of the callable. It is invoked with one argument per source parameter and has
 *   to accept every combination of argument types that the requested modes can produce.
 * \tparam SourceTypes: Types of the non-devirtualized parameters (e.g. #VArray or #IndexMask).
 */
template<typename Fn, typename... SourceTypes> class Devirtualizer {
 private:
  /** Utility to get the tag of the I-th source type. */
  template<size_t I>
  using type_at_index = typename TypeSequence<SourceTypes...>::template at_index<I>;
  static constexpr size_t SourceTypesNum = sizeof...(SourceTypes);

  /** Function to devirtualize. */
  Fn fn_;

  /**
   * Source values that will be devirtualized. Note that these are stored as pointers to avoid
   * unnecessary copies. The caller is responsible for keeping the memory alive.
   */
  std::tuple<const SourceTypes *...> sources_;

  /** Keeps track of whether #fn_ has been called already to avoid calling it twice. */
  bool executed_ = false;

 public:
  /**
   * \param fn: The function to call with (devirtualized) parameters.
   * \param sources: One pointer per source parameter. The pointers are stored, not the values.
   */
  Devirtualizer(Fn fn, const SourceTypes *...sources) : fn_(std::move(fn)), sources_{sources...}
  {
  }

  /**
   * Return true when the function passed to the constructor has been called already.
   */
  bool executed() const
  {
    return executed_;
  }

  /**
   * At compile time, generates multiple variants of the function, each optimized for a different
   * combination of devirtualized parameters. For every parameter, a bit flag is passed that
   * determines how it will be devirtualized. At run-time, if possible, one of the generated
   * functions is picked and executed.
   *
   * To check whether the function was called successfully, call #executed() afterwards.
   *
   * \note This generates an exponential amount of code in the final binary, depending on how many
   * to-be-devirtualized parameters there are.
   *
   * \note For #IndexMask parameters only #DeviMode::Range and #DeviMode::Span are checked. Use
   * #DeviMode::Span (not #DeviMode::Keep) to allow passing the mask along unchanged.
   */
  template<DeviMode... AllowedModes>
  void try_execute_devirtualized(DeviModeSequence<AllowedModes...> /* allowed_modes */)
  {
    BLI_assert(!executed_);
    static_assert(sizeof...(AllowedModes) == SourceTypesNum);
    this->try_execute_devirtualized_impl(DeviModeSequence<>(),
                                         DeviModeSequence<AllowedModes...>());
  }

  /**
   * Execute the function and pass in the original parameters without doing any devirtualization.
   */
  void execute_without_devirtualization()
  {
    BLI_assert(!executed_);
    this->try_execute_devirtualized_impl_call(
        make_value_sequence<DeviMode, DeviMode::Keep, SourceTypesNum>(),
        std::make_index_sequence<SourceTypesNum>());
  }

 private:
  /**
   * A recursive method that generates all the combinations of devirtualized parameters that the
   * caller requested. A recursive function is necessary to achieve generating an exponential
   * number of function calls (which has to be used with care, but is expected here).
   *
   * At every recursive step, the #DeviMode of one parameter is determined. This is achieved by
   * extending #DeviModeSequence<Mode...> by one element in each step. The recursion ends once all
   * parameters are handled.
   *
   * For every parameter the allowed modes are tried in a fixed order. When the remaining
   * parameters cannot be handled with the current choice, the next allowed mode is tried.
   *
   * \return True when the function has been executed.
   */
  template<DeviMode... Mode, DeviMode... AllowedModes>
  bool try_execute_devirtualized_impl(
      /* Initially empty, but then extended by one element in each recursive step.  */
      DeviModeSequence<Mode...> /* modes */,
      /* Bit flag for every parameter. */
      DeviModeSequence<AllowedModes...> /* allowed_modes */)
  {
    static_assert(SourceTypesNum == sizeof...(AllowedModes));
    if constexpr (SourceTypesNum == sizeof...(Mode)) {
      /* End of recursion, now call the function with the determined #DeviModes. */
      this->try_execute_devirtualized_impl_call(DeviModeSequence<Mode...>(),
                                                std::make_index_sequence<SourceTypesNum>());
      return true;
    }
    else {
      /* Index of the parameter that is checked in the current recursive step. */
      constexpr size_t I = sizeof...(Mode);
      /* Non-devirtualized parameter type. */
      using SourceType = type_at_index<I>;
      /* A bit flag indicating what devirtualizations are allowed in this step. */
      [[maybe_unused]] constexpr DeviMode allowed_modes =
          DeviModeSequence<AllowedModes...>::template at_index<I>();

      /* Handle #VArray types. */
      if constexpr (is_VArray_v<SourceType>) {
        /* The actual virtual array, used for dynamic dispatch at run-time. */
        const SourceType &varray = *std::get<I>(sources_);
        /* Check if the virtual array is a single value. */
        if constexpr ((allowed_modes & DeviMode::Single) != DeviMode::None) {
          if (varray.is_single()) {
            if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Single>(),
                                                     DeviModeSequence<AllowedModes...>())) {
              return true;
            }
          }
        }
        /* Check if the virtual array is a span. */
        if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
          if (varray.is_span()) {
            if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
                                                     DeviModeSequence<AllowedModes...>())) {
              return true;
            }
          }
        }
        /* Check if it is ok if the virtual array is not devirtualized. */
        if constexpr ((allowed_modes & DeviMode::Keep) != DeviMode::None) {
          if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
                                                   DeviModeSequence<AllowedModes...>())) {
            return true;
          }
        }
      }

      /* Handle #IndexMask. */
      else if constexpr (std::is_same_v<IndexMask, SourceType>) {
        /* Check if the mask is actually a contiguous range. */
        if constexpr ((allowed_modes & DeviMode::Range) != DeviMode::None) {
          /* The actual mask used for dynamic dispatch at run-time. */
          const IndexMask &mask = *std::get<I>(sources_);
          if (mask.is_range()) {
            if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Range>(),
                                                     DeviModeSequence<AllowedModes...>())) {
              return true;
            }
          }
        }
        /* Check if mask is also allowed to stay a span. */
        if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
          if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
                                                   DeviModeSequence<AllowedModes...>())) {
            return true;
          }
        }
      }

      /* Handle unknown types. Those are always passed along unchanged, independent of the
       * allowed modes. */
      else {
        if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
                                                 DeviModeSequence<AllowedModes...>())) {
          return true;
        }
      }
    }
    /* None of the allowed modes could be used for the current parameter. */
    return false;
  }

  /**
   * Actually call the function with devirtualized parameters.
   *
   * The I-th argument passed to #fn_ is the I-th source parameter converted with the I-th mode.
   * Afterwards the devirtualizer is marked as executed.
   */
  template<DeviMode... Mode, size_t... I>
  void try_execute_devirtualized_impl_call(DeviModeSequence<Mode...> /* modes */,
                                           std::index_sequence<I...> /* indices */)
  {

    BLI_assert(!executed_);
    fn_(this->get_devirtualized_parameter<I, Mode>()...);
    executed_ = true;
  }

  /**
   * Return the I-th parameter devirtualized using the passed in #DeviMode. This has different
   * return types based on the template parameters.
   *
   * The branches form a single `if constexpr` chain, so that exactly one return statement is
   * instantiated for every supported combination of source type and mode.
   *
   * \note It is expected that the caller already knows that the parameter can be devirtualized
   * with the given mode.
   */
  template<size_t I, DeviMode Mode> decltype(auto) get_devirtualized_parameter()
  {
    using SourceType = type_at_index<I>;
    static_assert(Mode != DeviMode::None);
    if constexpr (Mode == DeviMode::Keep) {
      /* Don't change the original parameter at all. */
      return *std::get<I>(sources_);
    }
    else if constexpr (is_VArray_v<SourceType>) {
      const SourceType &varray = *std::get<I>(sources_);
      if constexpr (Mode == DeviMode::Single) {
        /* Devirtualize virtual array as single value. */
        return SingleAsSpan(varray);
      }
      else if constexpr (Mode == DeviMode::Span) {
        /* Devirtualize virtual array as span. */
        return varray.get_internal_span();
      }
    }
    else if constexpr (std::is_same_v<IndexMask, SourceType>) {
      const IndexMask &mask = *std::get<I>(sources_);
      if constexpr (Mode == DeviMode::Span) {
        /* Don't devirtualize mask, it's still a span. */
        return mask;
      }
      else if constexpr (Mode == DeviMode::Range) {
        /* Devirtualize the mask as range. */
        return mask.as_range();
      }
    }
  }
};

}  // namespace blender::devirtualize_parameters

namespace blender {

/**
 * Generate multiple versions of the given function optimized for different virtual arrays.
 * One has to be careful with nesting multiple devirtualizations, because that results in an
 * exponential number of function instantiations (increasing compile time and binary size).
 *
 * Generally, this function should only be used when the virtual method call overhead to get an
 * element from a virtual array is significant.
 */
template<typename T, typename Func>
inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
{
  using namespace devirtualize_parameters;
  if (enable) {
    Devirtualizer<decltype(func), VArray<T>> devirtualizer(func, &varray);
    constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
    devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode>());
    if (devirtualizer.executed()) {
      return;
    }
  }
  func(varray);
}

/**
 * Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
 * This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
 * cases.
 */
template<typename T1, typename T2, typename Func>
inline void devirtualize_varray2(const VArray<T1> &varray1,
                                 const VArray<T2> &varray2,
                                 const Func &func,
                                 bool enable = true)
{
  using namespace devirtualize_parameters;
  if (enable) {
    Devirtualizer<decltype(func), VArray<T1>, VArray<T2>> devirtualizer(func, &varray1, &varray2);
    constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
    devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode, devi_mode>());
    if (devirtualizer.executed()) {
      return;
    }
  }
  func(varray1, varray2);
}

}  // namespace blender