Welcome to mirror list, hosted at ThFree Co, Russian Federation.

kernel_function.h « cpu « device « cycles « intern - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 5ff55499d480fda68b850978f50947968bcee803 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/*
 * Copyright 2011-2021 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <utility>

#include "util/debug.h"
#include "util/system.h"

CCL_NAMESPACE_BEGIN

/* A wrapper around the per-microarchitecture variants of a kernel function.
 *
 * Provides a function-call-like API which gets routed to the most suitable implementation. The
 * implementation is chosen once, when the wrapper is constructed, and every subsequent call is
 * dispatched to that same variant.
 *
 * For example, on a computer which only has SSE4.1 the kernel_sse41 will be used.
 *
 * FunctionType is expected to behave like a function pointer: it is constructed from nullptr by
 * the default KernelInfo, checked in a boolean context before a call and invoked with the call
 * operator. */
template<typename FunctionType> class CPUKernelFunction {
 public:
  /* Construct the wrapper from all variants of the kernel.
   *
   * All variants must be passed, even those for architectures which were not enabled at compile
   * time: such variants are simply never selected. */
  CPUKernelFunction(FunctionType kernel_default,
                    FunctionType kernel_sse2,
                    FunctionType kernel_sse3,
                    FunctionType kernel_sse41,
                    FunctionType kernel_avx,
                    FunctionType kernel_avx2)
      : kernel_info_(get_best_kernel_info(
            kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2))
  {
  }

  /* Invoke the selected kernel variant.
   *
   * Arguments are perfectly forwarded to the kernel: a kernel which takes a parameter by reference
   * operates on the caller's object (and not on a temporary copy of it), and a parameter taken by
   * value is not copied an extra time on the way. The return type of the kernel is preserved
   * as-is, including references. */
  template<typename... Args> inline decltype(auto) operator()(Args &&...args) const
  {
    assert(kernel_info_.kernel);

    return kernel_info_.kernel(std::forward<Args>(args)...);
  }

  /* Human-readable name of the microarchitecture of the selected variant ("default" when none of
   * the optimized variants was picked). The returned pointer refers to a string literal. */
  const char *get_uarch_name() const
  {
    return kernel_info_.uarch_name;
  }

 protected:
  /* Helper class which allows to pass human-readable microarchitecture name together with function
   * pointer. */
  class KernelInfo {
   public:
    /* Empty info: empty name and no kernel. */
    KernelInfo() : KernelInfo("", nullptr)
    {
    }

    /* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without
     * memory allocation. */
    KernelInfo(const char *uarch_name, FunctionType kernel)
        : uarch_name(uarch_name), kernel(kernel)
    {
    }

    /* Not owned: the pointer is stored as-is, so the string must outlive this object. */
    const char *uarch_name;
    FunctionType kernel;
  };

  /* Pick the variant to use, from the most to the least advanced instruction set.
   *
   * A variant is considered only when its WITH_CYCLES_OPTIMIZED_KERNEL_* option was enabled at
   * compile time. It is selected when both the corresponding DebugFlags().cpu.has_*() check and
   * the corresponding system_cpu_support_*() check pass. When no optimized variant qualifies
   * kernel_default is used.
   *
   * Does not depend on the state of the wrapper, hence static. */
  static KernelInfo get_best_kernel_info(FunctionType kernel_default,
                                         FunctionType kernel_sse2,
                                         FunctionType kernel_sse3,
                                         FunctionType kernel_sse41,
                                         FunctionType kernel_avx,
                                         FunctionType kernel_avx2)
  {
    /* Silence warnings about unused variables when compiling without some architectures. */
    (void)kernel_sse2;
    (void)kernel_sse3;
    (void)kernel_sse41;
    (void)kernel_avx;
    (void)kernel_avx2;

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
    if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
      return KernelInfo("AVX2", kernel_avx2);
    }
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
    if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
      return KernelInfo("AVX", kernel_avx);
    }
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
    if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
      return KernelInfo("SSE4.1", kernel_sse41);
    }
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
    if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
      return KernelInfo("SSE3", kernel_sse3);
    }
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
    if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
      return KernelInfo("SSE2", kernel_sse2);
    }
#endif

    /* Fallback which is always available. */
    return KernelInfo("default", kernel_default);
  }

  /* The variant selected at construction time. */
  KernelInfo kernel_info_;
};

CCL_NAMESPACE_END