diff options
Diffstat (limited to 'intern/cycles/device/cpu/kernel_function.h')
-rw-r--r-- | intern/cycles/device/cpu/kernel_function.h | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/intern/cycles/device/cpu/kernel_function.h b/intern/cycles/device/cpu/kernel_function.h new file mode 100644 index 00000000000..aa18720cc24 --- /dev/null +++ b/intern/cycles/device/cpu/kernel_function.h @@ -0,0 +1,124 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "util/util_debug.h" +#include "util/util_system.h" + +CCL_NAMESPACE_BEGIN + +/* A wrapper around per-microarchitecture variant of a kernel function. + * + * Provides a function-call-like API which gets routed to the most suitable implementation. + * + * For example, on a computer which only has SSE4.1 the kernel_sse41 will be used. */ +template<typename FunctionType> class CPUKernelFunction { + public: + CPUKernelFunction(FunctionType kernel_default, + FunctionType kernel_sse2, + FunctionType kernel_sse3, + FunctionType kernel_sse41, + FunctionType kernel_avx, + FunctionType kernel_avx2) + { + kernel_info_ = get_best_kernel_info( + kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2); + } + + template<typename... Args> inline auto operator()(Args... args) const + { + assert(kernel_info_.kernel); + + return kernel_info_.kernel(args...); + } + + const char *get_uarch_name() const + { + return kernel_info_.uarch_name; + } + + protected: + /* Helper class which allows to pass human-readable microarchitecture name together with function + * pointer. */ + class KernelInfo { + public: + KernelInfo() : KernelInfo("", nullptr) + { + } + + /* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without + * memory allocation. */ + KernelInfo(const char *uarch_name, FunctionType kernel) + : uarch_name(uarch_name), kernel(kernel) + { + } + + const char *uarch_name; + FunctionType kernel; + }; + + KernelInfo get_best_kernel_info(FunctionType kernel_default, + FunctionType kernel_sse2, + FunctionType kernel_sse3, + FunctionType kernel_sse41, + FunctionType kernel_avx, + FunctionType kernel_avx2) + { + /* Silence warnings about unused variables when compiling without some architectures. */ + (void)kernel_sse2; + (void)kernel_sse3; + (void)kernel_sse41; + (void)kernel_avx; + (void)kernel_avx2; + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 + if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) { + return KernelInfo("AVX2", kernel_avx2); + } +#endif + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX + if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) { + return KernelInfo("AVX", kernel_avx); + } +#endif + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 + if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) { + return KernelInfo("SSE4.1", kernel_sse41); + } +#endif + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 + if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) { + return KernelInfo("SSE3", kernel_sse3); + } +#endif + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 + if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) { + return KernelInfo("SSE2", kernel_sse2); + } +#endif + + return KernelInfo("default", kernel_default); + } + + KernelInfo kernel_info_; +}; + +CCL_NAMESPACE_END |