/* * Copyright 2011-2021 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include "util/util_debug.h" #include "util/util_system.h" CCL_NAMESPACE_BEGIN /* A wrapper around per-microarchitecture variant of a kernel function. * * Provides a function-call-like API which gets routed to the most suitable implementation. * * For example, on a computer which only has SSE4.1 the kernel_sse41 will be used. */ template class CPUKernelFunction { public: CPUKernelFunction(FunctionType kernel_default, FunctionType kernel_sse2, FunctionType kernel_sse3, FunctionType kernel_sse41, FunctionType kernel_avx, FunctionType kernel_avx2) { kernel_info_ = get_best_kernel_info( kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2); } template inline auto operator()(Args... args) const { assert(kernel_info_.kernel); return kernel_info_.kernel(args...); } const char *get_uarch_name() const { return kernel_info_.uarch_name; } protected: /* Helper class which allows to pass human-readable microarchitecture name together with function * pointer. */ class KernelInfo { public: KernelInfo() : KernelInfo("", nullptr) { } /* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without * memory allocation. */ KernelInfo(const char *uarch_name, FunctionType kernel) : uarch_name(uarch_name), kernel(kernel) { } const char *uarch_name; FunctionType kernel; }; KernelInfo get_best_kernel_info(FunctionType kernel_default, FunctionType kernel_sse2, FunctionType kernel_sse3, FunctionType kernel_sse41, FunctionType kernel_avx, FunctionType kernel_avx2) { /* Silence warnings about unused variables when compiling without some architectures. */ (void)kernel_sse2; (void)kernel_sse3; (void)kernel_sse41; (void)kernel_avx; (void)kernel_avx2; #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) { return KernelInfo("AVX2", kernel_avx2); } #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) { return KernelInfo("AVX", kernel_avx); } #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) { return KernelInfo("SSE4.1", kernel_sse41); } #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) { return KernelInfo("SSE3", kernel_sse3); } #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) { return KernelInfo("SSE2", kernel_sse2); } #endif return KernelInfo("default", kernel_default); } KernelInfo kernel_info_; }; CCL_NAMESPACE_END