diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | intern/cycles/CMakeLists.txt | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel.cpp | 30 | ||||
-rw-r--r-- | intern/cycles/util/util_simd.cpp | 3 |
4 files changed, 42 insertions, 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4010ee6416f..12f58706103 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -390,8 +390,10 @@ mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
 option(WITH_CYCLES_DEBUG "Build Cycles with extra debug capabilities" OFF)
+option(WITH_CYCLES_NATIVE_ONLY "Build Cycles with native kernel only (which fits current CPU, use for development only)" OFF)
 mark_as_advanced(WITH_CYCLES_LOGGING)
 mark_as_advanced(WITH_CYCLES_DEBUG)
+mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
 
 option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
 mark_as_advanced(WITH_CUDA_DYNLOAD)
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 13b5de360d5..3b6c25c370e 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -14,7 +14,15 @@ include(cmake/external_libs.cmake)
 
 # todo: this code could be refactored a bit to avoid duplication
 # note: CXX_HAS_SSE is needed in case passing SSE flags fails altogether (gcc-arm)
-if(NOT WITH_CPU_SSE)
+if(WITH_CYCLES_NATIVE_ONLY)
+	set(CXX_HAS_SSE FALSE)
+	set(CXX_HAS_AVX FALSE)
+	set(CXX_HAS_AVX2 FALSE)
+	add_definitions(
+		-DWITH_KERNEL_NATIVE
+	)
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
+elseif(NOT WITH_CPU_SSE)
 	set(CXX_HAS_SSE FALSE)
 	set(CXX_HAS_AVX FALSE)
 	set(CXX_HAS_AVX2 FALSE)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index 643eefcdc6c..45091f6f33d 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -16,11 +16,39 @@
 
 /* CPU kernel entry points */
 
-/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */
+/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this
+ * one with SSE2 intrinsics.
+ */
 #if defined(__x86_64__) || defined(_M_X64)
 #  define __KERNEL_SSE2__
 #endif
 
+/* When building kernel for native machine detect kernel features from the flags
+ * set by compiler.
+ */
+#ifdef WITH_KERNEL_NATIVE
+#  ifdef __SSE2__
+#    ifndef __KERNEL_SSE2__
+#      define __KERNEL_SSE2__
+#    endif
+#  endif
+#  ifdef __SSE3__
+#    define __KERNEL_SSE3__
+#  endif
+#  ifdef __SSSE3__
+#    define __KERNEL_SSSE3__
+#  endif
+#  ifdef __SSE4_1__
+#    define __KERNEL_SSE41__
+#  endif
+#  ifdef __AVX__
+#    define __KERNEL_AVX__
+#  endif
+#  ifdef __AVX2__
+#    define __KERNEL_AVX2__
+#  endif
+#endif
+
 /* quiet unused define warnings */
 #if defined(__KERNEL_SSE2__)
 	/* do nothing */
diff --git a/intern/cycles/util/util_simd.cpp b/intern/cycles/util/util_simd.cpp
index eb9e32800e1..de2df612578 100644
--- a/intern/cycles/util/util_simd.cpp
+++ b/intern/cycles/util/util_simd.cpp
@@ -15,7 +15,8 @@
  * limitations under the License.
  */
 
-#ifdef WITH_KERNEL_SSE2
+#if (defined(WITH_KERNEL_SSE2)) || \
+    (defined(WITH_KERNEL_NATIVE) && defined(__SSE2__))
 
 #define __KERNEL_SSE2__
 #include "util_simd.h"