diff options
author | Thomas Dinges <blender@dingto.org> | 2014-06-14 00:23:58 +0400 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2014-06-14 00:26:20 +0400 |
commit | 866c7fb6e63d128fa4800e28e0a091f874112344 (patch) | |
tree | 096daad79ca3eb7c47e339e7b1c568caf47a5733 /intern/cycles/CMakeLists.txt | |
parent | b4aa51f8d736f5431799fdf1df5f678a732ef6b9 (diff) |
Cycles: Add an AVX2 CPU kernel.
This kernel is compiled with AVX2, FMA3, and BMI compiler flags. At the moment only Intel Haswell benefits from this, but future AMD CPUs will have these instructions as well.
Makes rendering on Haswell CPUs a few percent faster, only benchmarked with clang on OS X though.
Part of my GSoC 2014.
Diffstat (limited to 'intern/cycles/CMakeLists.txt')
-rw-r--r-- | intern/cycles/CMakeLists.txt | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index a1b0030491e..5a6dc36b213 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -20,8 +20,10 @@ if(WIN32 AND MSVC) # /arch:AVX for VC2012 and above if(NOT MSVC_VERSION LESS 1700) set(CYCLES_AVX_ARCH_FLAGS "/arch:AVX") + set(CYCLES_AVX2_ARCH_FLAGS "/arch:AVX /arch:AVX2") elseif(NOT CMAKE_CL_64) set(CYCLES_AVX_ARCH_FLAGS "/arch:SSE2") + set(CYCLES_AVX2_ARCH_FLAGS "/arch:SSE2") endif() # there is no /arch:SSE3, but intrinsics are available anyway @@ -30,11 +32,13 @@ if(WIN32 AND MSVC) set(CYCLES_SSE3_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") set(CYCLES_SSE41_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") + set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") else() set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") + set(CYCLES_AVX2_KERNEL_FLAGS "${CYCLES_AVX2_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") @@ -48,6 +52,7 @@ elseif(CMAKE_COMPILER_IS_GNUCC) set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse") set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mfpmath=sse") set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse") + set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mfma -mbmi -mbmi2 -mfpmath=sse") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") @@ -57,6 +62,7 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3") set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1") set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx") + set(CYCLES_AVX2_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mavx2 —mfma -mbmi -mbmi2") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") endif() @@ -67,6 +73,7 @@ if(CXX_HAS_SSE) -DWITH_KERNEL_SSE3 -DWITH_KERNEL_SSE41 -DWITH_KERNEL_AVX + -DWITH_KERNEL_AVX2 ) endif() |