diff options
author | Thomas Dinges <blender@dingto.org> | 2014-06-14 00:23:58 +0400 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2014-06-14 00:26:20 +0400 |
commit | 866c7fb6e63d128fa4800e28e0a091f874112344 (patch) | |
tree | 096daad79ca3eb7c47e339e7b1c568caf47a5733 /intern/cycles/SConscript | |
parent | b4aa51f8d736f5431799fdf1df5f678a732ef6b9 (diff) |
Cycles: Add an AVX2 CPU kernel.
This kernel is compiled with AVX2, FMA3, and BMI compiler flags. At the moment only Intel Haswell benefits from this, but future AMD CPUs will have these instructions as well.
This makes rendering on Haswell CPUs a few percent faster, although it has only been benchmarked with clang on OS X so far.
Part of my GSoC 2014.
Diffstat (limited to 'intern/cycles/SConscript')
-rw-r--r-- | intern/cycles/SConscript | 3 |
1 file changed, 3 insertions, 0 deletions
```diff
diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript
index 542bb82cf2a..dab8f25de4a 100644
--- a/intern/cycles/SConscript
+++ b/intern/cycles/SConscript
@@ -39,6 +39,7 @@ sources.remove(path.join('kernel', 'kernel_sse2.cpp'))
 sources.remove(path.join('kernel', 'kernel_sse3.cpp'))
 sources.remove(path.join('kernel', 'kernel_sse41.cpp'))
 sources.remove(path.join('kernel', 'kernel_avx.cpp'))
+sources.remove(path.join('kernel', 'kernel_avx2.cpp'))
 
 incs = []
 defs = []
@@ -98,6 +99,7 @@ elif env['OURPLATFORM'] == 'win64-vc':
     if env['MSVC_VERSION'] >= '12.0':
         kernel_flags['sse41'] = kernel_flags['sse3']
         kernel_flags['avx'] = kernel_flags['sse41'] + ' /arch:AVX'
+        kernel_flags['avx2'] = kernel_flags['sse41'] + ' /arch:AVX /arch:AVX2'
 else:
     # -mavx only available with relatively new gcc/clang
     kernel_flags['sse2'] = '-ffast-math -msse -msse2 -mfpmath=sse'
@@ -106,6 +108,7 @@ else:
     if (env['C_COMPILER_ID'] == 'gcc' and env['CCVERSION'] >= '4.6') or (env['C_COMPILER_ID'] == 'clang' and env['CCVERSION'] >= '3.1'):
         kernel_flags['avx'] = kernel_flags['sse41'] + ' -mavx'
+        kernel_flags['avx2'] = kernel_flags['avx'] + ' -mavx2 -mfma -mbmi -mbmi2'
 
 for kernel_type in kernel_flags.keys():
     defs.append('WITH_KERNEL_' + kernel_type.upper())
 
```