diff options
author | Thomas Dinges <blender@dingto.org> | 2014-01-16 20:04:11 +0400 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2014-01-16 20:04:11 +0400 |
commit | de28a4d4b2c9397c5233a5ee1dbf1400f450a15c (patch) | |
tree | 1fe23de963e206af3fb2ff2d9e2e3393cd89149c /intern/cycles/SConscript | |
parent | 7c6d52eb07c4bd8142a95eca1dbdc794063859b8 (diff) |
Cycles: Add an AVX kernel for CPU rendering.
* AVX is available on Intel Sandy Bridge and newer and AMD Bulldozer and newer.
* We don't use dedicated AVX intrinsics yet, but GCC auto-vectorization gives a 3% performance improvement for Caminandes. Tested on an i5-3570, Linux x64.
* No change for Windows yet, because MSVC 2008 does not support AVX.
Reviewed by: brecht
Differential Revision: https://developer.blender.org/D216
Diffstat (limited to 'intern/cycles/SConscript')
-rw-r--r-- | intern/cycles/SConscript | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript
index b1dfeee1560..7ced4613d64 100644
--- a/intern/cycles/SConscript
+++ b/intern/cycles/SConscript
@@ -38,6 +38,7 @@ sources.remove(path.join('util', 'util_view.cpp'))
 sources.remove(path.join('kernel', 'kernel_sse2.cpp'))
 sources.remove(path.join('kernel', 'kernel_sse3.cpp'))
 sources.remove(path.join('kernel', 'kernel_sse41.cpp'))
+sources.remove(path.join('kernel', 'kernel_avx.cpp'))
 
 incs = []
 defs = []
@@ -78,23 +79,31 @@ if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', '
 sse2_cxxflags = Split(env['CXXFLAGS'])
 sse3_cxxflags = Split(env['CXXFLAGS'])
 sse41_cxxflags = Split(env['CXXFLAGS'])
+avx_cxxflags = Split(env['CXXFLAGS'])
 
 if env['OURPLATFORM'] == 'win32-vc':
     # there is no /arch:SSE3, but intrinsics are available anyway
     sse2_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split())
     sse3_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split())
     sse41_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split())
+    avx_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) #/arch:AVX for VC2012 and above
 elif env['OURPLATFORM'] == 'win64-vc':
     sse2_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split())
     sse3_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split())
     sse41_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split())
+    avx_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) #/arch:AVX for VC2012 and above
 else:
     sse2_cxxflags.append('-ffast-math -msse -msse2 -mfpmath=sse'.split())
     sse3_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse'.split())
     sse41_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mfpmath=sse'.split())
+    avx_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse'.split())
 
 optim_defs = defs[:]
 
+cycles_avx = cycles.Clone()
+avx_sources = [path.join('kernel', 'kernel_avx.cpp')]
+cycles_avx.BlenderLib('bf_intern_cycles_avx', avx_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=avx_cxxflags)
+
 cycles_sse41 = cycles.Clone()
 sse41_sources = [path.join('kernel', 'kernel_sse41.cpp')]
 cycles_sse41.BlenderLib('bf_intern_cycles_sse41', sse41_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse41_cxxflags)