diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-01-29 01:56:12 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-01-29 02:02:06 +0400 |
commit | f70d9660474c2be5f56d65247df3be5af0479e08 (patch) | |
tree | 750e3057998085a7342bdf7de95d76c85034c850 /intern/cycles | |
parent | 52ea13e97087f9a1d604708fe821fcf04d35aba6 (diff) |
Scons: refactor cycles kernel code to avoid building the AVX kernel with
compilers that don't support it.
CMake still needs to be updated to work the same for consistency, but this should
fix the OS X buildbot at least.
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/CMakeLists.txt | 3 | ||||
-rw-r--r-- | intern/cycles/SConscript | 77 | ||||
-rw-r--r-- | intern/cycles/util/util_optimization.h | 13 |
3 files changed, 56 insertions, 37 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 4e1a18adbf3..f8c4ca68572 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -12,6 +12,7 @@ endif() include(cmake/external_libs.cmake) # Build Flags +# todo: refactor this code to match scons if(WIN32 AND MSVC) # there is no /arch:SSE3, but intrinsics are available anyway @@ -45,6 +46,8 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") endif() +add_definitions(-DWITH_KERNEL_SSE2 -DWITH_KERNEL_SSE3 -DWITH_KERNEL_SSE41 -DWITH_KERNEL_AVX) + # for OSL if(WIN32 AND MSVC) set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript index c9465ada950..3b7ca7d3b8f 100644 --- a/intern/cycles/SConscript +++ b/intern/cycles/SConscript @@ -75,52 +75,55 @@ else: if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'): incs.append(env['BF_PTHREADS_INC']) -# optimized kernel -sse2_cxxflags = Split(env['CXXFLAGS']) -sse3_cxxflags = Split(env['CXXFLAGS']) -sse41_cxxflags = Split(env['CXXFLAGS']) -avx_cxxflags = Split(env['CXXFLAGS']) +# optimized kernel. 
we compile the kernel multiple times with different +# optimization flags, at runtime it will choose the optimal kernel +kernel_flags = {} if env['OURPLATFORM'] == 'win32-vc': # there is no /arch:SSE3, but intrinsics are available anyway - sse2_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - sse3_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - sse41_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - avx_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) #/arch:AVX for VC2012 and above -elif env['OURPLATFORM'] == 'win64-vc': - sse2_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - sse3_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - sse41_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - avx_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) #/arch:AVX for VC2012 and above -else: - sse2_cxxflags.append('-ffast-math -msse -msse2 -mfpmath=sse'.split()) - sse3_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse'.split()) - sse41_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mfpmath=sse'.split()) - avx_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse'.split()) + kernel_flags['sse2'] = '/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-' + kernel_flags['sse3'] = kernel_flags['sse2'] + kernel_flags['sse41'] = kernel_flags['sse3'] -optim_defs = defs[:] +elif env['OURPLATFORM'] == 'win64-vc': + # /arch:AVX only available from visual studio 2012 + kernel_flags['sse2'] = '-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-' + kernel_flags['sse3'] = kernel_flags['sse2'] + kernel_flags['sse41'] = kernel_flags['sse3'] -cycles_avx = cycles.Clone() -avx_sources = [path.join('kernel', 'kernel_avx.cpp')] -if env['OURPLATFORM'] == 'darwin' 
and env['C_COMPILER_ID'] == 'gcc' and env['CCVERSION'] >= '4.6': # use Apple assembler for avx , gnu-compilers do not support it ( gnu gcc-4.6 or higher case ) - cycles_avx.BlenderLib('bf_intern_cycles_avx', avx_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=avx_cxxflags, cc_compilerchange='/usr/bin/clang', cxx_compilerchange='/usr/bin/clang++') + if env['MSVC_VERSION'] in {'11.0', '12.0'}: + kernel_flags['avx'] = kernel_flags['sse41'] + ' /arch:AVX' else: - cycles_avx.BlenderLib('bf_intern_cycles_avx', avx_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=avx_cxxflags) - -cycles_sse41 = cycles.Clone() -sse41_sources = [path.join('kernel', 'kernel_sse41.cpp')] -cycles_sse41.BlenderLib('bf_intern_cycles_sse41', sse41_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse41_cxxflags) - -cycles_sse3 = cycles.Clone() -sse3_sources = [path.join('kernel', 'kernel_sse3.cpp')] -cycles_sse3.BlenderLib('bf_intern_cycles_sse3', sse3_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse3_cxxflags) - -cycles_sse2 = cycles.Clone() -sse2_sources = [path.join('kernel', 'kernel_sse2.cpp')] -cycles_sse2.BlenderLib('bf_intern_cycles_sse2', sse2_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse2_cxxflags) + # -mavx only available with relatively new gcc/clang + kernel_flags['sse2'] = '-ffast-math -msse -msse2 -mfpmath=sse' + kernel_flags['sse3'] = kernel_flags['sse2'] + ' -msse3 -mssse3' + kernel_flags['sse41'] = kernel_flags['sse3'] + ' -msse4.1' + + if (env['C_COMPILER_ID'] == 'gcc' and env['CCVERSION'] >= '4.6') or (env['C_COMPILER_ID'] == 'clang' and env['CCVERSION'] >= '3.1'): + kernel_flags['avx'] = kernel_flags['sse41'] + ' -mavx' + +for kernel_type in kernel_flags.keys(): + defs.append('WITH_KERNEL_' + kernel_type.upper()) + +for kernel_type in kernel_flags.keys(): + kernel_source = path.join('kernel', 'kernel_' + kernel_type + 
'.cpp') + kernel_cxxflags = Split(env['CXXFLAGS']) + kernel_cxxflags.append(kernel_flags[kernel_type].split()) + kernel_defs = defs[:] + kernel_env = cycles.Clone() + + if env['OURPLATFORM'] == 'darwin' and env['C_COMPILER_ID'] == 'gcc' and env['CCVERSION'] >= '4.6': + # use Apple assembler for avx , gnu-compilers do not support it ( gnu gcc-4.6 or higher case ) + kernel_env.BlenderLib('bf_intern_cycles_' + kernel_type, [kernel_source], incs, kernel_defs, + libtype=['intern'], priority=[10], cxx_compileflags=kernel_cxxflags, + cc_compilerchange='/usr/bin/clang', cxx_compilerchange='/usr/bin/clang++') + else: + kernel_env.BlenderLib('bf_intern_cycles_' + kernel_type, [kernel_source], incs, kernel_defs, + libtype=['intern'], priority=[10], cxx_compileflags=kernel_cxxflags) cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], cxx_compileflags=cxxflags) +# OSL shaders if env['WITH_BF_CYCLES_OSL']: oso_files = SConscript(['kernel/shaders/SConscript']) cycles.Depends("kernel/osl/osl_shader.o", oso_files) diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h index ac94d43c998..bd86fbed8ce 100644 --- a/intern/cycles/util/util_optimization.h +++ b/intern/cycles/util/util_optimization.h @@ -25,8 +25,13 @@ #if defined(i386) || defined(_M_IX86) +#ifdef WITH_KERNEL_SSE2 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +#endif + +#ifdef WITH_KERNEL_SSE3 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +#endif #endif @@ -40,9 +45,17 @@ #define __KERNEL_SSE2__ /* no SSE2 kernel on x86-64, part of regular kernel */ +#ifdef WITH_KERNEL_SSE3 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +#endif + +#ifdef WITH_KERNEL_SSE41 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +#endif + +#ifdef WITH_KERNEL_AVX #define WITH_CYCLES_OPTIMIZED_KERNEL_AVX +#endif /* MSVC 2008, no SSE41 (broken blendv intrinsic) and no AVX support */ #if defined(_MSC_VER) && (_MSC_VER < 1700) |