diff options
-rw-r--r-- | build_files/buildbot/config/user-config-mac-i386.py | 3 | ||||
-rw-r--r-- | build_files/buildbot/config/user-config-mac-x86_64.py | 4 | ||||
-rw-r--r-- | intern/cycles/CMakeLists.txt | 3 | ||||
-rw-r--r-- | intern/cycles/SConscript | 77 | ||||
-rw-r--r-- | intern/cycles/util/util_optimization.h | 13 |
5 files changed, 57 insertions, 43 deletions
diff --git a/build_files/buildbot/config/user-config-mac-i386.py b/build_files/buildbot/config/user-config-mac-i386.py index 1836787d0bd..296b752649d 100644 --- a/build_files/buildbot/config/user-config-mac-i386.py +++ b/build_files/buildbot/config/user-config-mac-i386.py @@ -1,7 +1,4 @@ -CC = 'clang' -CXX = 'clang++' - MACOSX_ARCHITECTURE = 'i386' # valid archs: ppc, i386, ppc64, x86_64 WITH_BF_CYCLES_CUDA_BINARIES = True diff --git a/build_files/buildbot/config/user-config-mac-x86_64.py b/build_files/buildbot/config/user-config-mac-x86_64.py index 6945b304940..ac923f48abe 100644 --- a/build_files/buildbot/config/user-config-mac-x86_64.py +++ b/build_files/buildbot/config/user-config-mac-x86_64.py @@ -1,7 +1,5 @@ -CC = 'clang' -CXX = 'clang++' - MACOSX_ARCHITECTURE = 'x86_64' # valid archs: ppc, i386, ppc64, x86_64 WITH_BF_CYCLES_CUDA_BINARIES = True + diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 4e1a18adbf3..f8c4ca68572 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -12,6 +12,7 @@ endif() include(cmake/external_libs.cmake) # Build Flags +# todo: refactor this code to match scons if(WIN32 AND MSVC) # there is no /arch:SSE3, but intrinsics are available anyway @@ -45,6 +46,8 @@ elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") endif() +add_definitions(-DWITH_KERNEL_SSE2 -DWITH_KERNEL_SSE3 -DWITH_KERNEL_SSE41 -DWITH_KERNEL_AVX) + # for OSL if(WIN32 AND MSVC) set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript index c9465ada950..3b7ca7d3b8f 100644 --- a/intern/cycles/SConscript +++ b/intern/cycles/SConscript @@ -75,52 +75,55 @@ else: if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'): incs.append(env['BF_PTHREADS_INC']) -# optimized kernel -sse2_cxxflags = Split(env['CXXFLAGS']) -sse3_cxxflags = Split(env['CXXFLAGS']) -sse41_cxxflags = Split(env['CXXFLAGS']) -avx_cxxflags = Split(env['CXXFLAGS']) +# optimized kernel. we compile the kernel multiple times with different +# optimization flags, at runtime it will choose the optimal kernel +kernel_flags = {} if env['OURPLATFORM'] == 'win32-vc': # there is no /arch:SSE3, but intrinsics are available anyway - sse2_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - sse3_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - sse41_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - avx_cxxflags.append('/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) #/arch:AVX for VC2012 and above -elif env['OURPLATFORM'] == 'win64-vc': - sse2_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - sse3_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - sse41_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) - avx_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'.split()) #/arch:AVX for VC2012 and above -else: - sse2_cxxflags.append('-ffast-math -msse -msse2 -mfpmath=sse'.split()) - sse3_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse'.split()) - sse41_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mfpmath=sse'.split()) - avx_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse'.split()) + kernel_flags['sse2'] = '/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-' + kernel_flags['sse3'] = kernel_flags['sse2'] + kernel_flags['sse41'] = kernel_flags['sse3'] -optim_defs = defs[:] +elif env['OURPLATFORM'] == 'win64-vc': + # /arch:AVX only available from visual studio 2012 + kernel_flags['sse2'] = '-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-' + kernel_flags['sse3'] = kernel_flags['sse2'] + kernel_flags['sse41'] = kernel_flags['sse3'] -cycles_avx = cycles.Clone() -avx_sources = [path.join('kernel', 'kernel_avx.cpp')] -if env['OURPLATFORM'] == 'darwin' and env['C_COMPILER_ID'] == 'gcc' and env['CCVERSION'] >= '4.6': # use Apple assembler for avx , gnu-compilers do not support it ( gnu gcc-4.6 or higher case ) - cycles_avx.BlenderLib('bf_intern_cycles_avx', avx_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=avx_cxxflags, cc_compilerchange='/usr/bin/clang', cxx_compilerchange='/usr/bin/clang++') + if env['MSVC_VERSION'] in {'11.0', '12.0'}: + kernel_flags['avx'] = kernel_flags['sse41'] + ' /arch:AVX' else: - cycles_avx.BlenderLib('bf_intern_cycles_avx', avx_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=avx_cxxflags) - -cycles_sse41 = cycles.Clone() -sse41_sources = [path.join('kernel', 'kernel_sse41.cpp')] -cycles_sse41.BlenderLib('bf_intern_cycles_sse41', sse41_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse41_cxxflags) - -cycles_sse3 = cycles.Clone() -sse3_sources = [path.join('kernel', 'kernel_sse3.cpp')] -cycles_sse3.BlenderLib('bf_intern_cycles_sse3', sse3_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse3_cxxflags) - -cycles_sse2 = cycles.Clone() -sse2_sources = [path.join('kernel', 'kernel_sse2.cpp')] -cycles_sse2.BlenderLib('bf_intern_cycles_sse2', sse2_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse2_cxxflags) + # -mavx only available with relatively new gcc/clang + kernel_flags['sse2'] = '-ffast-math -msse -msse2 -mfpmath=sse' + kernel_flags['sse3'] = kernel_flags['sse2'] + ' -msse3 -mssse3' + kernel_flags['sse41'] = kernel_flags['sse3'] + ' -msse4.1' + + if (env['C_COMPILER_ID'] == 'gcc' and env['CCVERSION'] >= '4.6') or (env['C_COMPILER_ID'] == 'clang' and env['CCVERSION'] >= '3.1'): + kernel_flags['avx'] = kernel_flags['sse41'] + ' -mavx' + +for kernel_type in kernel_flags.keys(): + defs.append('WITH_KERNEL_' + kernel_type.upper()) + +for kernel_type in kernel_flags.keys(): + kernel_source = path.join('kernel', 'kernel_' + kernel_type + '.cpp') + kernel_cxxflags = Split(env['CXXFLAGS']) + kernel_cxxflags.append(kernel_flags[kernel_type].split()) + kernel_defs = defs[:] + kernel_env = cycles.Clone() + + if env['OURPLATFORM'] == 'darwin' and env['C_COMPILER_ID'] == 'gcc' and env['CCVERSION'] >= '4.6': + # use Apple assembler for avx , gnu-compilers do not support it ( gnu gcc-4.6 or higher case ) + kernel_env.BlenderLib('bf_intern_cycles_' + kernel_type, [kernel_source], incs, kernel_defs, + libtype=['intern'], priority=[10], cxx_compileflags=kernel_cxxflags, + cc_compilerchange='/usr/bin/clang', cxx_compilerchange='/usr/bin/clang++') + else: + kernel_env.BlenderLib('bf_intern_cycles_' + kernel_type, [kernel_source], incs, kernel_defs, + libtype=['intern'], priority=[10], cxx_compileflags=kernel_cxxflags) cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], cxx_compileflags=cxxflags) +# OSL shaders if env['WITH_BF_CYCLES_OSL']: oso_files = SConscript(['kernel/shaders/SConscript']) cycles.Depends("kernel/osl/osl_shader.o", oso_files) diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h index ac94d43c998..bd86fbed8ce 100644 --- a/intern/cycles/util/util_optimization.h +++ b/intern/cycles/util/util_optimization.h @@ -25,8 +25,13 @@ #if defined(i386) || defined(_M_IX86) +#ifdef WITH_KERNEL_SSE2 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +#endif + +#ifdef WITH_KERNEL_SSE3 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +#endif #endif @@ -40,9 +45,17 @@ #define __KERNEL_SSE2__ /* no SSE2 kernel on x86-64, part of regular kernel */ +#ifdef WITH_KERNEL_SSE3 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +#endif + +#ifdef WITH_KERNEL_SSE41 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +#endif + +#ifdef WITH_KERNEL_AVX #define WITH_CYCLES_OPTIMIZED_KERNEL_AVX +#endif /* MSVC 2008, no SSE41 (broken blendv intrinsic) and no AVX support */ #if defined(_MSC_VER) && (_MSC_VER < 1700) |