diff options
author | Thomas Dinges <blender@dingto.org> | 2014-01-14 23:39:21 +0400 |
---|---|---|
committer | Thomas Dinges <blender@dingto.org> | 2014-01-14 23:39:54 +0400 |
commit | 9351ac0d8577a2c76c238bbf2c365d811e986209 (patch) | |
tree | 51564853558f7219dfb67a3e095b5bb170bc35cb /intern/cycles/kernel | |
parent | d980c3eccbd020a9ff7137659e7cbfbc5adb125d (diff) |
Cycles: Skip compiling the dedicated SSE2 kernel on x86-64. SSE2 can be assumed there, so the regular kernel is simply re-used. This saves 500 KB in the Blender binary.
Reviewed by: brecht
Differential Revision: https://developer.blender.org/D199
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.cpp | 5 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse2.cpp | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse3.cpp | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_sse41.cpp | 7 |
6 files changed, 26 insertions, 15 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 998d1a3540f..81499bbfda8 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -192,10 +192,8 @@ endif() include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -if(WITH_CYCLES_OPTIMIZED_KERNEL) - set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") - set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") -endif() +set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") +set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") if(WITH_CYCLES_OPTIMIZED_KERNEL_SSE41) set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 3e2727fde9a..3fe1e80890b 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -84,6 +84,11 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t assert(0); } +/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */ +#if defined(__x86_64__) || defined(_M_X64) +#define __KERNEL_SSE2__ +#endif + /* Path Tracing */ void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 105a3887da0..b6db92f26e9 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -17,9 +17,10 @@ #ifndef __KERNEL_H__ #define __KERNEL_H__ -/* CPU Kernel Interfae */ +/* CPU Kernel Interface */ #include "util_types.h" +#include "util_optimization.h" CCL_NAMESPACE_BEGIN @@ -43,7 +44,7 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 
*rgba, float *bu void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i); -#ifdef WITH_OPTIMIZED_KERNEL +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride); void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, @@ -52,7 +53,9 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i); +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride); void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, @@ -61,7 +64,9 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa float sample_scale, int x, int y, int offset, int stride); void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i); +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 void kernel_cpu_sse41_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride); void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer, diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 9c69e519dca..953c3e4f9c9 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -17,8 +17,10 @@ /* Optimized CPU kernel entry points. This file is compiled with SSE2 * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ - -#ifdef WITH_OPTIMIZED_KERNEL + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 /* SSE optimization disabled for now on 32 bit, see bug #36316 */ #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) @@ -70,4 +72,3 @@ void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int CCL_NAMESPACE_END #endif - diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index 05877a41b4a..2a36c974191 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -17,8 +17,10 @@ /* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ - -#ifdef WITH_OPTIMIZED_KERNEL + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 /* SSE optimization disabled for now on 32 bit, see bug #36316 */ #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) @@ -72,4 +74,3 @@ void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int CCL_NAMESPACE_END #endif - diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp index 0c68fd3651b..6583feaeb45 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernel_sse41.cpp @@ -17,8 +17,10 @@ /* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ - -#ifdef WITH_OPTIMIZED_KERNEL + +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 /* SSE optimization disabled for now on 32 bit, see bug #36316 */ #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) @@ -73,4 +75,3 @@ void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, in CCL_NAMESPACE_END #endif - |