diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-01-15 18:11:50 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2014-01-15 18:11:50 +0400 |
commit | 8af782ad22c42654d23ca6379f105af8d98956cc (patch) | |
tree | 0675da25547f48d984e3c522b7fb380d43f2ea15 /intern | |
parent | ebbb6adf32b91660aab13cf4f5061ae916af0a30 (diff) |
Code cleanup: some reshuffling of SIMD defines, moving more code to util_optimization.h.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/kernel/kernel.h | 1 |
-rw-r--r-- | intern/cycles/kernel/kernel_sse2.cpp | 8 |
-rw-r--r-- | intern/cycles/kernel/kernel_sse3.cpp | 8 |
-rw-r--r-- | intern/cycles/kernel/kernel_sse41.cpp | 8 |
-rw-r--r-- | intern/cycles/util/util_optimization.h | 80 |
-rw-r--r-- | intern/cycles/util/util_types.h | 58 |
6 files changed, 94 insertions, 69 deletions
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index b6db92f26e9..01bea10c1e7 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -20,7 +20,6 @@ /* CPU Kernel Interface */ #include "util_types.h" -#include "util_optimization.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 6f3f1714cbf..6a2a7804146 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -17,16 +17,16 @@ /* Optimized CPU kernel entry points. This file is compiled with SSE2 * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ - -#include "util_optimization.h" - -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 /* SSE optimization disabled for now on 32 bit, see bug #36316 */ #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) #define __KERNEL_SSE2__ #endif +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 + #include "kernel.h" #include "kernel_compat_cpu.h" #include "kernel_math.h" diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index e6760981eef..9d0abb93cc6 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -17,10 +17,6 @@ /* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. 
*/ - -#include "util_optimization.h" - -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 /* SSE optimization disabled for now on 32 bit, see bug #36316 */ #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) @@ -29,6 +25,10 @@ #define __KERNEL_SSSE3__ #endif +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 + #include "kernel.h" #include "kernel_compat_cpu.h" #include "kernel_math.h" diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp index fd2198aebda..bc20de0ec20 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernel_sse41.cpp @@ -17,10 +17,6 @@ /* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3 * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ - -#include "util_optimization.h" - -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 /* SSE optimization disabled for now on 32 bit, see bug #36316 */ #if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) @@ -30,6 +26,10 @@ #define __KERNEL_SSE41__ #endif +#include "util_optimization.h" + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 + #include "kernel.h" #include "kernel_compat_cpu.h" #include "kernel_math.h" diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h index 61a2ad088dd..b7a2506c950 100644 --- a/intern/cycles/util/util_optimization.h +++ b/intern/cycles/util/util_optimization.h @@ -14,8 +14,31 @@ * limitations under the License */ +#ifndef __UTIL_OPTIMIZATION_H__ +#define __UTIL_OPTIMIZATION_H__ + +#ifndef __KERNEL_GPU__ + +/* x86 + * + * Compile a regular, SSE2 and SSE3 kernel. */ + +#if defined(i386) || defined(_M_IX86) + +#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 + +#endif + +/* x86-64 + * + * Compile a regular (includes SSE2), SSE3 and SSE 4.1 kernel. 
*/ + #if defined(__x86_64__) || defined(_M_X64) +/* SSE2 is always available on x86-64 CPUs, so auto enable */ +#define __KERNEL_SSE2__ + /* no SSE2 kernel on x86-64, part of regular kernel */ #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 #define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 @@ -27,9 +50,60 @@ #endif -#if defined(i386) || defined(_M_IX86) +/* SSE Experiment + * + * This is disabled code for an experiment to use SSE types globally for types + * such as float3 and float4. Currently this gives an overall slowdown. */ -#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +#if 0 +#define __KERNEL_SSE__ +#ifndef __KERNEL_SSE2__ +#define __KERNEL_SSE2__ +#endif +#ifndef __KERNEL_SSE3__ +#define __KERNEL_SSE3__ +#endif +#ifndef __KERNEL_SSSE3__ +#define __KERNEL_SSSE3__ +#endif +#ifndef __KERNEL_SSE4__ +#define __KERNEL_SSE4__ +#endif +#endif + +/* SSE Intrinsics includes + * + * We assume __KERNEL_SSEX__ flags to have been defined at this point */ + +/* SSE intrinsics headers */ +#ifndef FREE_WINDOWS64 + +#ifdef __KERNEL_SSE2__ +#include <xmmintrin.h> /* SSE 1 */ +#include <emmintrin.h> /* SSE 2 */ +#endif + +#ifdef __KERNEL_SSE3__ +#include <pmmintrin.h> /* SSE 3 */ +#endif +#ifdef __KERNEL_SSSE3__ +#include <tmmintrin.h> /* SSSE 3 */ #endif + +#ifdef __KERNEL_SSE41__ +#include <smmintrin.h> /* SSE 4.1 */ +#endif + +#else + +/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>. 
+ * Since we can't avoid including <windows.h>, better only include that */ +#include <windows.h> + +#endif + +#endif + +#endif /* __UTIL_OPTIMIZATION_H__ */ + diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 2ee2f0f92e0..ebfd8b6700c 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -57,67 +57,19 @@ #endif -/* SIMD Types */ +/* Standard Integer Types */ #ifndef __KERNEL_GPU__ -#define __KERNEL_SSE2__ - -/* not enabled, globally applying it gives slowdown, only for testing. */ -#if 0 -#define __KERNEL_SSE__ -#ifndef __KERNEL_SSE2__ -#define __KERNEL_SSE2__ -#endif -#ifndef __KERNEL_SSE3__ -#define __KERNEL_SSE3__ -#endif -#ifndef __KERNEL_SSSE3__ -#define __KERNEL_SSSE3__ -#endif -#ifndef __KERNEL_SSE4__ -#define __KERNEL_SSE4__ -#endif -#endif - -/* SSE2 is always available on x86_64 CPUs, so auto enable */ -#if defined(__x86_64__) && !defined(__KERNEL_SSE2__) -#define __KERNEL_SSE2__ -#endif - -/* SSE intrinsics headers */ -#ifndef FREE_WINDOWS64 - -#ifdef __KERNEL_SSE2__ -#include <xmmintrin.h> /* SSE 1 */ -#include <emmintrin.h> /* SSE 2 */ -#endif - -#ifdef __KERNEL_SSE3__ -#include <pmmintrin.h> /* SSE 3 */ -#endif - -#ifdef __KERNEL_SSSE3__ -#include <tmmintrin.h> /* SSSE 3 */ -#endif - -#ifdef __KERNEL_SSE41__ -#include <smmintrin.h> /* SSE 4.1 */ -#endif - -#else - -/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>. - * Since we can't avoid including <windows.h>, better only include that */ -#include <windows.h> - -#endif - /* int8_t, uint16_t, and friends */ #ifndef _WIN32 #include <stdint.h> #endif +/* SIMD Types */ + +#include "util_optimization.h" + #endif CCL_NAMESPACE_BEGIN |