Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2014-01-15 18:11:50 +0400
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2014-01-15 18:11:50 +0400
commit8af782ad22c42654d23ca6379f105af8d98956cc (patch)
tree0675da25547f48d984e3c522b7fb380d43f2ea15
parentebbb6adf32b91660aab13cf4f5061ae916af0a30 (diff)
Code cleanup: some reshuffling of SIMD defines moving more code to util_optimization.h.
-rw-r--r--intern/cycles/kernel/kernel.h1
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp8
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp8
-rw-r--r--intern/cycles/kernel/kernel_sse41.cpp8
-rw-r--r--intern/cycles/util/util_optimization.h80
-rw-r--r--intern/cycles/util/util_types.h58
6 files changed, 94 insertions, 69 deletions
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index b6db92f26e9..01bea10c1e7 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -20,7 +20,6 @@
/* CPU Kernel Interface */
#include "util_types.h"
-#include "util_optimization.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 6f3f1714cbf..6a2a7804146 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -17,16 +17,16 @@
/* Optimized CPU kernel entry points. This file is compiled with SSE2
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
-
-#include "util_optimization.h"
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
#define __KERNEL_SSE2__
#endif
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_math.h"
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index e6760981eef..9d0abb93cc6 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -17,10 +17,6 @@
/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
-
-#include "util_optimization.h"
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
@@ -29,6 +25,10 @@
#define __KERNEL_SSSE3__
#endif
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_math.h"
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index fd2198aebda..bc20de0ec20 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -17,10 +17,6 @@
/* Optimized CPU kernel entry points. This file is compiled with SSE4.1
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
-
-#include "util_optimization.h"
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
@@ -30,6 +26,10 @@
#define __KERNEL_SSE41__
#endif
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_math.h"
diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h
index 61a2ad088dd..b7a2506c950 100644
--- a/intern/cycles/util/util_optimization.h
+++ b/intern/cycles/util/util_optimization.h
@@ -14,8 +14,31 @@
* limitations under the License
*/
+#ifndef __UTIL_OPTIMIZATION_H__
+#define __UTIL_OPTIMIZATION_H__
+
+#ifndef __KERNEL_GPU__
+
+/* x86
+ *
+ * Compile a regular, SSE2 and SSE3 kernel. */
+
+#if defined(i386) || defined(_M_IX86)
+
+#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+
+#endif
+
+/* x86-64
+ *
+ * Compile a regular (includes SSE2), SSE3 and SSE 4.1 kernel. */
+
#if defined(__x86_64__) || defined(_M_X64)
+/* SSE2 is always available on x86-64 CPUs, so auto enable */
+#define __KERNEL_SSE2__
+
/* no SSE2 kernel on x86-64, part of regular kernel */
#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
@@ -27,9 +50,60 @@
#endif
-#if defined(i386) || defined(_M_IX86)
+/* SSE Experiment
+ *
+ * This is disabled code for an experiment to use SSE types globally for types
+ * such as float3 and float4. Currently this gives an overall slowdown. */
-#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+#if 0
+#define __KERNEL_SSE__
+#ifndef __KERNEL_SSE2__
+#define __KERNEL_SSE2__
+#endif
+#ifndef __KERNEL_SSE3__
+#define __KERNEL_SSE3__
+#endif
+#ifndef __KERNEL_SSSE3__
+#define __KERNEL_SSSE3__
+#endif
+#ifndef __KERNEL_SSE4__
+#define __KERNEL_SSE4__
+#endif
+#endif
+
+/* SSE Intrinsics includes
+ *
+ * We assume __KERNEL_SSEX__ flags to have been defined at this point */
+
+/* SSE intrinsics headers */
+#ifndef FREE_WINDOWS64
+
+#ifdef __KERNEL_SSE2__
+#include <xmmintrin.h> /* SSE 1 */
+#include <emmintrin.h> /* SSE 2 */
+#endif
+
+#ifdef __KERNEL_SSE3__
+#include <pmmintrin.h> /* SSE 3 */
+#endif
+#ifdef __KERNEL_SSSE3__
+#include <tmmintrin.h> /* SSSE 3 */
#endif
+
+#ifdef __KERNEL_SSE41__
+#include <smmintrin.h> /* SSE 4.1 */
+#endif
+
+#else
+
+/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
+ * Since we can't avoid including <windows.h>, it is better to include only that. */
+#include <windows.h>
+
+#endif
+
+#endif
+
+#endif /* __UTIL_OPTIMIZATION_H__ */
+
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 2ee2f0f92e0..ebfd8b6700c 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -57,67 +57,19 @@
#endif
-/* SIMD Types */
+/* Standard Integer Types */
#ifndef __KERNEL_GPU__
-#define __KERNEL_SSE2__
-
-/* not enabled, globally applying it gives slowdown, only for testing. */
-#if 0
-#define __KERNEL_SSE__
-#ifndef __KERNEL_SSE2__
-#define __KERNEL_SSE2__
-#endif
-#ifndef __KERNEL_SSE3__
-#define __KERNEL_SSE3__
-#endif
-#ifndef __KERNEL_SSSE3__
-#define __KERNEL_SSSE3__
-#endif
-#ifndef __KERNEL_SSE4__
-#define __KERNEL_SSE4__
-#endif
-#endif
-
-/* SSE2 is always available on x86_64 CPUs, so auto enable */
-#if defined(__x86_64__) && !defined(__KERNEL_SSE2__)
-#define __KERNEL_SSE2__
-#endif
-
-/* SSE intrinsics headers */
-#ifndef FREE_WINDOWS64
-
-#ifdef __KERNEL_SSE2__
-#include <xmmintrin.h> /* SSE 1 */
-#include <emmintrin.h> /* SSE 2 */
-#endif
-
-#ifdef __KERNEL_SSE3__
-#include <pmmintrin.h> /* SSE 3 */
-#endif
-
-#ifdef __KERNEL_SSSE3__
-#include <tmmintrin.h> /* SSSE 3 */
-#endif
-
-#ifdef __KERNEL_SSE41__
-#include <smmintrin.h> /* SSE 4.1 */
-#endif
-
-#else
-
-/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
- * Since we can't avoid including <windows.h>, better only include that */
-#include <windows.h>
-
-#endif
-
/* int8_t, uint16_t, and friends */
#ifndef _WIN32
#include <stdint.h>
#endif
+/* SIMD Types */
+
+#include "util_optimization.h"
+
#endif
CCL_NAMESPACE_BEGIN