Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thomas Dinges <blender@dingto.org>, 2014-01-14 23:39:21 +0400
committer: Thomas Dinges <blender@dingto.org>, 2014-01-14 23:39:54 +0400
commit: 9351ac0d8577a2c76c238bbf2c365d811e986209 (patch)
tree: 51564853558f7219dfb67a3e095b5bb170bc35cb /intern/cycles/kernel
parent: d980c3eccbd020a9ff7137659e7cbfbc5adb125d (diff)
Cycles: Skip the compilation of the dedicated SSE2 kernel on x86-64. We can assume SSE2 there, so the regular kernel is simply re-used. This saves 500kb in the blender binary.
Reviewed by: brecht
Differential Revision: https://developer.blender.org/D199
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r-- intern/cycles/kernel/CMakeLists.txt | 6
-rw-r--r-- intern/cycles/kernel/kernel.cpp | 5
-rw-r--r-- intern/cycles/kernel/kernel.h | 9
-rw-r--r-- intern/cycles/kernel/kernel_sse2.cpp | 7
-rw-r--r-- intern/cycles/kernel/kernel_sse3.cpp | 7
-rw-r--r-- intern/cycles/kernel/kernel_sse41.cpp | 7
6 files changed, 26 insertions, 15 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 998d1a3540f..81499bbfda8 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -192,10 +192,8 @@ endif()
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
-if(WITH_CYCLES_OPTIMIZED_KERNEL)
- set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
- set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
-endif()
+set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
if(WITH_CYCLES_OPTIMIZED_KERNEL_SSE41)
set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index 3e2727fde9a..3fe1e80890b 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -84,6 +84,11 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
assert(0);
}
+/* On x86-64, we can assume SSE2, so avoid the extra kernel and compile this one with SSE2 intrinsics */
+#if defined(__x86_64__) || defined(_M_X64)
+#define __KERNEL_SSE2__
+#endif
+
/* Path Tracing */
void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 105a3887da0..b6db92f26e9 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -17,9 +17,10 @@
#ifndef __KERNEL_H__
#define __KERNEL_H__
-/* CPU Kernel Interfae */
+/* CPU Kernel Interface */
#include "util_types.h"
+#include "util_optimization.h"
CCL_NAMESPACE_BEGIN
@@ -43,7 +44,7 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i);
-#ifdef WITH_OPTIMIZED_KERNEL
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_sse2_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
@@ -52,7 +53,9 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i);
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_sse3_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
@@ -61,7 +64,9 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
float sample_scale, int x, int y, int offset, int stride);
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i);
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
void kernel_cpu_sse41_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
void kernel_cpu_sse41_convert_to_byte(KernelGlobals *kg, uchar4 *rgba, float *buffer,
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 9c69e519dca..953c3e4f9c9 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -17,8 +17,10 @@
/* Optimized CPU kernel entry points. This file is compiled with SSE2
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
-
-#ifdef WITH_OPTIMIZED_KERNEL
+
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
@@ -70,4 +72,3 @@ void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int
CCL_NAMESPACE_END
#endif
-
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 05877a41b4a..2a36c974191 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -17,8 +17,10 @@
/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
-
-#ifdef WITH_OPTIMIZED_KERNEL
+
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
@@ -72,4 +74,3 @@ void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int
CCL_NAMESPACE_END
#endif
-
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index 0c68fd3651b..6583feaeb45 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -17,8 +17,10 @@
/* Optimized CPU kernel entry points. This file is compiled with SSE3/SSSE3
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
-
-#ifdef WITH_OPTIMIZED_KERNEL
+
+#include "util_optimization.h"
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
/* SSE optimization disabled for now on 32 bit, see bug #36316 */
#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
@@ -73,4 +75,3 @@ void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, in
CCL_NAMESPACE_END
#endif
-