Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/device_cpu.cpp')
-rw-r--r--intern/cycles/device/device_cpu.cpp127
1 files changed, 94 insertions, 33 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index f06963c146e..676b1279a80 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -19,15 +19,8 @@
/* So ImathMath is included before our kernel_cpu_compat. */
#ifdef WITH_OSL
-# if defined(_MSC_VER)
-/* Prevent OSL from polluting the context with weird macros from windows.h.
- * TODO(sergey): Ideally it's only enough to have class/struct declarations in
- * the header and skip header include here.
- */
-# define NOGDI
-# define NOMINMAX
-# define WIN32_LEAN_AND_MEAN
-# endif
+/* So no context pollution happens from indirectly included windows.h */
+# include "util_windows.h"
# include <OSL/oslexec.h>
#endif
@@ -78,6 +71,40 @@ public:
system_cpu_support_sse41();
system_cpu_support_avx();
system_cpu_support_avx2();
+
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+ if(system_cpu_support_avx2()) {
+ VLOG(1) << "Will be using AVX2 kernels.";
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+ if(system_cpu_support_avx()) {
+ VLOG(1) << "Will be using AVX kernels.";
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+ if(system_cpu_support_sse41()) {
+ VLOG(1) << "Will be using SSE4.1 kernels.";
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+ if(system_cpu_support_sse3()) {
+ VLOG(1) << "Will be using SSE3 kernels.";
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+ if(system_cpu_support_sse2()) {
+ VLOG(1) << "Will be using SSE2 kernels.";
+ }
+ else
+#endif
+ {
+ VLOG(1) << "Will be using regular kernels.";
+ }
}
~CPUDevice()
@@ -197,31 +224,38 @@ public:
void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
- if(system_cpu_support_avx2())
+ if(system_cpu_support_avx2()) {
path_trace_kernel = kernel_cpu_avx2_path_trace;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
- if(system_cpu_support_avx())
+ if(system_cpu_support_avx()) {
path_trace_kernel = kernel_cpu_avx_path_trace;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
- if(system_cpu_support_sse41())
+ if(system_cpu_support_sse41()) {
path_trace_kernel = kernel_cpu_sse41_path_trace;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
- if(system_cpu_support_sse3())
+ if(system_cpu_support_sse3()) {
path_trace_kernel = kernel_cpu_sse3_path_trace;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
- if(system_cpu_support_sse2())
+ if(system_cpu_support_sse2()) {
path_trace_kernel = kernel_cpu_sse2_path_trace;
+ }
else
#endif
+ {
path_trace_kernel = kernel_cpu_path_trace;
+ }
while(task.acquire_tile(this, tile)) {
float *render_buffer = (float*)tile.buffer;
@@ -267,32 +301,38 @@ public:
if(task.rgba_half) {
void(*convert_to_half_float_kernel)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
- if(system_cpu_support_avx2())
+ if(system_cpu_support_avx2()) {
convert_to_half_float_kernel = kernel_cpu_avx2_convert_to_half_float;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
- if(system_cpu_support_avx())
- for(int y = task.y; y < task.y + task.h; y++)
+ if(system_cpu_support_avx()) {
convert_to_half_float_kernel = kernel_cpu_avx_convert_to_half_float;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
- if(system_cpu_support_sse41())
+ if(system_cpu_support_sse41()) {
convert_to_half_float_kernel = kernel_cpu_sse41_convert_to_half_float;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
- if(system_cpu_support_sse3())
+ if(system_cpu_support_sse3()) {
convert_to_half_float_kernel = kernel_cpu_sse3_convert_to_half_float;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
- if(system_cpu_support_sse2())
+ if(system_cpu_support_sse2()) {
convert_to_half_float_kernel = kernel_cpu_sse2_convert_to_half_float;
+ }
else
#endif
+ {
convert_to_half_float_kernel = kernel_cpu_convert_to_half_float;
+ }
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
@@ -302,31 +342,38 @@ public:
else {
void(*convert_to_byte_kernel)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
- if(system_cpu_support_avx2())
+ if(system_cpu_support_avx2()) {
convert_to_byte_kernel = kernel_cpu_avx2_convert_to_byte;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
- if(system_cpu_support_avx())
+ if(system_cpu_support_avx()) {
convert_to_byte_kernel = kernel_cpu_avx_convert_to_byte;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
- if(system_cpu_support_sse41())
+ if(system_cpu_support_sse41()) {
convert_to_byte_kernel = kernel_cpu_sse41_convert_to_byte;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
- if(system_cpu_support_sse3())
+ if(system_cpu_support_sse3()) {
convert_to_byte_kernel = kernel_cpu_sse3_convert_to_byte;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
- if(system_cpu_support_sse2())
+ if(system_cpu_support_sse2()) {
convert_to_byte_kernel = kernel_cpu_sse2_convert_to_byte;
+ }
else
#endif
+ {
convert_to_byte_kernel = kernel_cpu_convert_to_byte;
+ }
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
@@ -343,39 +390,53 @@ public:
#ifdef WITH_OSL
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
- void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int);
+ void(*shader_kernel)(KernelGlobals*, uint4*, float4*, float*, int, int, int, int, int);
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
- if(system_cpu_support_avx2())
+ if(system_cpu_support_avx2()) {
shader_kernel = kernel_cpu_avx2_shader;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
- if(system_cpu_support_avx())
+ if(system_cpu_support_avx()) {
shader_kernel = kernel_cpu_avx_shader;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
- if(system_cpu_support_sse41())
+ if(system_cpu_support_sse41()) {
shader_kernel = kernel_cpu_sse41_shader;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
- if(system_cpu_support_sse3())
+ if(system_cpu_support_sse3()) {
shader_kernel = kernel_cpu_sse3_shader;
+ }
else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
- if(system_cpu_support_sse2())
+ if(system_cpu_support_sse2()) {
shader_kernel = kernel_cpu_sse2_shader;
+ }
else
#endif
+ {
shader_kernel = kernel_cpu_shader;
+ }
for(int sample = 0; sample < task.num_samples; sample++) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
- shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
- task.shader_eval_type, x, task.offset, sample);
+ shader_kernel(&kg,
+ (uint4*)task.shader_input,
+ (float4*)task.shader_output,
+ (float*)task.shader_output_luma,
+ task.shader_eval_type,
+ task.shader_filter,
+ x,
+ task.offset,
+ sample);
if(task.get_cancel() || task_pool.canceled())
break;