diff options
Diffstat (limited to 'intern/cycles/device/device_cpu.cpp')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 127 |
1 file changed, 94 insertions, 33 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index f06963c146e..676b1279a80 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -19,15 +19,8 @@ /* So ImathMath is included before our kernel_cpu_compat. */ #ifdef WITH_OSL -# if defined(_MSC_VER) -/* Prevent OSL from polluting the context with weird macros from windows.h. - * TODO(sergey): Ideally it's only enough to have class/struct declarations in - * the header and skip header include here. - */ -# define NOGDI -# define NOMINMAX -# define WIN32_LEAN_AND_MEAN -# endif +/* So no context pollution happens from indirectly included windows.h */ +# include "util_windows.h" # include <OSL/oslexec.h> #endif @@ -78,6 +71,40 @@ public: system_cpu_support_sse41(); system_cpu_support_avx(); system_cpu_support_avx2(); + +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 + if(system_cpu_support_avx2()) { + VLOG(1) << "Will be using AVX2 kernels."; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX + if(system_cpu_support_avx()) { + VLOG(1) << "Will be using AVX kernels."; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 + if(system_cpu_support_sse41()) { + VLOG(1) << "Will be using SSE4.1 kernels."; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 + if(system_cpu_support_sse3()) { + VLOG(1) << "Will be using SSE3kernels."; + } + else +#endif +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 + if(system_cpu_support_sse2()) { + VLOG(1) << "Will be using SSE2 kernels."; + } + else +#endif + { + VLOG(1) << "Will be using regular kernels."; + } } ~CPUDevice() @@ -197,31 +224,38 @@ public: void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - if(system_cpu_support_avx2()) + if(system_cpu_support_avx2()) { path_trace_kernel = kernel_cpu_avx2_path_trace; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - if(system_cpu_support_avx()) + 
if(system_cpu_support_avx()) { path_trace_kernel = kernel_cpu_avx_path_trace; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - if(system_cpu_support_sse41()) + if(system_cpu_support_sse41()) { path_trace_kernel = kernel_cpu_sse41_path_trace; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - if(system_cpu_support_sse3()) + if(system_cpu_support_sse3()) { path_trace_kernel = kernel_cpu_sse3_path_trace; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - if(system_cpu_support_sse2()) + if(system_cpu_support_sse2()) { path_trace_kernel = kernel_cpu_sse2_path_trace; + } else #endif + { path_trace_kernel = kernel_cpu_path_trace; + } while(task.acquire_tile(this, tile)) { float *render_buffer = (float*)tile.buffer; @@ -267,32 +301,38 @@ public: if(task.rgba_half) { void(*convert_to_half_float_kernel)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - if(system_cpu_support_avx2()) + if(system_cpu_support_avx2()) { convert_to_half_float_kernel = kernel_cpu_avx2_convert_to_half_float; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - if(system_cpu_support_avx()) - for(int y = task.y; y < task.y + task.h; y++) + if(system_cpu_support_avx()) { convert_to_half_float_kernel = kernel_cpu_avx_convert_to_half_float; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - if(system_cpu_support_sse41()) + if(system_cpu_support_sse41()) { convert_to_half_float_kernel = kernel_cpu_sse41_convert_to_half_float; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - if(system_cpu_support_sse3()) + if(system_cpu_support_sse3()) { convert_to_half_float_kernel = kernel_cpu_sse3_convert_to_half_float; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - if(system_cpu_support_sse2()) + if(system_cpu_support_sse2()) { convert_to_half_float_kernel = kernel_cpu_sse2_convert_to_half_float; + } else #endif + { convert_to_half_float_kernel = kernel_cpu_convert_to_half_float; + } for(int y = 
task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) @@ -302,31 +342,38 @@ public: else { void(*convert_to_byte_kernel)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - if(system_cpu_support_avx2()) + if(system_cpu_support_avx2()) { convert_to_byte_kernel = kernel_cpu_avx2_convert_to_byte; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - if(system_cpu_support_avx()) + if(system_cpu_support_avx()) { convert_to_byte_kernel = kernel_cpu_avx_convert_to_byte; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - if(system_cpu_support_sse41()) + if(system_cpu_support_sse41()) { convert_to_byte_kernel = kernel_cpu_sse41_convert_to_byte; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - if(system_cpu_support_sse3()) + if(system_cpu_support_sse3()) { convert_to_byte_kernel = kernel_cpu_sse3_convert_to_byte; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - if(system_cpu_support_sse2()) + if(system_cpu_support_sse2()) { convert_to_byte_kernel = kernel_cpu_sse2_convert_to_byte; + } else #endif + { convert_to_byte_kernel = kernel_cpu_convert_to_byte; + } for(int y = task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) @@ -343,39 +390,53 @@ public: #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif - void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int); + void(*shader_kernel)(KernelGlobals*, uint4*, float4*, float*, int, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - if(system_cpu_support_avx2()) + if(system_cpu_support_avx2()) { shader_kernel = kernel_cpu_avx2_shader; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - if(system_cpu_support_avx()) + if(system_cpu_support_avx()) { shader_kernel = kernel_cpu_avx_shader; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - if(system_cpu_support_sse41()) + if(system_cpu_support_sse41()) { shader_kernel = 
kernel_cpu_sse41_shader; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - if(system_cpu_support_sse3()) + if(system_cpu_support_sse3()) { shader_kernel = kernel_cpu_sse3_shader; + } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - if(system_cpu_support_sse2()) + if(system_cpu_support_sse2()) { shader_kernel = kernel_cpu_sse2_shader; + } else #endif + { shader_kernel = kernel_cpu_shader; + } for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, - task.shader_eval_type, x, task.offset, sample); + shader_kernel(&kg, + (uint4*)task.shader_input, + (float4*)task.shader_output, + (float*)task.shader_output_luma, + task.shader_eval_type, + task.shader_filter, + x, + task.offset, + sample); if(task.get_cancel() || task_pool.canceled()) break; |