diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2013-02-04 20:12:37 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2013-02-04 20:12:37 +0400 |
commit | 7c9d99334705498932a272f68f74121953d4974a (patch) | |
tree | 1225c588ddc680f7edede44d9475c24150ee929e /intern/cycles/device | |
parent | 52303db217e37a17313db9f38931c144bd7e8bbe (diff) |
Fix cycles intersection issue with overlapping faces on windows 32 bit and CPU
without SSE3 support, due to 80 bit precision float register being used for one
bounding box but not the one next to it.
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 48 |
1 files changed, 41 insertions, 7 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index a1d7706a34e..1915245bb55 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -58,7 +58,8 @@ public: #endif /* do now to avoid thread issues */ - system_cpu_support_optimized(); + system_cpu_support_sse2(); + system_cpu_support_sse3(); } ~CPUDevice() @@ -170,7 +171,7 @@ public: int end_sample = tile.start_sample + tile.num_samples; #ifdef WITH_OPTIMIZED_KERNEL - if(system_cpu_support_optimized()) { + if(system_cpu_support_sse2()) { for(int sample = start_sample; sample < end_sample; sample++) { if (task.get_cancel() || task_pool.cancelled()) { if(task.need_finish_queue == false) @@ -179,7 +180,26 @@ public: for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { - kernel_cpu_optimized_path_trace(&kg, render_buffer, rng_state, + kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state, + sample, x, y, tile.offset, tile.stride); + } + } + + tile.sample = sample + 1; + + task.update_progress(tile); + } + } + else if(system_cpu_support_sse3()) { + for(int sample = start_sample; sample < end_sample; sample++) { + if (task.get_cancel() || task_pool.cancelled()) { + if(task.need_finish_queue == false) + break; + } + + for(int y = tile.y; y < tile.y + tile.h; y++) { + for(int x = tile.x; x < tile.x + tile.w; x++) { + kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride); } } @@ -227,10 +247,16 @@ public: void thread_tonemap(DeviceTask& task) { #ifdef WITH_OPTIMIZED_KERNEL - if(system_cpu_support_optimized()) { + if(system_cpu_support_sse2()) { + for(int y = task.y; y < task.y + task.h; y++) + for(int x = task.x; x < task.x + task.w; x++) + kernel_cpu_sse2_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer, + task.sample, task.resolution, x, y, task.offset, task.stride); + } + else if(system_cpu_support_sse3()) { for(int y = task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_optimized_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer, + kernel_cpu_sse3_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer, task.sample, task.resolution, x, y, task.offset, task.stride); } else @@ -252,9 +278,17 @@ public: #endif #ifdef WITH_OPTIMIZED_KERNEL - if(system_cpu_support_optimized()) { + if(system_cpu_support_sse2()) { + for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { + kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); + + if(task_pool.cancelled()) + break; + } + } + else if(system_cpu_support_sse3()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { - kernel_cpu_optimized_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); + kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); if(task_pool.cancelled()) break; |