Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@pandora.be>2013-02-04 20:12:37 +0400
committerBrecht Van Lommel <brechtvanlommel@pandora.be>2013-02-04 20:12:37 +0400
commit7c9d99334705498932a272f68f74121953d4974a (patch)
tree1225c588ddc680f7edede44d9475c24150ee929e /intern/cycles/device/device_cpu.cpp
parent52303db217e37a17313db9f38931c144bd7e8bbe (diff)
Fix cycles intersection issue with overlapping faces on windows 32 bit and CPU
without SSE3 support, due to 80 bit precision float register being used for one bounding box but not the one next to it.
Diffstat (limited to 'intern/cycles/device/device_cpu.cpp')
-rw-r--r--intern/cycles/device/device_cpu.cpp48
1 files changed, 41 insertions, 7 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index a1d7706a34e..1915245bb55 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -58,7 +58,8 @@ public:
#endif
/* do now to avoid thread issues */
- system_cpu_support_optimized();
+ system_cpu_support_sse2();
+ system_cpu_support_sse3();
}
~CPUDevice()
@@ -170,7 +171,7 @@ public:
int end_sample = tile.start_sample + tile.num_samples;
#ifdef WITH_OPTIMIZED_KERNEL
- if(system_cpu_support_optimized()) {
+ if(system_cpu_support_sse2()) {
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.cancelled()) {
if(task.need_finish_queue == false)
@@ -179,7 +180,26 @@ public:
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
- kernel_cpu_optimized_path_trace(&kg, render_buffer, rng_state,
+ kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
+ sample, x, y, tile.offset, tile.stride);
+ }
+ }
+
+ tile.sample = sample + 1;
+
+ task.update_progress(tile);
+ }
+ }
+ else if(system_cpu_support_sse3()) {
+ for(int sample = start_sample; sample < end_sample; sample++) {
+ if (task.get_cancel() || task_pool.cancelled()) {
+ if(task.need_finish_queue == false)
+ break;
+ }
+
+ for(int y = tile.y; y < tile.y + tile.h; y++) {
+ for(int x = tile.x; x < tile.x + tile.w; x++) {
+ kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
@@ -227,10 +247,16 @@ public:
void thread_tonemap(DeviceTask& task)
{
#ifdef WITH_OPTIMIZED_KERNEL
- if(system_cpu_support_optimized()) {
+ if(system_cpu_support_sse2()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_sse2_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
+ task.sample, task.resolution, x, y, task.offset, task.stride);
+ }
+ else if(system_cpu_support_sse3()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
- kernel_cpu_optimized_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
+ kernel_cpu_sse3_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, task.resolution, x, y, task.offset, task.stride);
}
else
@@ -252,9 +278,17 @@ public:
#endif
#ifdef WITH_OPTIMIZED_KERNEL
- if(system_cpu_support_optimized()) {
+ if(system_cpu_support_sse2()) {
+ for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
+ kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+
+ if(task_pool.cancelled())
+ break;
+ }
+ }
+ else if(system_cpu_support_sse3()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_optimized_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;