diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-10-25 16:27:50 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2016-10-25 16:27:50 +0300 |
commit | 8c761ff83883248780c61a83dbc194f0ec080bed (patch) | |
tree | 98d637a0bba7e916ebd0ade733132ac533a916f9 | |
parent | f7cf2f659afc97cf4d4a8603d4dcafb97031ba79 (diff) |
Cycles: Use new SSE version of offset calculation for all QBVH flavors
Gives up to ~1% speedup again.
While the gain seems small, it is still nice, since the code is now actually
cleaner than it used to be.
-rw-r--r-- | intern/cycles/kernel/bvh/qbvh_shadow_all.h | 19 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/qbvh_subsurface.h | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/qbvh_volume.h | 19 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/qbvh_volume_all.h | 19 |
4 files changed, 30 insertions, 34 deletions
diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h index ae7aec2082f..5f4d06f12ea 100644 --- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h @@ -92,10 +92,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Offsets to select the side that becomes the lower or upper bound. */ int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); @@ -392,9 +391,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, num_hits_in_instance = 0; isect_array->t = isect_t; - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); @@ -450,9 +449,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, isect_t = tmax; isect_array->t = isect_t; - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); diff --git 
a/intern/cycles/kernel/bvh/qbvh_subsurface.h b/intern/cycles/kernel/bvh/qbvh_subsurface.h index 24aca96a298..ccd36df034a 100644 --- a/intern/cycles/kernel/bvh/qbvh_subsurface.h +++ b/intern/cycles/kernel/bvh/qbvh_subsurface.h @@ -101,10 +101,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Offsets to select the side that becomes the lower or upper bound. */ int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h index db9779351d2..424710b69f2 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume.h +++ b/intern/cycles/kernel/bvh/qbvh_volume.h @@ -87,10 +87,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Offsets to select the side that becomes the lower or upper bound. 
*/ int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); @@ -303,9 +302,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); # endif - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); @@ -349,9 +348,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); # endif - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h index 88f1f764e4c..eb48af6fc68 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume_all.h +++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h @@ -91,10 +91,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Offsets to select the side that becomes the 
lower or upper bound. */ int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); @@ -354,9 +353,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); # endif - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect_t); idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # if BVH_FEATURE(BVH_HAIR) @@ -420,9 +419,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, isect_t = tmax; isect_array->t = isect_t; - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); |