From 8c761ff83883248780c61a83dbc194f0ec080bed Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Tue, 25 Oct 2016 15:27:50 +0200 Subject: Cycles: Use new SSE version of offset calculation for all QBVH flavors Gives up to ~1% speedup again. While it seems to be small, still nice since the code now is actually more clean that it used to be before. --- intern/cycles/kernel/bvh/qbvh_shadow_all.h | 19 +++++++++---------- intern/cycles/kernel/bvh/qbvh_subsurface.h | 7 +++---- intern/cycles/kernel/bvh/qbvh_volume.h | 19 +++++++++---------- intern/cycles/kernel/bvh/qbvh_volume_all.h | 19 +++++++++---------- 4 files changed, 30 insertions(+), 34 deletions(-) (limited to 'intern/cycles/kernel/bvh') diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h index ae7aec2082f..5f4d06f12ea 100644 --- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h @@ -92,10 +92,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Offsets to select the side that becomes the lower or upper bound. */ int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); @@ -392,9 +391,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, num_hits_in_instance = 0; isect_array->t = isect_t; - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); @@ -450,9 +449,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, isect_t = tmax; isect_array->t = isect_t; - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); diff --git a/intern/cycles/kernel/bvh/qbvh_subsurface.h b/intern/cycles/kernel/bvh/qbvh_subsurface.h index 24aca96a298..ccd36df034a 100644 --- a/intern/cycles/kernel/bvh/qbvh_subsurface.h +++ b/intern/cycles/kernel/bvh/qbvh_subsurface.h @@ -101,10 +101,9 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Offsets to select the side that becomes the lower or upper bound. */ int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h index db9779351d2..424710b69f2 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume.h +++ b/intern/cycles/kernel/bvh/qbvh_volume.h @@ -87,10 +87,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Offsets to select the side that becomes the lower or upper bound. */ int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); @@ -303,9 +302,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); # endif - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); @@ -349,9 +348,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); # endif - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h index 88f1f764e4c..eb48af6fc68 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume_all.h +++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h @@ -91,10 +91,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Offsets to select the side that becomes the lower or upper bound. */ int near_x, near_y, near_z; int far_x, far_y, far_z; - - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); IsectPrecalc isect_precalc; triangle_intersect_precalc(dir, &isect_precalc); @@ -354,9 +353,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); # endif - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect_t); idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # if BVH_FEATURE(BVH_HAIR) @@ -420,9 +419,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, isect_t = tmax; isect_array->t = isect_t; - if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } - if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; } - if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; } + qbvh_near_far_idx_calc(idir, + &near_x, &near_y, &near_z, + &far_x, &far_y, &far_z); tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); -- cgit v1.2.3