diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-03-23 19:15:54 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-03-23 19:45:19 +0300 |
commit | a1348dde2ed27d0a8a1d62f9e17602857b1f19f1 (patch) | |
tree | 7dd48094a06a8043cc837d18a072c4c735ba46ad /intern/cycles/kernel/geom | |
parent | 2a5d7b5b1e0345ce8ebf40c78ecd31eaeaa88f6d (diff) |
Cycles: Fix speed regression on GPU
Avoid construction of a temporary array and make the utility function force-inlined.
Additionally avoid calling float4_to_float3 twice.
This brings render times back to the same values as before the current patch series.
Diffstat (limited to 'intern/cycles/kernel/geom')
-rw-r--r-- | intern/cycles/kernel/geom/geom_triangle_intersect.h | 44 |
1 files changed, 23 insertions, 21 deletions
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 973b3566378..313121104f9 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -51,19 +51,22 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) - const ssef *verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex]; #else const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); - const float3 verts[3] = {float4_to_float3(tri_a), - float4_to_float3(tri_b), - float4_to_float3(tri_c)}; #endif float t, u, v; if(ray_triangle_intersect(isect_precalc, P, isect->t, - verts, +#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) + ssef_verts, +#else + float4_to_float3(tri_a), + float4_to_float3(tri_b), + float4_to_float3(tri_c), +#endif &u, &v, &t)) { #ifdef __VISIBILITY_FLAG__ @@ -105,19 +108,22 @@ ccl_device_inline void triangle_intersect_subsurface( const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) - const ssef *verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex]; #else - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); - const float3 verts[3] = {float4_to_float3(tri_a), - float4_to_float3(tri_b), - float4_to_float3(tri_c)}; + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)), + tri_b = 
float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)), + tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2)); #endif float t, u, v; if(!ray_triangle_intersect(isect_precalc, P, tmax, - verts, +#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) + ssef_verts, +#else + tri_a, + tri_b, + tri_c, +#endif &u, &v, &t)) { return; @@ -156,15 +162,11 @@ ccl_device_inline void triangle_intersect_subsurface( /* Record geometric normal. */ /* TODO(sergey): Check whether it's faster to re-use ssef verts. */ #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)), + tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)), + tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2)); #endif - /* TODO(sergey): Use float4_to_float3() on just an edges. */ - const float3 v0 = float4_to_float3(tri_a); - const float3 v1 = float4_to_float3(tri_b); - const float3 v2 = float4_to_float3(tri_c); - ss_isect->Ng[hit] = normalize(cross(v1 - v0, v2 - v0)); + ss_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); } #endif |