Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2015-04-08 16:44:31 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2015-04-08 16:47:25 +0300
commitbf11e362c5418ec40dc0437d329efceb225eb5ef (patch)
tree8a220dc06e43f9d4e83a2d0b7dc53a00b105e493 /intern/cycles/kernel
parent2c91f9dc1129c4d11a0100dc1925ac244e550cf6 (diff)
Fix T44046: Cycles speed regression in 2.74 (CPU only)
Issue was caused by MSVC not being able to optimize some code out in the same way as GCC/Clang does, so now that parts of code are explicitly unfolded in order to help compilers out. This makes speed loss much less drastic on my laptop. That's probably as good as we can do with MSVC without investing infinite amount of time looking trying to workaround the optimizer.
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h56
1 files changed, 24 insertions, 32 deletions
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index a0e36058bf8..3990bae5478 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -110,14 +110,12 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
- const float3 A = tri[0] - P;
- const float3 B = tri[1] - P;
- const float3 C = tri[2] - P;
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+ const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+ const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -205,14 +203,12 @@ ccl_device_inline void triangle_intersect_subsurface(
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
- const float3 A = tri[0] - P;
- const float3 B = tri[1] - P;
- const float3 C = tri[2] - P;
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+ const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+ const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -322,14 +318,12 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
P = P + D*t;
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
- float3 edge1 = tri[0] - tri[2];
- float3 edge2 = tri[1] - tri[2];
- float3 tvec = P - tri[2];
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
float3 qvec = cross(tvec, edge1);
float3 pvec = cross(D, edge2);
float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -381,14 +375,12 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
P = P + D*t;
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
- float3 edge1 = tri[0] - tri[2];
- float3 edge2 = tri[1] - tri[2];
- float3 tvec = P - tri[2];
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
float3 qvec = cross(tvec, edge1);
float3 pvec = cross(D, edge2);
float rt = dot(edge2, qvec) / dot(edge1, pvec);