Merge branch 'master' into temp-ghash-experiments

Note that the 'store hash' feature was removed for now: it is too complex to maintain (conflicts) and relatively easy to re-add if we ever really want it one day.

Conflicts:
    source/blender/blenlib/BLI_ghash.h
    source/blender/blenlib/intern/BLI_ghash.c
    source/blender/blenlib/intern/hash_mm2a.c
    source/blender/bmesh/tools/bmesh_region_match.c
    tests/gtests/blenlib/BLI_ghash_performance_test.cc
    tests/gtests/blenlib/BLI_ghash_test.cc
    tests/gtests/blenlib/CMakeLists.txt
author: Bastien Montagne <montagne29@wanadoo.fr> 2015-06-29 17:41:00 +0300
committer: Bastien Montagne <montagne29@wanadoo.fr> 2015-06-29 18:18:11 +0300
commit: d140e70c496122915eb5c05aba83153e2e0d7998 (patch)
tree: 1e589247d69da64aa7b0e7802319237ec050b5d6 /intern/cycles/kernel/geom/geom_triangle_intersect.h
parent: 147bd16ed1bb3415b30408b0eab110d0854eadd2 (diff)
parent: 295d0c52a26730edc6d4ed1276e4051cce006be5 (diff)
1 files changed, 55 insertions, 59 deletions
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index c9e30a451da..3ef918dc842 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-/* Triangle/Ray intersections .
+/* Triangle/Ray intersections.
  *
  * For BVH ray intersection we use a precomputed triangle storage to accelerate
  * intersection at the cost of more memory usage.
@@ -49,18 +49,27 @@ typedef struct IsectPrecalc {
 	float Sx, Sy, Sz;
 } IsectPrecalc;
 
-/* Workaround for CUDA toolkit 6.5.16. */
-#if defined(__KERNEL_CPU__) || !defined(__KERNEL_CUDA_EXPERIMENTAL__) || __CUDA_ARCH__ < 500
+#if defined(__KERNEL_CUDA__)
 #  if (defined(i386) || defined(_M_IX86))
+#    if __CUDA_ARCH__ > 500
 ccl_device_noinline
-#  else
+#    else  /* __CUDA_ARCH__ > 500 */
 ccl_device_inline
-#  endif
-#else
+#    endif  /* __CUDA_ARCH__ > 500 */
+#  else  /* (defined(i386) || defined(_M_IX86)) */
+#    if defined(__KERNEL_EXPERIMENTAL__) && (__CUDA_ARCH__ >= 500)
 ccl_device_noinline
-#endif
+#    else
+ccl_device_inline
+#    endif
+#  endif  /* (defined(i386) || defined(_M_IX86)) */
+#elif defined(__KERNEL_OPENCL_APPLE__)
+ccl_device_noinline
+#else  /* defined(__KERNEL_OPENCL_APPLE__) */
+ccl_device_inline
+#endif  /* defined(__KERNEL_OPENCL_APPLE__) */
 void triangle_intersect_precalc(float3 dir,
-                                 IsectPrecalc *isect_precalc)
+                                IsectPrecalc *isect_precalc)
 {
 	/* Calculate dimension where the ray direction is maximal. */
 	int kz = util_max_axis(make_float3(fabsf(dir.x),
@@ -77,10 +86,10 @@ void triangle_intersect_precalc(float3 dir,
 	}
 
 	/* Calculate the shear constants. */
-	float inf_dir_z = 1.0f / IDX(dir, kz);
-	isect_precalc->Sx = IDX(dir, kx) * inf_dir_z;
-	isect_precalc->Sy = IDX(dir, ky) * inf_dir_z;
-	isect_precalc->Sz = inf_dir_z;
+	float inv_dir_z = 1.0f / IDX(dir, kz);
+	isect_precalc->Sx = IDX(dir, kx) * inv_dir_z;
+	isect_precalc->Sy = IDX(dir, ky) * inv_dir_z;
+	isect_precalc->Sz = inv_dir_z;
 
 	/* Store the dimensions. */
 	isect_precalc->kx = kx;
@@ -98,7 +107,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
                                           const IsectPrecalc *isect_precalc,
                                           Intersection *isect,
                                           float3 P,
-                                          float3 dir,
                                           uint visibility,
                                           int object,
                                           int triAddr)
@@ -111,14 +119,12 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	const float Sz = isect_precalc->Sz;
 
 	/* Calculate vertices relative to ray origin. */
-	float3 tri[3];
-	tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
-	tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
-	tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
-	const float3 A = tri[0] - P;
-	const float3 B = tri[1] - P;
-	const float3 C = tri[2] - P;
+	const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+	             tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+	             tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+	const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+	const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+	const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
 
 	const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
 	const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -155,8 +161,8 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	 */
 	const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
 	const float sign_T = xor_signmast(T, sign_mask);
-	if ((sign_T < 0.0f) ||
-	    (sign_T > isect->t * xor_signmast(det, sign_mask)))
+	if((sign_T < 0.0f) ||
+	   (sign_T > isect->t * xor_signmast(det, sign_mask)))
 	{
 		return false;
 	}
@@ -191,7 +197,6 @@ ccl_device_inline void triangle_intersect_subsurface(
         const IsectPrecalc *isect_precalc,
         Intersection *isect_array,
         float3 P,
-        float3 dir,
         int object,
         int triAddr,
         float tmax,
@@ -207,14 +212,12 @@ ccl_device_inline void triangle_intersect_subsurface(
 	const float Sz = isect_precalc->Sz;
 
 	/* Calculate vertices relative to ray origin. */
-	float3 tri[3];
-	tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
-	tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
-	tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
-	const float3 A = tri[0] - P;
-	const float3 B = tri[1] - P;
-	const float3 C = tri[2] - P;
+	const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+	             tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+	             tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+	const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+	const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+	const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
 
 	const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
 	const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -249,13 +252,10 @@ ccl_device_inline void triangle_intersect_subsurface(
 	/* Calculate scaled z−coordinates of vertices and use them to calculate
 	 * the hit distance.
 	 */
-	const float Az = Sz * A_kz;
-	const float Bz = Sz * B_kz;
-	const float Cz = Sz * C_kz;
-	const float T = U * Az + V * Bz + W * Cz;
-
-	if ((xor_signmast(T, sign_mask) < 0.0f) ||
-	    (xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask)))
+	const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
+	const float sign_T = xor_signmast(T, sign_mask);
+	if((sign_T < 0.0f) ||
+	   (sign_T > tmax * xor_signmast(det, sign_mask)))
 	{
 		return;
 	}
@@ -315,7 +315,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
 			return P;
 		}
 #ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_itfm;
+		Transform tfm = ccl_fetch(sd, ob_itfm);
 #else
 		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
 #endif
@@ -327,14 +327,12 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
 
 	P = P + D*t;
 
-	float3 tri[3];
-	tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
-	tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
-	tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
-	float3 edge1 = tri[0] - tri[2];
-	float3 edge2 = tri[1] - tri[2];
-	float3 tvec = P - tri[2];
+	const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+	             tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+	             tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+	float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+	float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+	float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
 	float3 qvec = cross(tvec, edge1);
 	float3 pvec = cross(D, edge2);
 	float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -343,7 +341,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
 
 	if(isect->object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_tfm;
+		Transform tfm = ccl_fetch(sd, ob_tfm);
 #else
 		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
 #endif
@@ -372,7 +370,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
 #ifdef __INTERSECTION_REFINE__
 	if(isect->object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_itfm;
+		Transform tfm = ccl_fetch(sd, ob_itfm);
 #else
 		Transform tfm = object_fetch_transform(kg,
 		                                       isect->object,
@@ -386,14 +384,12 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
 
 	P = P + D*t;
 
-	float3 tri[3];
-	tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
-	tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
-	tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
-	float3 edge1 = tri[0] - tri[2];
-	float3 edge2 = tri[1] - tri[2];
-	float3 tvec = P - tri[2];
+	const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+	             tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+	             tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+	float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+	float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+	float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
 	float3 qvec = cross(tvec, edge1);
 	float3 pvec = cross(D, edge2);
 	float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -402,7 +398,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
 
 	if(isect->object != OBJECT_NONE) {
 #ifdef __OBJECT_MOTION__
-		Transform tfm = sd->ob_tfm;
+		Transform tfm = ccl_fetch(sd, ob_tfm);
 #else
 		Transform tfm = object_fetch_transform(kg,
 		                                       isect->object,
author	Bastien Montagne <montagne29@wanadoo.fr>	2015-06-29 17:41:00 +0300
committer	Bastien Montagne <montagne29@wanadoo.fr>	2015-06-29 18:18:11 +0300
commit	d140e70c496122915eb5c05aba83153e2e0d7998 (patch)
tree	1e589247d69da64aa7b0e7802319237ec050b5d6 /intern/cycles/kernel/geom/geom_triangle_intersect.h
parent	147bd16ed1bb3415b30408b0eab110d0854eadd2 (diff)
parent	295d0c52a26730edc6d4ed1276e4051cce006be5 (diff)