Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBastien Montagne <montagne29@wanadoo.fr>2015-06-29 17:41:00 +0300
committerBastien Montagne <montagne29@wanadoo.fr>2015-06-29 18:18:11 +0300
commitd140e70c496122915eb5c05aba83153e2e0d7998 (patch)
tree1e589247d69da64aa7b0e7802319237ec050b5d6 /intern/cycles/kernel/geom/geom_triangle_intersect.h
parent147bd16ed1bb3415b30408b0eab110d0854eadd2 (diff)
parent295d0c52a26730edc6d4ed1276e4051cce006be5 (diff)
Merge branch 'master' into temp-ghash-experimentstemp-ghash-experiments
Note that 'store hash' feature was removed for now - to complex to maintain (conflicts) and relatively easy to re-add if we ever really want this one day. Conflicts: source/blender/blenlib/BLI_ghash.h source/blender/blenlib/intern/BLI_ghash.c source/blender/blenlib/intern/hash_mm2a.c source/blender/bmesh/tools/bmesh_region_match.c tests/gtests/blenlib/BLI_ghash_performance_test.cc tests/gtests/blenlib/BLI_ghash_test.cc tests/gtests/blenlib/CMakeLists.txt
Diffstat (limited to 'intern/cycles/kernel/geom/geom_triangle_intersect.h')
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h114
1 files changed, 55 insertions, 59 deletions
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index c9e30a451da..3ef918dc842 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-/* Triangle/Ray intersections .
+/* Triangle/Ray intersections.
*
* For BVH ray intersection we use a precomputed triangle storage to accelerate
* intersection at the cost of more memory usage.
@@ -49,18 +49,27 @@ typedef struct IsectPrecalc {
float Sx, Sy, Sz;
} IsectPrecalc;
-/* Workaround for CUDA toolkit 6.5.16. */
-#if defined(__KERNEL_CPU__) || !defined(__KERNEL_CUDA_EXPERIMENTAL__) || __CUDA_ARCH__ < 500
+#if defined(__KERNEL_CUDA__)
# if (defined(i386) || defined(_M_IX86))
+# if __CUDA_ARCH__ > 500
ccl_device_noinline
-# else
+# else /* __CUDA_ARCH__ > 500 */
ccl_device_inline
-# endif
-#else
+# endif /* __CUDA_ARCH__ > 500 */
+# else /* (defined(i386) || defined(_M_IX86)) */
+# if defined(__KERNEL_EXPERIMENTAL__) && (__CUDA_ARCH__ >= 500)
ccl_device_noinline
-#endif
+# else
+ccl_device_inline
+# endif
+# endif /* (defined(i386) || defined(_M_IX86)) */
+#elif defined(__KERNEL_OPENCL_APPLE__)
+ccl_device_noinline
+#else /* defined(__KERNEL_OPENCL_APPLE__) */
+ccl_device_inline
+#endif /* defined(__KERNEL_OPENCL_APPLE__) */
void triangle_intersect_precalc(float3 dir,
- IsectPrecalc *isect_precalc)
+ IsectPrecalc *isect_precalc)
{
/* Calculate dimension where the ray direction is maximal. */
int kz = util_max_axis(make_float3(fabsf(dir.x),
@@ -77,10 +86,10 @@ void triangle_intersect_precalc(float3 dir,
}
/* Calculate the shear constants. */
- float inf_dir_z = 1.0f / IDX(dir, kz);
- isect_precalc->Sx = IDX(dir, kx) * inf_dir_z;
- isect_precalc->Sy = IDX(dir, ky) * inf_dir_z;
- isect_precalc->Sz = inf_dir_z;
+ float inv_dir_z = 1.0f / IDX(dir, kz);
+ isect_precalc->Sx = IDX(dir, kx) * inv_dir_z;
+ isect_precalc->Sy = IDX(dir, ky) * inv_dir_z;
+ isect_precalc->Sz = inv_dir_z;
/* Store the dimensions. */
isect_precalc->kx = kx;
@@ -98,7 +107,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const IsectPrecalc *isect_precalc,
Intersection *isect,
float3 P,
- float3 dir,
uint visibility,
int object,
int triAddr)
@@ -111,14 +119,12 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
- const float3 A = tri[0] - P;
- const float3 B = tri[1] - P;
- const float3 C = tri[2] - P;
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+ const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+ const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -155,8 +161,8 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
*/
const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
const float sign_T = xor_signmast(T, sign_mask);
- if ((sign_T < 0.0f) ||
- (sign_T > isect->t * xor_signmast(det, sign_mask)))
+ if((sign_T < 0.0f) ||
+ (sign_T > isect->t * xor_signmast(det, sign_mask)))
{
return false;
}
@@ -191,7 +197,6 @@ ccl_device_inline void triangle_intersect_subsurface(
const IsectPrecalc *isect_precalc,
Intersection *isect_array,
float3 P,
- float3 dir,
int object,
int triAddr,
float tmax,
@@ -207,14 +212,12 @@ ccl_device_inline void triangle_intersect_subsurface(
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2));
-
- const float3 A = tri[0] - P;
- const float3 B = tri[1] - P;
- const float3 C = tri[2] - P;
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
+ const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
+ const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);
@@ -249,13 +252,10 @@ ccl_device_inline void triangle_intersect_subsurface(
/* Calculate scaled z−coordinates of vertices and use them to calculate
* the hit distance.
*/
- const float Az = Sz * A_kz;
- const float Bz = Sz * B_kz;
- const float Cz = Sz * C_kz;
- const float T = U * Az + V * Bz + W * Cz;
-
- if ((xor_signmast(T, sign_mask) < 0.0f) ||
- (xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask)))
+ const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz;
+ const float sign_T = xor_signmast(T, sign_mask);
+ if((sign_T < 0.0f) ||
+ (sign_T > tmax * xor_signmast(det, sign_mask)))
{
return;
}
@@ -315,7 +315,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
return P;
}
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
#endif
@@ -327,14 +327,12 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
P = P + D*t;
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
- float3 edge1 = tri[0] - tri[2];
- float3 edge2 = tri[1] - tri[2];
- float3 tvec = P - tri[2];
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
float3 qvec = cross(tvec, edge1);
float3 pvec = cross(D, edge2);
float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -343,7 +341,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
#endif
@@ -372,7 +370,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
#ifdef __INTERSECTION_REFINE__
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
+ Transform tfm = ccl_fetch(sd, ob_itfm);
#else
Transform tfm = object_fetch_transform(kg,
isect->object,
@@ -386,14 +384,12 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
P = P + D*t;
- float3 tri[3];
- tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0));
- tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1));
- tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2));
-
- float3 edge1 = tri[0] - tri[2];
- float3 edge2 = tri[1] - tri[2];
- float3 tvec = P - tri[2];
+ const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
+ tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
+ tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+ float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
+ float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
+ float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
float3 qvec = cross(tvec, edge1);
float3 pvec = cross(D, edge2);
float rt = dot(edge2, qvec) / dot(edge1, pvec);
@@ -402,7 +398,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
if(isect->object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
+ Transform tfm = ccl_fetch(sd, ob_tfm);
#else
Transform tfm = object_fetch_transform(kg,
isect->object,