diff options
author | Bastien Montagne <montagne29@wanadoo.fr> | 2015-06-29 17:41:00 +0300 |
---|---|---|
committer | Bastien Montagne <montagne29@wanadoo.fr> | 2015-06-29 18:18:11 +0300 |
commit | d140e70c496122915eb5c05aba83153e2e0d7998 (patch) | |
tree | 1e589247d69da64aa7b0e7802319237ec050b5d6 /intern/cycles/kernel/geom/geom_triangle_intersect.h | |
parent | 147bd16ed1bb3415b30408b0eab110d0854eadd2 (diff) | |
parent | 295d0c52a26730edc6d4ed1276e4051cce006be5 (diff) |
Merge branch 'master' into temp-ghash-experiments
Note that the 'store hash' feature was removed for now - too complex to maintain (conflicts)
and relatively easy to re-add if we ever really want this one day.
Conflicts:
source/blender/blenlib/BLI_ghash.h
source/blender/blenlib/intern/BLI_ghash.c
source/blender/blenlib/intern/hash_mm2a.c
source/blender/bmesh/tools/bmesh_region_match.c
tests/gtests/blenlib/BLI_ghash_performance_test.cc
tests/gtests/blenlib/BLI_ghash_test.cc
tests/gtests/blenlib/CMakeLists.txt
Diffstat (limited to 'intern/cycles/kernel/geom/geom_triangle_intersect.h')
-rw-r--r-- | intern/cycles/kernel/geom/geom_triangle_intersect.h | 114 |
1 files changed, 55 insertions, 59 deletions
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index c9e30a451da..3ef918dc842 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -14,7 +14,7 @@ * limitations under the License. */ -/* Triangle/Ray intersections . +/* Triangle/Ray intersections. * * For BVH ray intersection we use a precomputed triangle storage to accelerate * intersection at the cost of more memory usage. @@ -49,18 +49,27 @@ typedef struct IsectPrecalc { float Sx, Sy, Sz; } IsectPrecalc; -/* Workaround for CUDA toolkit 6.5.16. */ -#if defined(__KERNEL_CPU__) || !defined(__KERNEL_CUDA_EXPERIMENTAL__) || __CUDA_ARCH__ < 500 +#if defined(__KERNEL_CUDA__) # if (defined(i386) || defined(_M_IX86)) +# if __CUDA_ARCH__ > 500 ccl_device_noinline -# else +# else /* __CUDA_ARCH__ > 500 */ ccl_device_inline -# endif -#else +# endif /* __CUDA_ARCH__ > 500 */ +# else /* (defined(i386) || defined(_M_IX86)) */ +# if defined(__KERNEL_EXPERIMENTAL__) && (__CUDA_ARCH__ >= 500) ccl_device_noinline -#endif +# else +ccl_device_inline +# endif +# endif /* (defined(i386) || defined(_M_IX86)) */ +#elif defined(__KERNEL_OPENCL_APPLE__) +ccl_device_noinline +#else /* defined(__KERNEL_OPENCL_APPLE__) */ +ccl_device_inline +#endif /* defined(__KERNEL_OPENCL_APPLE__) */ void triangle_intersect_precalc(float3 dir, - IsectPrecalc *isect_precalc) + IsectPrecalc *isect_precalc) { /* Calculate dimension where the ray direction is maximal. */ int kz = util_max_axis(make_float3(fabsf(dir.x), @@ -77,10 +86,10 @@ void triangle_intersect_precalc(float3 dir, } /* Calculate the shear constants. 
*/ - float inf_dir_z = 1.0f / IDX(dir, kz); - isect_precalc->Sx = IDX(dir, kx) * inf_dir_z; - isect_precalc->Sy = IDX(dir, ky) * inf_dir_z; - isect_precalc->Sz = inf_dir_z; + float inv_dir_z = 1.0f / IDX(dir, kz); + isect_precalc->Sx = IDX(dir, kx) * inv_dir_z; + isect_precalc->Sy = IDX(dir, ky) * inv_dir_z; + isect_precalc->Sz = inv_dir_z; /* Store the dimensions. */ isect_precalc->kx = kx; @@ -98,7 +107,6 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, const IsectPrecalc *isect_precalc, Intersection *isect, float3 P, - float3 dir, uint visibility, int object, int triAddr) @@ -111,14 +119,12 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, const float Sz = isect_precalc->Sz; /* Calculate vertices relative to ray origin. */ - float3 tri[3]; - tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0)); - tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1)); - tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2)); - - const float3 A = tri[0] - P; - const float3 B = tri[1] - P; - const float3 C = tri[2] - P; + const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z); + const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z); + const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z); const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz); const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz); @@ -155,8 +161,8 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, */ const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz; const float sign_T = xor_signmast(T, sign_mask); - if ((sign_T < 0.0f) || - (sign_T > isect->t * xor_signmast(det, sign_mask))) + if((sign_T < 0.0f) || + 
(sign_T > isect->t * xor_signmast(det, sign_mask))) { return false; } @@ -191,7 +197,6 @@ ccl_device_inline void triangle_intersect_subsurface( const IsectPrecalc *isect_precalc, Intersection *isect_array, float3 P, - float3 dir, int object, int triAddr, float tmax, @@ -207,14 +212,12 @@ ccl_device_inline void triangle_intersect_subsurface( const float Sz = isect_precalc->Sz; /* Calculate vertices relative to ray origin. */ - float3 tri[3]; - tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0)); - tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1)); - tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2)); - - const float3 A = tri[0] - P; - const float3 B = tri[1] - P; - const float3 C = tri[2] - P; + const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z); + const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z); + const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z); const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz); const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz); @@ -249,13 +252,10 @@ ccl_device_inline void triangle_intersect_subsurface( /* Calculate scaled z−coordinates of vertices and use them to calculate * the hit distance. 
*/ - const float Az = Sz * A_kz; - const float Bz = Sz * B_kz; - const float Cz = Sz * C_kz; - const float T = U * Az + V * Bz + W * Cz; - - if ((xor_signmast(T, sign_mask) < 0.0f) || - (xor_signmast(T, sign_mask) > tmax * xor_signmast(det, sign_mask))) + const float T = (U * A_kz + V * B_kz + W * C_kz) * Sz; + const float sign_T = xor_signmast(T, sign_mask); + if((sign_T < 0.0f) || + (sign_T > tmax * xor_signmast(det, sign_mask))) { return; } @@ -315,7 +315,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, return P; } #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif @@ -327,14 +327,12 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, P = P + D*t; - float3 tri[3]; - tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0)); - tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1)); - tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2)); - - float3 edge1 = tri[0] - tri[2]; - float3 edge2 = tri[1] - tri[2]; - float3 tvec = P - tri[2]; + const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); float3 qvec = cross(tvec, edge1); float3 pvec = cross(D, edge2); float rt = dot(edge2, qvec) / dot(edge1, pvec); @@ -343,7 +341,7 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, 
ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif @@ -372,7 +370,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, #ifdef __INTERSECTION_REFINE__ if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = ccl_fetch(sd, ob_itfm); #else Transform tfm = object_fetch_transform(kg, isect->object, @@ -386,14 +384,12 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, P = P + D*t; - float3 tri[3]; - tri[0] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0)); - tri[1] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1)); - tri[2] = float4_to_float3(kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2)); - - float3 edge1 = tri[0] - tri[2]; - float3 edge2 = tri[1] - tri[2]; - float3 tvec = P - tri[2]; + const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0), + tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1), + tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); float3 qvec = cross(tvec, edge1); float3 pvec = cross(D, edge2); float rt = dot(edge2, qvec) / dot(edge1, pvec); @@ -402,7 +398,7 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, if(isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = ccl_fetch(sd, ob_tfm); #else Transform tfm = object_fetch_transform(kg, isect->object, |