diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-03-01 01:23:24 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-06 18:52:04 +0300 |
commit | 04857cc8efb385af5d8f40b655eeca41e2b73494 (patch) | |
tree | b16edec8a0e91fddfa050b2e8b747ca194c0b622 /intern/cycles/kernel/geom | |
parent | 0fd0b0643a7a1c0334f39bddba4067d8fa8eede6 (diff) |
Cycles: fully decouple triangle and curve primitive storage from BVH2
Previously the storage here was optimized to avoid indirections in BVH2
traversal. This helps improve performance a bit, but makes performance
and memory usage of Embree and OptiX BVHs a bit worse also. It also adds
code complexity in other parts of the code.
Now decouple triangle and curve primitive storage from BVH2.
* Reduced peak memory usage on all devices
* Bit better performance for OptiX and Embree
* Bit worse performance for CUDA
* Simplified code:
** Intersection.prim/object now matches ShaderData.prim/object
** No more offset manipulation for mesh displacement before a BVH is built
** Remove primitive packing code and flags for Embree and OptiX
** Curve segments are now stored in a KernelCurve struct
* Also happens to fix a bug in baking with incorrect prim/object
Fixes T91968, T91770, T91902
Differential Revision: https://developer.blender.org/D12766
Diffstat (limited to 'intern/cycles/kernel/geom')
-rw-r--r-- | intern/cycles/kernel/geom/geom_curve.h | 24 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_curve_intersect.h | 55 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_motion_triangle.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_motion_triangle_intersect.h | 17 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_shader_data.h | 5 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_triangle.h | 30 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_triangle_intersect.h | 61 |
7 files changed, 88 insertions, 110 deletions
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index a827a67ce7a..811558edae9 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -34,8 +34,8 @@ ccl_device float curve_attribute_float(const KernelGlobals *kg, float *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); @@ -76,8 +76,8 @@ ccl_device float2 curve_attribute_float2(const KernelGlobals *kg, float2 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); @@ -122,8 +122,8 @@ ccl_device float3 curve_attribute_float3(const KernelGlobals *kg, float3 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); @@ -164,8 +164,8 @@ ccl_device float4 curve_attribute_float4(const KernelGlobals *kg, float4 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0); @@ -206,8 +206,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd) float r = 0.0f; if (sd->type & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; @@ -231,8 +231,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd) ccl_device float3 curve_motion_center_location(const KernelGlobals *kg, const ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index a068e93790a..30addb9616d 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -630,33 +630,19 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, const float3 P, const float3 dir, const float tmax, - uint visibility, int object, - int curveAddr, + int prim, float time, int type) { const bool is_motion = (type & PRIMITIVE_ALL_MOTION); -# ifndef __KERNEL_OPTIX__ /* See OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */ - if (is_motion && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); - if (time < prim_time.x || time > prim_time.y) { - return false; - } - } -# endif + KernelCurve kcurve = kernel_tex_fetch(__curves, prim); - int segment = PRIMITIVE_UNPACK_SEGMENT(type); - int prim = kernel_tex_fetch(__prim_index, curveAddr); - - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type); int k1 = k0 + 1; - - int ka = max(k0 - 1, __float_as_int(v00.x)); - int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); float4 curve[4]; if (!is_motion) { @@ -666,21 +652,14 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, curve[3] = kernel_tex_fetch(__curve_keys, kb); } else { - int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object; - motion_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, curve); + motion_curve_keys(kg, object, prim, time, ka, k0, k1, kb, curve); } -# ifdef __VISIBILITY_FLAG__ - if (!(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)) { - return false; - } -# endif - if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { /* todo: adaptive number of subdivisions could help performance here. */ const int subdivisions = kernel_data.bvh.curve_subdivisions; if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) { - isect->prim = curveAddr; + isect->prim = prim; isect->object = object; isect->type = type; return true; @@ -690,7 +669,7 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, } else { if (curve_intersect_recursive(P, dir, tmax, curve, isect)) { - isect->prim = curveAddr; + isect->prim = prim; isect->object = object; isect->type = type; return true; @@ -708,7 +687,7 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, const int isect_object, const int isect_prim) { - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -716,14 +695,12 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, D = safe_normalize_len(D, &t); } - int prim = kernel_tex_fetch(__prim_index, isect_prim); - float4 v00 = kernel_tex_fetch(__curves, prim); + KernelCurve kcurve = kernel_tex_fetch(__curves, isect_prim); - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - - int ka = max(k0 - 1, __float_as_int(v00.x)); - int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); float4 P_curve[4]; @@ -780,15 +757,13 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, sd->dPdv = cross(dPdu, sd->Ng); # endif - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } sd->P = P; - - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - sd->shader = __float_as_int(curvedata.z); + sd->shader = kernel_tex_fetch(__curves, sd->prim).shader_id; } #endif diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 239bd0a37b2..b7f182090aa 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -72,9 +72,9 @@ ccl_device_inline void motion_triangle_verts_for_step(const KernelGlobals *kg, { if (step == numsteps) { /* center step: regular vertex location */ - verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); } else { /* center step not store in this array */ diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h index ec7e4b07d76..6fb9756ff92 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h @@ -44,7 +44,7 @@ ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg, float3 verts[3]) { #ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { if (UNLIKELY(t == 0.0f)) { return P; } @@ -70,7 +70,7 @@ ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg, /* Compute refined position. */ P = P + D * rt; - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -106,7 +106,7 @@ ccl_device_inline return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts); # else # ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -128,7 +128,7 @@ ccl_device_inline P = P + D * rt; - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -186,8 +186,9 @@ ccl_device_inline bool motion_triangle_intersect(const KernelGlobals *kg, isect->t = t; isect->u = u; isect->v = v; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : + object; isect->type = PRIMITIVE_MOTION_TRIANGLE; return true; } @@ -288,8 +289,8 @@ ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg, isect->t = t; isect->u = u; isect->v = v; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = local_object; isect->type = PRIMITIVE_MOTION_TRIANGLE; /* Record geometric normal. */ diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h index 5dc03940238..f78d194359d 100644 --- a/intern/cycles/kernel/geom/geom_shader_data.h +++ b/intern/cycles/kernel/geom/geom_shader_data.h @@ -52,10 +52,9 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k sd->v = isect->v; sd->ray_length = isect->t; sd->type = isect->type; - sd->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) : - isect->object; + sd->object = isect->object; sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); - sd->prim = kernel_tex_fetch(__prim_index, isect->prim); + sd->prim = isect->prim; sd->lamp = LAMP_NONE; sd->flag = 0; diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 910fb122c6d..8edba46fd39 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -29,9 +29,9 @@ ccl_device_inline float3 triangle_normal(const KernelGlobals *kg, ShaderData *sd { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + const float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + const float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + const float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* return normal */ if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { @@ -54,9 +54,9 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg, { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* compute point */ float t = 1.0f - u - v; *P = (u * v0 + v * v1 + t * v2); @@ -78,9 +78,9 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg, ccl_device_inline void triangle_vertices(const KernelGlobals *kg, int prim, float3 P[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); } /* Triangle vertex locations and vertex normals */ @@ -91,9 +91,9 @@ ccl_device_inline void triangle_vertices_and_normals(const KernelGlobals *kg, float3 N[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); N[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); N[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); N[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); @@ -145,9 +145,9 @@ ccl_device_inline void triangle_dPdudv(const KernelGlobals *kg, { /* fetch triangle vertex coordinates */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + const float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + const float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + const float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* compute derivatives of P w.r.t. uv */ *dPdu = (p0 - p2); diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 30b77ebd2eb..b784cc75d08 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -35,13 +35,14 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg, int object, int prim_addr) { - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; #else - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); #endif float t, u, v; if (ray_triangle_intersect(P, @@ -64,8 +65,9 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg, if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif { - isect->prim = prim_addr; - isect->object = object; + isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : + object; + isect->prim = prim; isect->type = PRIMITIVE_TRIANGLE; isect->u = u; isect->v = v; @@ -102,13 +104,14 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, } } - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; # if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; # else - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); # endif float t, u, v; if (!ray_triangle_intersect(P, @@ -167,8 +170,8 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, /* Record intersection. */ Intersection *isect = &local_isect->hits[hit]; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = local_object; isect->type = PRIMITIVE_TRIANGLE; isect->u = u; isect->v = v; @@ -176,9 +179,9 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, /* Record geometric normal. */ # if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); # endif local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); @@ -206,7 +209,7 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, const int isect_prim) { #ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { if (UNLIKELY(t == 0.0f)) { return P; } @@ -219,10 +222,10 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, P = P + D * t; - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect_prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); @@ -239,7 +242,7 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, P = P + D * rt; } - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -265,7 +268,7 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, /* t is always in world space with OptiX. */ return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim); #else - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -276,10 +279,10 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, P = P + D * t; # ifdef __INTERSECTION_REFINE__ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect_prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); @@ -297,7 +300,7 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, } # endif /* __INTERSECTION_REFINE__ */ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } |