diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-03-01 01:23:24 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-06 18:52:04 +0300 |
commit | 04857cc8efb385af5d8f40b655eeca41e2b73494 (patch) | |
tree | b16edec8a0e91fddfa050b2e8b747ca194c0b622 /intern/cycles/kernel | |
parent | 0fd0b0643a7a1c0334f39bddba4067d8fa8eede6 (diff) |
Cycles: fully decouple triangle and curve primitive storage from BVH2
Previously the storage here was optimized to avoid indirections in BVH2
traversal. This helps improve performance a bit, but makes performance
and memory usage of Embree and OptiX BVHs a bit worse also. It also adds
code complexity in other parts of the code.
Now decouple triangle and curve primitive storage from BVH2.
* Reduced peak memory usage on all devices
* Bit better performance for OptiX and Embree
* Bit worse performance for CUDA
* Simplified code:
** Intersection.prim/object now matches ShaderData.prim/object
** No more offset manipulation for mesh displacement before a BVH is built
** Remove primitive packing code and flags for Embree and OptiX
** Curve segments are now stored in a KernelCurve struct
* Also happens to fix a bug in baking with incorrect prim/object
Fixes T91968, T91770, T91902
Differential Revision: https://developer.blender.org/D12766
Diffstat (limited to 'intern/cycles/kernel')
18 files changed, 221 insertions, 226 deletions
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h index 092d770dcac..d3db6295ea5 100644 --- a/intern/cycles/kernel/bvh/bvh_embree.h +++ b/intern/cycles/kernel/bvh/bvh_embree.h @@ -106,9 +106,6 @@ ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg, const RTCHit *hit, Intersection *isect) { - bool is_hair = hit->geomID & 1; - isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u; - isect->v = is_hair ? hit->v : hit->u; isect->t = ray->tfar; isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { @@ -121,27 +118,37 @@ ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg, else { isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - isect->object = OBJECT_NONE; + isect->object = hit->geomID / 2; + } + + const bool is_hair = hit->geomID & 1; + if (is_hair) { + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, isect->prim); + isect->type = segment.type; + isect->prim = segment.prim; + isect->u = hit->u; + isect->v = hit->v; + } + else { + isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; + isect->u = 1.0f - hit->v - hit->u; + isect->v = hit->u; } - isect->type = kernel_tex_fetch(__prim_type, isect->prim); } -ccl_device_inline void kernel_embree_convert_sss_hit(const KernelGlobals *kg, - const RTCRay *ray, - const RTCHit *hit, - Intersection *isect, - int local_object_id) +ccl_device_inline void kernel_embree_convert_sss_hit( + const KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) { isect->u = 1.0f - hit->v - hit->u; isect->v = hit->u; isect->t = ray->tfar; isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2)); + rtcGetGeometry(kernel_data.bvh.scene, object * 2)); isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = local_object_id; - isect->type = kernel_tex_fetch(__prim_type, isect->prim); + isect->object = object; + isect->type = kernel_tex_fetch(__objects, object).primitive_type; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index 0ae36fccf9b..82c7c1a8a6c 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -130,7 +130,6 @@ ccl_device_inline if (prim_addr >= 0) { const int prim_addr2 = __float_as_int(leaf.y); const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; /* pop */ node_addr = traversal_stack[stack_ptr]; @@ -138,14 +137,15 @@ ccl_device_inline /* primitive intersection */ while (prim_addr < prim_addr2) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); + kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == + (type & PRIMITIVE_ALL)); bool hit; /* todo: specialized intersect functions which don't fill in * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? * might give a few % performance improvement */ - switch (p_type) { + switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { hit = triangle_intersect( kg, isect, P, dir, isect_t, visibility, object, prim_addr); @@ -163,17 +163,20 @@ ccl_device_inline case PRIMITIVE_MOTION_CURVE_THICK: case PRIMITIVE_CURVE_RIBBON: case PRIMITIVE_MOTION_CURVE_RIBBON: { - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - hit = curve_intersect(kg, - isect, - P, - dir, - isect_t, - visibility, - object, - prim_addr, - ray->time, - curve_type); + if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + if (ray->time < prim_time.x || ray->time > prim_time.y) { + hit = false; + break; + } + } + + const int curve_object = kernel_tex_fetch(__prim_object, prim_addr); + const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr); + hit = curve_intersect( + kg, isect, P, dir, isect_t, curve_object, curve_prim, ray->time, curve_type); + break; } #endif diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h index a26d8c514f3..2feff593c10 100644 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -165,18 +165,18 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg, case PRIMITIVE_CURVE_RIBBON: case PRIMITIVE_MOTION_CURVE_RIBBON: { for (; prim_addr < prim_addr2; prim_addr++) { - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); - const bool hit = curve_intersect(kg, - isect, - P, - dir, - isect->t, - visibility, - object, - prim_addr, - ray->time, - curve_type); + if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + if (ray->time < prim_time.x || ray->time > prim_time.y) { + continue; + } + } + + const int curve_object = kernel_tex_fetch(__prim_object, prim_addr); + const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr); + const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const bool hit = curve_intersect( + kg, isect, P, dir, isect->t, curve_object, curve_prim, ray->time, curve_type); if (hit) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h index 21384457b16..9f188a93e2c 100644 --- a/intern/cycles/kernel/bvh/bvh_util.h +++ b/intern/cycles/kernel/bvh/bvh_util.h @@ -118,19 +118,18 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits) ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *ccl_restrict kg, const Intersection *ccl_restrict isect) { - const int prim = kernel_tex_fetch(__prim_index, isect->prim); + const int prim = isect->prim; int shader = 0; #ifdef __HAIR__ - if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) + if (isect->type & PRIMITIVE_ALL_TRIANGLE) #endif { shader = kernel_tex_fetch(__tri_shader, prim); } #ifdef __HAIR__ else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); + shader = kernel_tex_fetch(__curves, prim).shader_id; } #endif @@ -138,21 +137,19 @@ ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *cc } ccl_device_forceinline int intersection_get_shader_from_isect_prim( - const KernelGlobals *ccl_restrict kg, const int isect_prim) + const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type) { - const int prim = kernel_tex_fetch(__prim_index, isect_prim); int shader = 0; #ifdef __HAIR__ - if (kernel_tex_fetch(__prim_type, isect_prim) & PRIMITIVE_ALL_TRIANGLE) + if (isect_type & PRIMITIVE_ALL_TRIANGLE) #endif { shader = kernel_tex_fetch(__tri_shader, prim); } #ifdef __HAIR__ else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); + shader = kernel_tex_fetch(__curves, prim).shader_id; } #endif @@ -162,25 +159,13 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim( ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_restrict kg, const Intersection *ccl_restrict isect) { - return intersection_get_shader_from_isect_prim(kg, isect->prim); -} - -ccl_device_forceinline int intersection_get_object(const KernelGlobals *ccl_restrict kg, - const Intersection *ccl_restrict isect) -{ - if (isect->object != OBJECT_NONE) { - return isect->object; - } - - return kernel_tex_fetch(__prim_object, isect->prim); + return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type); } ccl_device_forceinline int intersection_get_object_flags(const KernelGlobals *ccl_restrict kg, const Intersection *ccl_restrict isect) { - const int object = intersection_get_object(kg, isect); - - return kernel_tex_fetch(__object_flag, object); + return kernel_tex_fetch(__object_flag, isect->object); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index 7a79e0c4823..736f30d93ef 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -41,22 +41,15 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2() return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2()); } -template<bool always = false> ccl_device_forceinline uint get_object_id() +ccl_device_forceinline int get_object_id() { #ifdef __OBJECT_MOTION__ - /* Always get the the instance ID from the TLAS. + /* Always get the the instance ID from the TLAS * There might be a motion transform node between TLAS and BLAS which does not have one. */ - uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); + return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); #else - uint object = optixGetInstanceId(); + return optixGetInstanceId(); #endif - /* Choose between always returning object ID or only for instances. */ - if (always || (object & 1) == 0) - /* Can just remove the low bit since instance always contains object ID. */ - return object >> 1; - else - /* Set to OBJECT_NONE if this is not an instanced object. */ - return OBJECT_NONE; } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest() @@ -108,7 +101,7 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() #endif #ifdef __BVH_LOCAL__ - const uint object = get_object_id<true>(); + const int object = get_object_id(); if (object != optixGetPayload_4() /* local_object */) { /* Only intersect with matching object. */ return optixIgnoreIntersection(); @@ -152,21 +145,23 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() local_isect->num_hits = 1; } + const int prim = optixGetPrimitiveIndex(); + Intersection *isect = &local_isect->hits[hit]; isect->t = optixGetRayTmax(); - isect->prim = optixGetPrimitiveIndex(); + isect->prim = prim; isect->object = get_object_id(); - isect->type = kernel_tex_fetch(__prim_type, isect->prim); + isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; const float2 barycentrics = optixGetTriangleBarycentrics(); isect->u = 1.0f - barycentrics.y - barycentrics.x; isect->v = barycentrics.x; /* Record geometric normal. */ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)); - const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)); - const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)); + const float3 tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)); + const float3 tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); /* Continue tracing (without this the trace call would return after the first hit). */ @@ -179,25 +174,32 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() #ifdef __SHADOW_RECORD_ALL__ bool ignore_intersection = false; - const uint prim = optixGetPrimitiveIndex(); + int prim = optixGetPrimitiveIndex(); + const uint object = get_object_id(); # ifdef __VISIBILITY_FLAG__ const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { ignore_intersection = true; } # endif float u = 0.0f, v = 0.0f; + int type = 0; if (optixIsTriangleHit()) { const float2 barycentrics = optixGetTriangleBarycentrics(); u = 1.0f - barycentrics.y - barycentrics.x; v = barycentrics.x; + type = kernel_tex_fetch(__objects, object).primitive_type; } # ifdef __HAIR__ else { u = __uint_as_float(optixGetAttribute_0()); v = __uint_as_float(optixGetAttribute_1()); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + type = segment.type; + prim = segment.prim; + /* Filter out curve endcaps. */ if (u == 0.0f || u == 1.0f) { ignore_intersection = true; @@ -245,8 +247,8 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() isect->v = v; isect->t = optixGetRayTmax(); isect->prim = prim; - isect->object = get_object_id(); - isect->type = kernel_tex_fetch(__prim_type, prim); + isect->object = object; + isect->type = type; # ifdef __TRANSPARENT_SHADOWS__ /* Detect if this surface has a shader with transparent shadows. */ @@ -274,15 +276,14 @@ extern "C" __global__ void __anyhit__kernel_optix_volume_test() } #endif + const uint object = get_object_id(); #ifdef __VISIBILITY_FLAG__ - const uint prim = optixGetPrimitiveIndex(); const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { return optixIgnoreIntersection(); } #endif - const uint object = get_object_id<true>(); if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) { return optixIgnoreIntersection(); } @@ -301,9 +302,9 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test() #endif #ifdef __VISIBILITY_FLAG__ - const uint prim = optixGetPrimitiveIndex(); + const uint object = get_object_id(); const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { return optixIgnoreIntersection(); } @@ -316,28 +317,39 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test() extern "C" __global__ void __closesthit__kernel_optix_hit() { + const int object = get_object_id(); + const int prim = optixGetPrimitiveIndex(); + optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */ - optixSetPayload_3(optixGetPrimitiveIndex()); - optixSetPayload_4(get_object_id()); - /* Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index. */ - optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex())); + optixSetPayload_4(object); if (optixIsTriangleHit()) { const float2 barycentrics = optixGetTriangleBarycentrics(); optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x)); optixSetPayload_2(__float_as_uint(barycentrics.x)); + optixSetPayload_3(prim); + optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type); } else { + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */ optixSetPayload_2(optixGetAttribute_1()); + optixSetPayload_3(segment.prim); + optixSetPayload_5(segment.type); } } #ifdef __HAIR__ -ccl_device_inline void optix_intersection_curve(const uint prim, const uint type) +ccl_device_inline void optix_intersection_curve(const int prim, const int type) { - const uint object = get_object_id<true>(); + const int object = get_object_id(); + +# ifdef __VISIBILITY_FLAG__ const uint visibility = optixGetPayload_4(); + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + return; + } +# endif float3 P = optixGetObjectRayOrigin(); float3 dir = optixGetObjectRayDirection(); @@ -358,7 +370,7 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type if (isect.t != FLT_MAX) isect.t *= len; - if (curve_intersect(NULL, &isect, P, dir, isect.t, visibility, object, prim, time, type)) { + if (curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL, __float_as_int(isect.u), /* Attribute_0 */ @@ -368,9 +380,9 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type extern "C" __global__ void __intersection__curve_ribbon() { - const uint prim = optixGetPrimitiveIndex(); - const uint type = kernel_tex_fetch(__prim_type, prim); - + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, optixGetPrimitiveIndex()); + const int prim = segment.prim; + const int type = segment.type; if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { optix_intersection_curve(prim, type); } diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index a827a67ce7a..811558edae9 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -34,8 +34,8 @@ ccl_device float curve_attribute_float(const KernelGlobals *kg, float *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); @@ -76,8 +76,8 @@ ccl_device float2 curve_attribute_float2(const KernelGlobals *kg, float2 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); @@ -122,8 +122,8 @@ ccl_device float3 curve_attribute_float3(const KernelGlobals *kg, float3 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); @@ -164,8 +164,8 @@ ccl_device float4 curve_attribute_float4(const KernelGlobals *kg, float4 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0); @@ -206,8 +206,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd) float r = 0.0f; if (sd->type & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; @@ -231,8 +231,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd) ccl_device float3 curve_motion_center_location(const KernelGlobals *kg, const ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index a068e93790a..30addb9616d 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -630,33 +630,19 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, const float3 P, const float3 dir, const float tmax, - uint visibility, int object, - int curveAddr, + int prim, float time, int type) { const bool is_motion = (type & PRIMITIVE_ALL_MOTION); -# ifndef __KERNEL_OPTIX__ /* See OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */ - if (is_motion && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); - if (time < prim_time.x || time > prim_time.y) { - return false; - } - } -# endif + KernelCurve kcurve = kernel_tex_fetch(__curves, prim); - int segment = PRIMITIVE_UNPACK_SEGMENT(type); - int prim = kernel_tex_fetch(__prim_index, curveAddr); - - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type); int k1 = k0 + 1; - - int ka = max(k0 - 1, __float_as_int(v00.x)); - int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); float4 curve[4]; if (!is_motion) { @@ -666,21 +652,14 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, curve[3] = kernel_tex_fetch(__curve_keys, kb); } else { - int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object; - motion_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, curve); + motion_curve_keys(kg, object, prim, time, ka, k0, k1, kb, curve); } -# ifdef __VISIBILITY_FLAG__ - if (!(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)) { - return false; - } -# endif - if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { /* todo: adaptive number of subdivisions could help performance here. */ const int subdivisions = kernel_data.bvh.curve_subdivisions; if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) { - isect->prim = curveAddr; + isect->prim = prim; isect->object = object; isect->type = type; return true; @@ -690,7 +669,7 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, } else { if (curve_intersect_recursive(P, dir, tmax, curve, isect)) { - isect->prim = curveAddr; + isect->prim = prim; isect->object = object; isect->type = type; return true; @@ -708,7 +687,7 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, const int isect_object, const int isect_prim) { - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -716,14 +695,12 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, D = safe_normalize_len(D, &t); } - int prim = kernel_tex_fetch(__prim_index, isect_prim); - float4 v00 = kernel_tex_fetch(__curves, prim); + KernelCurve kcurve = kernel_tex_fetch(__curves, isect_prim); - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - - int ka = max(k0 - 1, __float_as_int(v00.x)); - int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); float4 P_curve[4]; @@ -780,15 +757,13 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, sd->dPdv = cross(dPdu, sd->Ng); # endif - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } sd->P = P; - - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - sd->shader = __float_as_int(curvedata.z); + sd->shader = kernel_tex_fetch(__curves, sd->prim).shader_id; } #endif diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 239bd0a37b2..b7f182090aa 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -72,9 +72,9 @@ ccl_device_inline void motion_triangle_verts_for_step(const KernelGlobals *kg, { if (step == numsteps) { /* center step: regular vertex location */ - verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); } else { /* center step not store in this array */ diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h index ec7e4b07d76..6fb9756ff92 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h @@ -44,7 +44,7 @@ ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg, float3 verts[3]) { #ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { if (UNLIKELY(t == 0.0f)) { return P; } @@ -70,7 +70,7 @@ ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg, /* Compute refined position. */ P = P + D * rt; - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -106,7 +106,7 @@ ccl_device_inline return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts); # else # ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -128,7 +128,7 @@ ccl_device_inline P = P + D * rt; - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -186,8 +186,9 @@ ccl_device_inline bool motion_triangle_intersect(const KernelGlobals *kg, isect->t = t; isect->u = u; isect->v = v; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : + object; isect->type = PRIMITIVE_MOTION_TRIANGLE; return true; } @@ -288,8 +289,8 @@ ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg, isect->t = t; isect->u = u; isect->v = v; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = local_object; isect->type = PRIMITIVE_MOTION_TRIANGLE; /* Record geometric normal. */ diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h index 5dc03940238..f78d194359d 100644 --- a/intern/cycles/kernel/geom/geom_shader_data.h +++ b/intern/cycles/kernel/geom/geom_shader_data.h @@ -52,10 +52,9 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k sd->v = isect->v; sd->ray_length = isect->t; sd->type = isect->type; - sd->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) : - isect->object; + sd->object = isect->object; sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); - sd->prim = kernel_tex_fetch(__prim_index, isect->prim); + sd->prim = isect->prim; sd->lamp = LAMP_NONE; sd->flag = 0; diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 910fb122c6d..8edba46fd39 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -29,9 +29,9 @@ ccl_device_inline float3 triangle_normal(const KernelGlobals *kg, ShaderData *sd { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + const float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + const float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + const float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* return normal */ if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { @@ -54,9 +54,9 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg, { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* compute point */ float t = 1.0f - u - v; *P = (u * v0 + v * v1 + t * v2); @@ -78,9 +78,9 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg, ccl_device_inline void triangle_vertices(const KernelGlobals *kg, int prim, float3 P[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); } /* Triangle vertex locations and vertex normals */ @@ -91,9 +91,9 @@ ccl_device_inline void triangle_vertices_and_normals(const KernelGlobals *kg, float3 N[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); N[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); N[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); N[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); @@ -145,9 +145,9 @@ ccl_device_inline void triangle_dPdudv(const KernelGlobals *kg, { /* fetch triangle vertex coordinates */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + const float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + const float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + const float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* compute derivatives of P w.r.t. uv */ *dPdu = (p0 - p2); diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 30b77ebd2eb..b784cc75d08 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -35,13 +35,14 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg, int object, int prim_addr) { - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; #else - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); #endif float t, u, v; if (ray_triangle_intersect(P, @@ -64,8 +65,9 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg, if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif { - isect->prim = prim_addr; - isect->object = object; + isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : + object; + isect->prim = prim; isect->type = PRIMITIVE_TRIANGLE; isect->u = u; isect->v = v; @@ -102,13 +104,14 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, } } - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; # if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; # else - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); # endif float t, u, v; if (!ray_triangle_intersect(P, @@ -167,8 +170,8 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, /* Record intersection. */ Intersection *isect = &local_isect->hits[hit]; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = local_object; isect->type = PRIMITIVE_TRIANGLE; isect->u = u; isect->v = v; @@ -176,9 +179,9 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, /* Record geometric normal. */ # if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); # endif local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); @@ -206,7 +209,7 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, const int isect_prim) { #ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { if (UNLIKELY(t == 0.0f)) { return P; } @@ -219,10 +222,10 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, P = P + D * t; - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect_prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); @@ -239,7 +242,7 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, P = P + D * rt; } - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -265,7 +268,7 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, /* t is always in world space with OptiX. */ return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim); #else - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -276,10 +279,10 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, P = P + D * t; # ifdef __INTERSECTION_REFINE__ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect_prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); @@ -297,7 +300,7 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, } # endif /* __INTERSECTION_REFINE__ */ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h index 4e581df1870..579a9c4d200 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h @@ -160,10 +160,7 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS) if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) { ray.t = kernel_data.integrator.ao_bounces_distance; - const int last_object = last_isect_object != OBJECT_NONE ? - last_isect_object : - kernel_tex_fetch(__prim_object, last_isect_prim); - const float object_ao_distance = kernel_tex_fetch(__objects, last_object).ao_distance; + const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance; if (object_ao_distance != 0.0f) { ray.t = object_ao_distance; } diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h index 3e4cc837e9b..234aa7cae63 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_background.h +++ b/intern/cycles/kernel/integrator/integrator_shade_background.h @@ -192,7 +192,8 @@ ccl_device void integrator_shade_background(INTEGRATOR_STATE_ARGS, INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND; const int isect_prim = INTEGRATOR_STATE(isect, prim); - const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim); + const int isect_type = INTEGRATOR_STATE(isect, type); + const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type); const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) { diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h index 9490738404e..c309d20a046 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface.h @@ -577,7 +577,7 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS) # ifdef __VOLUME__ /* Update volume stack if needed. */ if (kernel_data.integrator.use_volumes) { - const int object = intersection_get_object(kg, &ss_isect.hits[0]); + const int object = ss_isect.hits[0].object; const int object_flag = kernel_tex_fetch(__object_flag, object); if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index bf9b94c1753..464ecb183cb 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -18,11 +18,9 @@ # define KERNEL_TEX(type, name) #endif -/* bvh */ +/* BVH2, not used for OptiX or Embree. */ KERNEL_TEX(float4, __bvh_nodes) KERNEL_TEX(float4, __bvh_leaf_nodes) -KERNEL_TEX(float4, __prim_tri_verts) -KERNEL_TEX(uint, __prim_tri_index) KERNEL_TEX(uint, __prim_type) KERNEL_TEX(uint, __prim_visibility) KERNEL_TEX(uint, __prim_index) @@ -46,10 +44,12 @@ KERNEL_TEX(float4, __tri_vnormal) KERNEL_TEX(uint4, __tri_vindex) KERNEL_TEX(uint, __tri_patch) KERNEL_TEX(float2, __tri_patch_uv) +KERNEL_TEX(float4, __tri_verts) /* curves */ -KERNEL_TEX(float4, __curves) +KERNEL_TEX(KernelCurve, __curves) KERNEL_TEX(float4, __curve_keys) +KERNEL_TEX(KernelCurveSegment, __curve_segments) /* patches */ KERNEL_TEX(uint, __patches) diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 22dde3537eb..4a72f45f1a2 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -1270,10 +1270,25 @@ typedef struct KernelObject { float ao_distance; - float pad1, pad2; + uint visibility; + int primitive_type; } KernelObject; static_assert_align(KernelObject, 16); +typedef struct KernelCurve { + int shader_id; + int first_key; + int num_keys; + int type; +} KernelCurve; +static_assert_align(KernelCurve, 16); + +typedef struct KernelCurveSegment { + int prim; + int type; +} KernelCurveSegment; +static_assert_align(KernelCurveSegment, 8); + typedef struct KernelSpotLight { float radius; float invarea; diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index 9d7ce202d49..19176087180 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -206,8 +206,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, # ifdef __OBJECT_MOTION__ else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { float3 verts[3]; - motion_triangle_vertices( - kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts); + motion_triangle_vertices(kg, sd->object, isect.hits[hit].prim, sd->time, verts); hit_P = motion_triangle_refine_local( kg, sd, ray->P, ray->D, ray->t, isect.hits[hit].object, isect.hits[hit].prim, verts); } @@ -215,9 +214,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, /* Get geometric normal. */ float3 hit_Ng = isect.Ng[hit]; - int object = (isect.hits[hit].object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, isect.hits[hit].prim) : - isect.hits[hit].object; + int object = isect.hits[hit].object; int object_flag = kernel_tex_fetch(__object_flag, object); if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { hit_Ng = -hit_Ng; @@ -225,7 +222,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, /* Compute smooth normal. */ float3 N = hit_Ng; - int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim); + int prim = isect.hits[hit].prim; int shader = kernel_tex_fetch(__tri_shader, prim); if (shader & SHADER_SMOOTH_NORMAL) { |