diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-03-01 01:23:24 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-10-06 18:52:04 +0300 |
commit | 04857cc8efb385af5d8f40b655eeca41e2b73494 (patch) | |
tree | b16edec8a0e91fddfa050b2e8b747ca194c0b622 /intern | |
parent | 0fd0b0643a7a1c0334f39bddba4067d8fa8eede6 (diff) |
Cycles: fully decouple triangle and curve primitive storage from BVH2
Previously the storage here was optimized to avoid indirections in BVH2
traversal. This helps improve performance a bit, but makes performance
and memory usage of Embree and OptiX BVHs a bit worse also. It also adds
code complexity in other parts of the code.
Now decouple triangle and curve primitive storage from BVH2.
* Reduced peak memory usage on all devices
* Bit better performance for OptiX and Embree
* Bit worse performance for CUDA
* Simplified code:
** Intersection.prim/object now matches ShaderData.prim/object
** No more offset manipulation for mesh displacement before a BVH is built
** Remove primitive packing code and flags for Embree and OptiX
** Curve segments are now stored in a KernelCurve struct
* Also happens to fix a bug in baking with incorrect prim/object
Fixes T91968, T91770, T91902
Differential Revision: https://developer.blender.org/D12766
Diffstat (limited to 'intern')
32 files changed, 365 insertions, 671 deletions
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h index 94935c26f10..d9e2ad9526c 100644 --- a/intern/cycles/bvh/bvh.h +++ b/intern/cycles/bvh/bvh.h @@ -50,10 +50,6 @@ struct PackedBVH { array<int4> leaf_nodes; /* object index to BVH node index mapping for instances */ array<int> object_node; - /* Mapping from primitive index to index in triangle array. */ - array<uint> prim_tri_index; - /* Continuous storage of triangle vertices. */ - array<float4> prim_tri_verts; /* primitive type - triangle or strand */ array<int> prim_type; /* visibility visibilitys for primitives */ diff --git a/intern/cycles/bvh/bvh2.cpp b/intern/cycles/bvh/bvh2.cpp index 379ae9b25ff..4a90a1e8796 100644 --- a/intern/cycles/bvh/bvh2.cpp +++ b/intern/cycles/bvh/bvh2.cpp @@ -439,61 +439,20 @@ void BVH2::refit_primitives(int start, int end, BoundBox &bbox, uint &visibility /* Triangles */ -void BVH2::pack_triangle(int idx, float4 tri_verts[3]) -{ - int tob = pack.prim_object[idx]; - assert(tob >= 0 && tob < objects.size()); - const Mesh *mesh = static_cast<const Mesh *>(objects[tob]->get_geometry()); - - int tidx = pack.prim_index[idx]; - Mesh::Triangle t = mesh->get_triangle(tidx); - const float3 *vpos = &mesh->verts[0]; - float3 v0 = vpos[t.v[0]]; - float3 v1 = vpos[t.v[1]]; - float3 v2 = vpos[t.v[2]]; - - tri_verts[0] = float3_to_float4(v0); - tri_verts[1] = float3_to_float4(v1); - tri_verts[2] = float3_to_float4(v2); -} - void BVH2::pack_primitives() { const size_t tidx_size = pack.prim_index.size(); - size_t num_prim_triangles = 0; - /* Count number of triangles primitives in BVH. */ - for (unsigned int i = 0; i < tidx_size; i++) { - if ((pack.prim_index[i] != -1)) { - if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) { - ++num_prim_triangles; - } - } - } /* Reserve size for arrays. */ - pack.prim_tri_index.clear(); - pack.prim_tri_index.resize(tidx_size); - pack.prim_tri_verts.clear(); - pack.prim_tri_verts.resize(num_prim_triangles * 3); pack.prim_visibility.clear(); pack.prim_visibility.resize(tidx_size); /* Fill in all the arrays. */ - size_t prim_triangle_index = 0; for (unsigned int i = 0; i < tidx_size; i++) { if (pack.prim_index[i] != -1) { int tob = pack.prim_object[i]; Object *ob = objects[tob]; - if ((pack.prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) { - pack_triangle(i, (float4 *)&pack.prim_tri_verts[3 * prim_triangle_index]); - pack.prim_tri_index[i] = 3 * prim_triangle_index; - ++prim_triangle_index; - } - else { - pack.prim_tri_index[i] = -1; - } pack.prim_visibility[i] = ob->visibility_for_tracing(); } else { - pack.prim_tri_index[i] = -1; pack.prim_visibility[i] = 0; } } @@ -522,10 +481,8 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size) /* reserve */ size_t prim_index_size = pack.prim_index.size(); - size_t prim_tri_verts_size = pack.prim_tri_verts.size(); size_t pack_prim_index_offset = prim_index_size; - size_t pack_prim_tri_verts_offset = prim_tri_verts_size; size_t pack_nodes_offset = nodes_size; size_t pack_leaf_nodes_offset = leaf_nodes_size; size_t object_offset = 0; @@ -535,7 +492,6 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size) if (geom->need_build_bvh(params.bvh_layout)) { prim_index_size += bvh->pack.prim_index.size(); - prim_tri_verts_size += bvh->pack.prim_tri_verts.size(); nodes_size += bvh->pack.nodes.size(); leaf_nodes_size += bvh->pack.leaf_nodes.size(); } @@ -545,8 +501,6 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size) pack.prim_type.resize(prim_index_size); pack.prim_object.resize(prim_index_size); pack.prim_visibility.resize(prim_index_size); - pack.prim_tri_verts.resize(prim_tri_verts_size); - pack.prim_tri_index.resize(prim_index_size); pack.nodes.resize(nodes_size); pack.leaf_nodes.resize(leaf_nodes_size); pack.object_node.resize(objects.size()); @@ -559,8 +513,6 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size) int *pack_prim_type = (pack.prim_type.size()) ? &pack.prim_type[0] : NULL; int *pack_prim_object = (pack.prim_object.size()) ? &pack.prim_object[0] : NULL; uint *pack_prim_visibility = (pack.prim_visibility.size()) ? &pack.prim_visibility[0] : NULL; - float4 *pack_prim_tri_verts = (pack.prim_tri_verts.size()) ? &pack.prim_tri_verts[0] : NULL; - uint *pack_prim_tri_index = (pack.prim_tri_index.size()) ? &pack.prim_tri_index[0] : NULL; int4 *pack_nodes = (pack.nodes.size()) ? &pack.nodes[0] : NULL; int4 *pack_leaf_nodes = (pack.leaf_nodes.size()) ? &pack.leaf_nodes[0] : NULL; float2 *pack_prim_time = (pack.prim_time.size()) ? &pack.prim_time[0] : NULL; @@ -609,18 +561,14 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size) int *bvh_prim_index = &bvh->pack.prim_index[0]; int *bvh_prim_type = &bvh->pack.prim_type[0]; uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0]; - uint *bvh_prim_tri_index = &bvh->pack.prim_tri_index[0]; float2 *bvh_prim_time = bvh->pack.prim_time.size() ? &bvh->pack.prim_time[0] : NULL; for (size_t i = 0; i < bvh_prim_index_size; i++) { if (bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) { pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset; - pack_prim_tri_index[pack_prim_index_offset] = -1; } else { pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + geom_prim_offset; - pack_prim_tri_index[pack_prim_index_offset] = bvh_prim_tri_index[i] + - pack_prim_tri_verts_offset; } pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i]; @@ -633,15 +581,6 @@ void BVH2::pack_instances(size_t nodes_size, size_t leaf_nodes_size) } } - /* Merge triangle vertices data. */ - if (bvh->pack.prim_tri_verts.size()) { - const size_t prim_tri_size = bvh->pack.prim_tri_verts.size(); - memcpy(pack_prim_tri_verts + pack_prim_tri_verts_offset, - &bvh->pack.prim_tri_verts[0], - prim_tri_size * sizeof(float4)); - pack_prim_tri_verts_offset += prim_tri_size; - } - /* merge nodes */ if (bvh->pack.leaf_nodes.size()) { int4 *leaf_nodes_offset = &bvh->pack.leaf_nodes[0]; diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index d3497f3a8d8..025a103d6f8 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -67,8 +67,12 @@ BVHBuild::~BVHBuild() /* Adding References */ -void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *mesh, int i) +void BVHBuild::add_reference_triangles(BoundBox &root, + BoundBox ¢er, + Mesh *mesh, + int object_index) { + const PrimitiveType primitive_type = mesh->primitive_type(); const Attribute *attr_mP = NULL; if (mesh->has_motion_blur()) { attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); @@ -81,7 +85,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m BoundBox bounds = BoundBox::empty; t.bounds_grow(verts, bounds); if (bounds.valid() && t.valid(verts)) { - references.push_back(BVHReference(bounds, j, i, PRIMITIVE_TRIANGLE)); + references.push_back(BVHReference(bounds, j, object_index, primitive_type)); root.grow(bounds); center.grow(bounds.center2()); } @@ -101,7 +105,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m t.bounds_grow(vert_steps + step * num_verts, bounds); } if (bounds.valid()) { - references.push_back(BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE)); + references.push_back(BVHReference(bounds, j, object_index, primitive_type)); root.grow(bounds); center.grow(bounds.center2()); } @@ -140,7 +144,7 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m if (bounds.valid()) { const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; references.push_back( - BVHReference(bounds, j, i, PRIMITIVE_MOTION_TRIANGLE, prev_time, curr_time)); + BVHReference(bounds, j, object_index, primitive_type, prev_time, curr_time)); root.grow(bounds); center.grow(bounds.center2()); } @@ -153,18 +157,14 @@ void BVHBuild::add_reference_triangles(BoundBox &root, BoundBox ¢er, Mesh *m } } -void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int i) +void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair, int object_index) { const Attribute *curve_attr_mP = NULL; if (hair->has_motion_blur()) { curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); } - const PrimitiveType primitive_type = - (curve_attr_mP != NULL) ? - ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON : - PRIMITIVE_MOTION_CURVE_THICK) : - ((hair->curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK); + const PrimitiveType primitive_type = hair->primitive_type(); const size_t num_curves = hair->num_curves(); for (uint j = 0; j < num_curves; j++) { @@ -177,7 +177,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair curve.bounds_grow(k, &hair->get_curve_keys()[0], curve_radius, bounds); if (bounds.valid()) { int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); - references.push_back(BVHReference(bounds, j, i, packed_type)); + references.push_back(BVHReference(bounds, j, object_index, packed_type)); root.grow(bounds); center.grow(bounds.center2()); } @@ -198,7 +198,7 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair } if (bounds.valid()) { int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); - references.push_back(BVHReference(bounds, j, i, packed_type)); + references.push_back(BVHReference(bounds, j, object_index, packed_type)); root.grow(bounds); center.grow(bounds.center2()); } @@ -254,7 +254,8 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair if (bounds.valid()) { const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1; int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k); - references.push_back(BVHReference(bounds, j, i, packed_type, prev_time, curr_time)); + references.push_back( + BVHReference(bounds, j, object_index, packed_type, prev_time, curr_time)); root.grow(bounds); center.grow(bounds.center2()); } @@ -268,15 +269,18 @@ void BVHBuild::add_reference_curves(BoundBox &root, BoundBox ¢er, Hair *hair } } -void BVHBuild::add_reference_geometry(BoundBox &root, BoundBox ¢er, Geometry *geom, int i) +void BVHBuild::add_reference_geometry(BoundBox &root, + BoundBox ¢er, + Geometry *geom, + int object_index) { if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { Mesh *mesh = static_cast<Mesh *>(geom); - add_reference_triangles(root, center, mesh, i); + add_reference_triangles(root, center, mesh, object_index); } else if (geom->geometry_type == Geometry::HAIR) { Hair *hair = static_cast<Hair *>(geom); - add_reference_curves(root, center, hair, i); + add_reference_curves(root, center, hair, object_index); } } diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp index eebc1e1e547..8c1ca1f5b38 100644 --- a/intern/cycles/bvh/bvh_embree.cpp +++ b/intern/cycles/bvh/bvh_embree.cpp @@ -136,10 +136,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) } else { kernel_embree_convert_hit(kg, ray, hit, ¤t_isect); - int object = (current_isect.object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, current_isect.prim) : - current_isect.object; - if (ctx->local_object_id != object) { + if (ctx->local_object_id != current_isect.object) { /* This tells Embree to continue tracing. */ *args->valid = 0; break; @@ -206,9 +203,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args) ++ctx->num_hits; *isect = current_isect; /* Only primitives from volume object. */ - uint tri_object = (isect->object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, isect->prim) : - isect->object; + uint tri_object = isect->object; int object_flag = kernel_tex_fetch(__object_flag, tri_object); if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { --ctx->num_hits; @@ -437,7 +432,7 @@ void BVHEmbree::add_instance(Object *ob, int i) void BVHEmbree::add_triangles(const Object *ob, const Mesh *mesh, int i) { - size_t prim_offset = mesh->optix_prim_offset; + size_t prim_offset = mesh->prim_offset; const Attribute *attr_mP = NULL; size_t num_motion_steps = 1; @@ -606,7 +601,7 @@ void BVHEmbree::set_curve_vertex_buffer(RTCGeometry geom_id, const Hair *hair, c void BVHEmbree::add_curves(const Object *ob, const Hair *hair, int i) { - size_t prim_offset = hair->optix_prim_offset; + size_t prim_offset = hair->curve_segment_offset; const Attribute *attr_mP = NULL; size_t num_motion_steps = 1; @@ -683,7 +678,7 @@ void BVHEmbree::refit(Progress &progress) if (mesh->num_triangles() > 0) { RTCGeometry geom = rtcGetGeometry(scene, geom_id); set_tri_vertex_buffer(geom, mesh, true); - rtcSetGeometryUserData(geom, (void *)mesh->optix_prim_offset); + rtcSetGeometryUserData(geom, (void *)mesh->prim_offset); rtcCommitGeometry(geom); } } @@ -692,7 +687,7 @@ void BVHEmbree::refit(Progress &progress) if (hair->num_curves() > 0) { RTCGeometry geom = rtcGetGeometry(scene, geom_id + 1); set_curve_vertex_buffer(geom, hair, true); - rtcSetGeometryUserData(geom, (void *)hair->optix_prim_offset); + rtcSetGeometryUserData(geom, (void *)hair->curve_segment_offset); rtcCommitGeometry(geom); } } diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index f9a15553aa9..89f4b696b4c 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -1252,7 +1252,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer; build_input.curveArray.indexStrideInBytes = sizeof(int); build_input.curveArray.flag = build_flags; - build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset; + build_input.curveArray.primitiveIndexOffset = hair->curve_segment_offset; } else { /* Disable visibility test any-hit program, since it is already checked during @@ -1265,7 +1265,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb); build_input.customPrimitiveArray.flags = &build_flags; build_input.customPrimitiveArray.numSbtRecords = 1; - build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset; + build_input.customPrimitiveArray.primitiveIndexOffset = hair->curve_segment_offset; } if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) { @@ -1334,7 +1334,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) * buffers for that purpose. OptiX does not allow this to be zero though, so just pass in * one and rely on that having the same meaning in this case. */ build_input.triangleArray.numSbtRecords = 1; - build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset; + build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset; if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) { progress.set_error("Failed to build OptiX acceleration structure"); @@ -1401,8 +1401,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) instance.transform[5] = 1.0f; instance.transform[10] = 1.0f; - /* Set user instance ID to object index (but leave low bit blank). */ - instance.instanceId = ob->get_device_index() << 1; + /* Set user instance ID to object index. */ + instance.instanceId = ob->get_device_index(); /* Add some of the object visibility bits to the mask. * __prim_visibility contains the combined visibility bits of all instances, so is not @@ -1514,9 +1514,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) else { /* Disable instance transform if geometry already has it applied to vertex data. */ instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; - /* Non-instanced objects read ID from 'prim_object', so distinguish - * them from instanced objects with the low bit set. */ - instance.instanceId |= 1; } } } diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h index 092d770dcac..d3db6295ea5 100644 --- a/intern/cycles/kernel/bvh/bvh_embree.h +++ b/intern/cycles/kernel/bvh/bvh_embree.h @@ -106,9 +106,6 @@ ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg, const RTCHit *hit, Intersection *isect) { - bool is_hair = hit->geomID & 1; - isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u; - isect->v = is_hair ? hit->v : hit->u; isect->t = ray->tfar; isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { @@ -121,27 +118,37 @@ ccl_device_inline void kernel_embree_convert_hit(const KernelGlobals *kg, else { isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - isect->object = OBJECT_NONE; + isect->object = hit->geomID / 2; + } + + const bool is_hair = hit->geomID & 1; + if (is_hair) { + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, isect->prim); + isect->type = segment.type; + isect->prim = segment.prim; + isect->u = hit->u; + isect->v = hit->v; + } + else { + isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; + isect->u = 1.0f - hit->v - hit->u; + isect->v = hit->u; } - isect->type = kernel_tex_fetch(__prim_type, isect->prim); } -ccl_device_inline void kernel_embree_convert_sss_hit(const KernelGlobals *kg, - const RTCRay *ray, - const RTCHit *hit, - Intersection *isect, - int local_object_id) +ccl_device_inline void kernel_embree_convert_sss_hit( + const KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object) { isect->u = 1.0f - hit->v - hit->u; isect->v = hit->u; isect->t = ray->tfar; isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( - rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2)); + rtcGetGeometry(kernel_data.bvh.scene, object * 2)); isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)); - isect->object = local_object_id; - isect->type = kernel_tex_fetch(__prim_type, isect->prim); + isect->object = object; + isect->type = kernel_tex_fetch(__objects, object).primitive_type; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index 0ae36fccf9b..82c7c1a8a6c 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -130,7 +130,6 @@ ccl_device_inline if (prim_addr >= 0) { const int prim_addr2 = __float_as_int(leaf.y); const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; /* pop */ node_addr = traversal_stack[stack_ptr]; @@ -138,14 +137,15 @@ ccl_device_inline /* primitive intersection */ while (prim_addr < prim_addr2) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); + kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == + (type & PRIMITIVE_ALL)); bool hit; /* todo: specialized intersect functions which don't fill in * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? * might give a few % performance improvement */ - switch (p_type) { + switch (type & PRIMITIVE_ALL) { case PRIMITIVE_TRIANGLE: { hit = triangle_intersect( kg, isect, P, dir, isect_t, visibility, object, prim_addr); @@ -163,17 +163,20 @@ ccl_device_inline case PRIMITIVE_MOTION_CURVE_THICK: case PRIMITIVE_CURVE_RIBBON: case PRIMITIVE_MOTION_CURVE_RIBBON: { - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - hit = curve_intersect(kg, - isect, - P, - dir, - isect_t, - visibility, - object, - prim_addr, - ray->time, - curve_type); + if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + if (ray->time < prim_time.x || ray->time > prim_time.y) { + hit = false; + break; + } + } + + const int curve_object = kernel_tex_fetch(__prim_object, prim_addr); + const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr); + hit = curve_intersect( + kg, isect, P, dir, isect_t, curve_object, curve_prim, ray->time, curve_type); + break; } #endif diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h index a26d8c514f3..2feff593c10 100644 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -165,18 +165,18 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(const KernelGlobals *kg, case PRIMITIVE_CURVE_RIBBON: case PRIMITIVE_MOTION_CURVE_RIBBON: { for (; prim_addr < prim_addr2; prim_addr++) { - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); - const bool hit = curve_intersect(kg, - isect, - P, - dir, - isect->t, - visibility, - object, - prim_addr, - ray->time, - curve_type); + if ((type & PRIMITIVE_ALL_MOTION) && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, prim_addr); + if (ray->time < prim_time.x || ray->time > prim_time.y) { + continue; + } + } + + const int curve_object = kernel_tex_fetch(__prim_object, prim_addr); + const int curve_prim = kernel_tex_fetch(__prim_index, prim_addr); + const int curve_type = kernel_tex_fetch(__prim_type, prim_addr); + const bool hit = curve_intersect( + kg, isect, P, dir, isect->t, curve_object, curve_prim, ray->time, curve_type); if (hit) { /* shadow ray early termination */ if (visibility & PATH_RAY_SHADOW_OPAQUE) diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h index 21384457b16..9f188a93e2c 100644 --- a/intern/cycles/kernel/bvh/bvh_util.h +++ b/intern/cycles/kernel/bvh/bvh_util.h @@ -118,19 +118,18 @@ ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits) ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *ccl_restrict kg, const Intersection *ccl_restrict isect) { - const int prim = kernel_tex_fetch(__prim_index, isect->prim); + const int prim = isect->prim; int shader = 0; #ifdef __HAIR__ - if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) + if (isect->type & PRIMITIVE_ALL_TRIANGLE) #endif { shader = kernel_tex_fetch(__tri_shader, prim); } #ifdef __HAIR__ else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); + shader = kernel_tex_fetch(__curves, prim).shader_id; } #endif @@ -138,21 +137,19 @@ ccl_device_forceinline int intersection_get_shader_flags(const KernelGlobals *cc } ccl_device_forceinline int intersection_get_shader_from_isect_prim( - const KernelGlobals *ccl_restrict kg, const int isect_prim) + const KernelGlobals *ccl_restrict kg, const int prim, const int isect_type) { - const int prim = kernel_tex_fetch(__prim_index, isect_prim); int shader = 0; #ifdef __HAIR__ - if (kernel_tex_fetch(__prim_type, isect_prim) & PRIMITIVE_ALL_TRIANGLE) + if (isect_type & PRIMITIVE_ALL_TRIANGLE) #endif { shader = kernel_tex_fetch(__tri_shader, prim); } #ifdef __HAIR__ else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); + shader = kernel_tex_fetch(__curves, prim).shader_id; } #endif @@ -162,25 +159,13 @@ ccl_device_forceinline int intersection_get_shader_from_isect_prim( ccl_device_forceinline int intersection_get_shader(const KernelGlobals *ccl_restrict kg, const Intersection *ccl_restrict isect) { - return intersection_get_shader_from_isect_prim(kg, isect->prim); -} - -ccl_device_forceinline int intersection_get_object(const KernelGlobals *ccl_restrict kg, - const Intersection *ccl_restrict isect) -{ - if (isect->object != OBJECT_NONE) { - return isect->object; - } - - return kernel_tex_fetch(__prim_object, isect->prim); + return intersection_get_shader_from_isect_prim(kg, isect->prim, isect->type); } ccl_device_forceinline int intersection_get_object_flags(const KernelGlobals *ccl_restrict kg, const Intersection *ccl_restrict isect) { - const int object = intersection_get_object(kg, isect); - - return kernel_tex_fetch(__object_flag, object); + return kernel_tex_fetch(__object_flag, isect->object); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index 7a79e0c4823..736f30d93ef 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -41,22 +41,15 @@ template<typename T> ccl_device_forceinline T *get_payload_ptr_2() return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2()); } -template<bool always = false> ccl_device_forceinline uint get_object_id() +ccl_device_forceinline int get_object_id() { #ifdef __OBJECT_MOTION__ - /* Always get the the instance ID from the TLAS. + /* Always get the the instance ID from the TLAS * There might be a motion transform node between TLAS and BLAS which does not have one. */ - uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); + return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); #else - uint object = optixGetInstanceId(); + return optixGetInstanceId(); #endif - /* Choose between always returning object ID or only for instances. */ - if (always || (object & 1) == 0) - /* Can just remove the low bit since instance always contains object ID. */ - return object >> 1; - else - /* Set to OBJECT_NONE if this is not an instanced object. */ - return OBJECT_NONE; } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest() @@ -108,7 +101,7 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() #endif #ifdef __BVH_LOCAL__ - const uint object = get_object_id<true>(); + const int object = get_object_id(); if (object != optixGetPayload_4() /* local_object */) { /* Only intersect with matching object. */ return optixIgnoreIntersection(); @@ -152,21 +145,23 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() local_isect->num_hits = 1; } + const int prim = optixGetPrimitiveIndex(); + Intersection *isect = &local_isect->hits[hit]; isect->t = optixGetRayTmax(); - isect->prim = optixGetPrimitiveIndex(); + isect->prim = prim; isect->object = get_object_id(); - isect->type = kernel_tex_fetch(__prim_type, isect->prim); + isect->type = kernel_tex_fetch(__objects, isect->object).primitive_type; const float2 barycentrics = optixGetTriangleBarycentrics(); isect->u = 1.0f - barycentrics.y - barycentrics.x; isect->v = barycentrics.x; /* Record geometric normal. */ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)); - const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)); - const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)); + const float3 tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)); + const float3 tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); /* Continue tracing (without this the trace call would return after the first hit). */ @@ -179,25 +174,32 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() #ifdef __SHADOW_RECORD_ALL__ bool ignore_intersection = false; - const uint prim = optixGetPrimitiveIndex(); + int prim = optixGetPrimitiveIndex(); + const uint object = get_object_id(); # ifdef __VISIBILITY_FLAG__ const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { ignore_intersection = true; } # endif float u = 0.0f, v = 0.0f; + int type = 0; if (optixIsTriangleHit()) { const float2 barycentrics = optixGetTriangleBarycentrics(); u = 1.0f - barycentrics.y - barycentrics.x; v = barycentrics.x; + type = kernel_tex_fetch(__objects, object).primitive_type; } # ifdef __HAIR__ else { u = __uint_as_float(optixGetAttribute_0()); v = __uint_as_float(optixGetAttribute_1()); + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); + type = segment.type; + prim = segment.prim; + /* Filter out curve endcaps. */ if (u == 0.0f || u == 1.0f) { ignore_intersection = true; @@ -245,8 +247,8 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() isect->v = v; isect->t = optixGetRayTmax(); isect->prim = prim; - isect->object = get_object_id(); - isect->type = kernel_tex_fetch(__prim_type, prim); + isect->object = object; + isect->type = type; # ifdef __TRANSPARENT_SHADOWS__ /* Detect if this surface has a shader with transparent shadows. */ @@ -274,15 +276,14 @@ extern "C" __global__ void __anyhit__kernel_optix_volume_test() } #endif + const uint object = get_object_id(); #ifdef __VISIBILITY_FLAG__ - const uint prim = optixGetPrimitiveIndex(); const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { return optixIgnoreIntersection(); } #endif - const uint object = get_object_id<true>(); if ((kernel_tex_fetch(__object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) { return optixIgnoreIntersection(); } @@ -301,9 +302,9 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test() #endif #ifdef __VISIBILITY_FLAG__ - const uint prim = optixGetPrimitiveIndex(); + const uint object = get_object_id(); const uint visibility = optixGetPayload_4(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { return optixIgnoreIntersection(); } @@ -316,28 +317,39 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test() extern "C" __global__ void __closesthit__kernel_optix_hit() { + const int object = get_object_id(); + const int prim = optixGetPrimitiveIndex(); + optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */ - optixSetPayload_3(optixGetPrimitiveIndex()); - optixSetPayload_4(get_object_id()); - /* Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index. */ - optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex())); + optixSetPayload_4(object); if (optixIsTriangleHit()) { const float2 barycentrics = optixGetTriangleBarycentrics(); optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x)); optixSetPayload_2(__float_as_uint(barycentrics.x)); + optixSetPayload_3(prim); + optixSetPayload_5(kernel_tex_fetch(__objects, object).primitive_type); } else { + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, prim); optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */ optixSetPayload_2(optixGetAttribute_1()); + optixSetPayload_3(segment.prim); + optixSetPayload_5(segment.type); } } #ifdef __HAIR__ -ccl_device_inline void optix_intersection_curve(const uint prim, const uint type) +ccl_device_inline void optix_intersection_curve(const int prim, const int type) { - const uint object = get_object_id<true>(); + const int object = get_object_id(); + +# ifdef __VISIBILITY_FLAG__ const uint visibility = optixGetPayload_4(); + if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) { + return; + } +# endif float3 P = optixGetObjectRayOrigin(); float3 dir = optixGetObjectRayDirection(); @@ -358,7 +370,7 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type if (isect.t != FLT_MAX) isect.t *= len; - if (curve_intersect(NULL, &isect, P, dir, isect.t, visibility, object, prim, time, type)) { + if (curve_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) { optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL, __float_as_int(isect.u), /* Attribute_0 */ @@ -368,9 +380,9 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type extern "C" __global__ void __intersection__curve_ribbon() { - const uint prim = optixGetPrimitiveIndex(); - const uint type = kernel_tex_fetch(__prim_type, prim); - + const KernelCurveSegment segment = kernel_tex_fetch(__curve_segments, optixGetPrimitiveIndex()); + const int prim = segment.prim; + const int type = segment.type; if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { optix_intersection_curve(prim, type); } diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index a827a67ce7a..811558edae9 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -34,8 +34,8 @@ ccl_device float curve_attribute_float(const KernelGlobals *kg, float *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); @@ -76,8 +76,8 @@ ccl_device float2 curve_attribute_float2(const KernelGlobals *kg, float2 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); @@ -122,8 +122,8 @@ ccl_device float3 curve_attribute_float3(const KernelGlobals *kg, float3 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); @@ -164,8 +164,8 @@ ccl_device float4 curve_attribute_float4(const KernelGlobals *kg, float4 *dy) { if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 f0 = kernel_tex_fetch(__attributes_float3, desc.offset + k0); @@ -206,8 +206,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd) float r = 0.0f; if (sd->type & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; @@ -231,8 +231,8 @@ ccl_device float curve_thickness(const KernelGlobals *kg, const ShaderData *sd) ccl_device float3 curve_motion_center_location(const KernelGlobals *kg, const ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + KernelCurve curve = kernel_tex_fetch(__curves, sd->prim); + int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; float4 P_curve[2]; diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index a068e93790a..30addb9616d 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -630,33 +630,19 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, const float3 P, const float3 dir, const float tmax, - uint visibility, int object, - int curveAddr, + int prim, float time, int type) { const bool is_motion = (type & PRIMITIVE_ALL_MOTION); -# ifndef __KERNEL_OPTIX__ /* See OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */ - if (is_motion && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); - if (time < prim_time.x || time > prim_time.y) { - return false; - } - } -# endif + KernelCurve kcurve = kernel_tex_fetch(__curves, prim); - int segment = PRIMITIVE_UNPACK_SEGMENT(type); - int prim = kernel_tex_fetch(__prim_index, curveAddr); - - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type); int k1 = k0 + 1; - - int ka = max(k0 - 1, __float_as_int(v00.x)); - int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); float4 curve[4]; if (!is_motion) { @@ -666,21 +652,14 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, curve[3] = kernel_tex_fetch(__curve_keys, kb); } else { - int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object; - motion_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, curve); + motion_curve_keys(kg, object, prim, time, ka, k0, k1, kb, curve); } -# ifdef __VISIBILITY_FLAG__ - if (!(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)) { - return false; - } -# endif - if (type & (PRIMITIVE_CURVE_RIBBON | PRIMITIVE_MOTION_CURVE_RIBBON)) { /* todo: adaptive number of subdivisions could help performance here. */ const int subdivisions = kernel_data.bvh.curve_subdivisions; if (ribbon_intersect(P, dir, tmax, subdivisions, curve, isect)) { - isect->prim = curveAddr; + isect->prim = prim; isect->object = object; isect->type = type; return true; @@ -690,7 +669,7 @@ ccl_device_forceinline bool curve_intersect(const KernelGlobals *kg, } else { if (curve_intersect_recursive(P, dir, tmax, curve, isect)) { - isect->prim = curveAddr; + isect->prim = prim; isect->object = object; isect->type = type; return true; @@ -708,7 +687,7 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, const int isect_object, const int isect_prim) { - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -716,14 +695,12 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, D = safe_normalize_len(D, &t); } - int prim = kernel_tex_fetch(__prim_index, isect_prim); - float4 v00 = kernel_tex_fetch(__curves, prim); + KernelCurve kcurve = kernel_tex_fetch(__curves, isect_prim); - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); int k1 = k0 + 1; - - int ka = max(k0 - 1, __float_as_int(v00.x)); - int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + int ka = max(k0 - 1, kcurve.first_key); + int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1); float4 P_curve[4]; @@ -780,15 +757,13 @@ ccl_device_inline void curve_shader_setup(const KernelGlobals *kg, sd->dPdv = cross(dPdu, sd->Ng); # endif - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } sd->P = P; - - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - sd->shader = __float_as_int(curvedata.z); + sd->shader = kernel_tex_fetch(__curves, sd->prim).shader_id; } #endif diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 239bd0a37b2..b7f182090aa 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -72,9 +72,9 @@ ccl_device_inline void motion_triangle_verts_for_step(const KernelGlobals *kg, { if (step == numsteps) { /* center step: regular vertex location */ - verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); } else { /* center step not store in this array */ diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h index ec7e4b07d76..6fb9756ff92 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h @@ -44,7 +44,7 @@ ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg, float3 verts[3]) { #ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { if (UNLIKELY(t == 0.0f)) { return P; } @@ -70,7 +70,7 @@ ccl_device_inline float3 motion_triangle_refine(const KernelGlobals *kg, /* Compute refined position. */ P = P + D * rt; - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -106,7 +106,7 @@ ccl_device_inline return motion_triangle_refine(kg, sd, P, D, t, isect_object, isect_prim, verts); # else # ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -128,7 +128,7 @@ ccl_device_inline P = P + D * rt; - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -186,8 +186,9 @@ ccl_device_inline bool motion_triangle_intersect(const KernelGlobals *kg, isect->t = t; isect->u = u; isect->v = v; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : + object; isect->type = PRIMITIVE_MOTION_TRIANGLE; return true; } @@ -288,8 +289,8 @@ ccl_device_inline bool motion_triangle_intersect_local(const KernelGlobals *kg, isect->t = t; isect->u = u; isect->v = v; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = local_object; isect->type = PRIMITIVE_MOTION_TRIANGLE; /* Record geometric normal. */ diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h index 5dc03940238..f78d194359d 100644 --- a/intern/cycles/kernel/geom/geom_shader_data.h +++ b/intern/cycles/kernel/geom/geom_shader_data.h @@ -52,10 +52,9 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k sd->v = isect->v; sd->ray_length = isect->t; sd->type = isect->type; - sd->object = (isect->object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, isect->prim) : - isect->object; + sd->object = isect->object; sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); - sd->prim = kernel_tex_fetch(__prim_index, isect->prim); + sd->prim = isect->prim; sd->lamp = LAMP_NONE; sd->flag = 0; diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 910fb122c6d..8edba46fd39 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -29,9 +29,9 @@ ccl_device_inline float3 triangle_normal(const KernelGlobals *kg, ShaderData *sd { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + const float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + const float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + const float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* return normal */ if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { @@ -54,9 +54,9 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg, { /* load triangle vertices */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* compute point */ float t = 1.0f - u - v; *P = (u * v0 + v * v1 + t * v2); @@ -78,9 +78,9 @@ ccl_device_inline void triangle_point_normal(const KernelGlobals *kg, ccl_device_inline void triangle_vertices(const KernelGlobals *kg, int prim, float3 P[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); } /* Triangle vertex locations and vertex normals */ @@ -91,9 +91,9 @@ ccl_device_inline void triangle_vertices_and_normals(const KernelGlobals *kg, float3 N[3]) { const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); N[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); N[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); N[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); @@ -145,9 +145,9 @@ ccl_device_inline void triangle_dPdudv(const KernelGlobals *kg, { /* fetch triangle vertex coordinates */ const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); - const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); - const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + const float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 0)); + const float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 1)); + const float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex.w + 2)); /* compute derivatives of P w.r.t. uv */ *dPdu = (p0 - p2); diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 30b77ebd2eb..b784cc75d08 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -35,13 +35,14 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg, int object, int prim_addr) { - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; #else - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); #endif float t, u, v; if (ray_triangle_intersect(P, @@ -64,8 +65,9 @@ ccl_device_inline bool triangle_intersect(const KernelGlobals *kg, if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif { - isect->prim = prim_addr; - isect->object = object; + isect->object = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : + object; + isect->prim = prim; isect->type = PRIMITIVE_TRIANGLE; isect->u = u; isect->v = v; @@ -102,13 +104,14 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, } } - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + const int prim = kernel_tex_fetch(__prim_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, prim).w; # if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef *)&kg->__tri_verts.data[tri_vindex]; # else - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); # endif float t, u, v; if (!ray_triangle_intersect(P, @@ -167,8 +170,8 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, /* Record intersection. */ Intersection *isect = &local_isect->hits[hit]; - isect->prim = prim_addr; - isect->object = object; + isect->prim = prim; + isect->object = local_object; isect->type = PRIMITIVE_TRIANGLE; isect->u = u; isect->v = v; @@ -176,9 +179,9 @@ ccl_device_inline bool triangle_intersect_local(const KernelGlobals *kg, /* Record geometric normal. */ # if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__tri_verts, tri_vindex + 2)); # endif local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); @@ -206,7 +209,7 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, const int isect_prim) { #ifdef __INTERSECTION_REFINE__ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { if (UNLIKELY(t == 0.0f)) { return P; } @@ -219,10 +222,10 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, P = P + D * t; - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect_prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); @@ -239,7 +242,7 @@ ccl_device_inline float3 triangle_refine(const KernelGlobals *kg, P = P + D * rt; } - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } @@ -265,7 +268,7 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, /* t is always in world space with OptiX. */ return triangle_refine(kg, sd, P, D, t, isect_object, isect_prim); #else - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_inverse_transform(kg, sd); P = transform_point(&tfm, P); @@ -276,10 +279,10 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, P = P + D * t; # ifdef __INTERSECTION_REFINE__ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect_prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + const uint tri_vindex = kernel_tex_fetch(__tri_vindex, isect_prim).w; + const float4 tri_a = kernel_tex_fetch(__tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__tri_verts, tri_vindex + 2); float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); @@ -297,7 +300,7 @@ ccl_device_inline float3 triangle_refine_local(const KernelGlobals *kg, } # endif /* __INTERSECTION_REFINE__ */ - if (isect_object != OBJECT_NONE) { + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { const Transform tfm = object_get_transform(kg, sd); P = transform_point(&tfm, P); } diff --git a/intern/cycles/kernel/integrator/integrator_intersect_closest.h b/intern/cycles/kernel/integrator/integrator_intersect_closest.h index 4e581df1870..579a9c4d200 100644 --- a/intern/cycles/kernel/integrator/integrator_intersect_closest.h +++ b/intern/cycles/kernel/integrator/integrator_intersect_closest.h @@ -160,10 +160,7 @@ ccl_device void integrator_intersect_closest(INTEGRATOR_STATE_ARGS) if (path_state_ao_bounce(INTEGRATOR_STATE_PASS)) { ray.t = kernel_data.integrator.ao_bounces_distance; - const int last_object = last_isect_object != OBJECT_NONE ? - last_isect_object : - kernel_tex_fetch(__prim_object, last_isect_prim); - const float object_ao_distance = kernel_tex_fetch(__objects, last_object).ao_distance; + const float object_ao_distance = kernel_tex_fetch(__objects, last_isect_object).ao_distance; if (object_ao_distance != 0.0f) { ray.t = object_ao_distance; } diff --git a/intern/cycles/kernel/integrator/integrator_shade_background.h b/intern/cycles/kernel/integrator/integrator_shade_background.h index 3e4cc837e9b..234aa7cae63 100644 --- a/intern/cycles/kernel/integrator/integrator_shade_background.h +++ b/intern/cycles/kernel/integrator/integrator_shade_background.h @@ -192,7 +192,8 @@ ccl_device void integrator_shade_background(INTEGRATOR_STATE_ARGS, INTEGRATOR_STATE_WRITE(path, flag) &= ~PATH_RAY_SHADOW_CATCHER_BACKGROUND; const int isect_prim = INTEGRATOR_STATE(isect, prim); - const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim); + const int isect_type = INTEGRATOR_STATE(isect, type); + const int shader = intersection_get_shader_from_isect_prim(kg, isect_prim, isect_type); const int shader_flags = kernel_tex_fetch(__shaders, shader).flags; if ((shader_flags & SD_HAS_RAYTRACE) || (kernel_data.film.pass_ao != PASS_UNUSED)) { diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h index 9490738404e..c309d20a046 100644 --- a/intern/cycles/kernel/integrator/integrator_subsurface.h +++ b/intern/cycles/kernel/integrator/integrator_subsurface.h @@ -577,7 +577,7 @@ ccl_device_inline bool subsurface_scatter(INTEGRATOR_STATE_ARGS) # ifdef __VOLUME__ /* Update volume stack if needed. */ if (kernel_data.integrator.use_volumes) { - const int object = intersection_get_object(kg, &ss_isect.hits[0]); + const int object = ss_isect.hits[0].object; const int object_flag = kernel_tex_fetch(__object_flag, object); if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) { diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index bf9b94c1753..464ecb183cb 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -18,11 +18,9 @@ # define KERNEL_TEX(type, name) #endif -/* bvh */ +/* BVH2, not used for OptiX or Embree. */ KERNEL_TEX(float4, __bvh_nodes) KERNEL_TEX(float4, __bvh_leaf_nodes) -KERNEL_TEX(float4, __prim_tri_verts) -KERNEL_TEX(uint, __prim_tri_index) KERNEL_TEX(uint, __prim_type) KERNEL_TEX(uint, __prim_visibility) KERNEL_TEX(uint, __prim_index) @@ -46,10 +44,12 @@ KERNEL_TEX(float4, __tri_vnormal) KERNEL_TEX(uint4, __tri_vindex) KERNEL_TEX(uint, __tri_patch) KERNEL_TEX(float2, __tri_patch_uv) +KERNEL_TEX(float4, __tri_verts) /* curves */ -KERNEL_TEX(float4, __curves) +KERNEL_TEX(KernelCurve, __curves) KERNEL_TEX(float4, __curve_keys) +KERNEL_TEX(KernelCurveSegment, __curve_segments) /* patches */ KERNEL_TEX(uint, __patches) diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 22dde3537eb..4a72f45f1a2 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -1270,10 +1270,25 @@ typedef struct KernelObject { float ao_distance; - float pad1, pad2; + uint visibility; + int primitive_type; } KernelObject; static_assert_align(KernelObject, 16); +typedef struct KernelCurve { + int shader_id; + int first_key; + int num_keys; + int type; +} KernelCurve; +static_assert_align(KernelCurve, 16); + +typedef struct KernelCurveSegment { + int prim; + int type; +} KernelCurveSegment; +static_assert_align(KernelCurveSegment, 8); + typedef struct KernelSpotLight { float radius; float invarea; diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index 9d7ce202d49..19176087180 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -206,8 +206,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, # ifdef __OBJECT_MOTION__ else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { float3 verts[3]; - motion_triangle_vertices( - kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts); + motion_triangle_vertices(kg, sd->object, isect.hits[hit].prim, sd->time, verts); hit_P = motion_triangle_refine_local( kg, sd, ray->P, ray->D, ray->t, isect.hits[hit].object, isect.hits[hit].prim, verts); } @@ -215,9 +214,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, /* Get geometric normal. */ float3 hit_Ng = isect.Ng[hit]; - int object = (isect.hits[hit].object == OBJECT_NONE) ? - kernel_tex_fetch(__prim_object, isect.hits[hit].prim) : - isect.hits[hit].object; + int object = isect.hits[hit].object; int object_flag = kernel_tex_fetch(__object_flag, object); if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { hit_Ng = -hit_Ng; @@ -225,7 +222,7 @@ ccl_device float3 svm_bevel(INTEGRATOR_STATE_CONST_ARGS, /* Compute smooth normal. */ float3 N = hit_Ng; - int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim); + int prim = isect.hits[hit].prim; int shader = kernel_tex_fetch(__tri_shader, prim); if (shader & SHADER_SMOOTH_NORMAL) { diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp index 4de458de271..49b5f9e27ee 100644 --- a/intern/cycles/render/geometry.cpp +++ b/intern/cycles/render/geometry.cpp @@ -46,12 +46,6 @@ CCL_NAMESPACE_BEGIN /* Geometry */ -PackFlags operator|=(PackFlags &pack_flags, uint32_t value) -{ - pack_flags = (PackFlags)((uint32_t)pack_flags | value); - return pack_flags; -} - NODE_ABSTRACT_DEFINE(Geometry) { NodeType *type = NodeType::add("geometry_base", NULL); @@ -79,7 +73,6 @@ Geometry::Geometry(const NodeType *node_type, const Type type) bvh = NULL; attr_map_offset = 0; - optix_prim_offset = 0; prim_offset = 0; } @@ -707,9 +700,9 @@ void GeometryManager::update_attribute_element_offset(Geometry *geom, if (element == ATTR_ELEMENT_CURVE) offset -= hair->prim_offset; else if (element == ATTR_ELEMENT_CURVE_KEY) - offset -= hair->curvekey_offset; + offset -= hair->curve_key_offset; else if (element == ATTR_ELEMENT_CURVE_KEY_MOTION) - offset -= hair->curvekey_offset; + offset -= hair->curve_key_offset; } } else { @@ -972,28 +965,22 @@ void GeometryManager::mesh_calc_offset(Scene *scene, BVHLayout bvh_layout) size_t vert_size = 0; size_t tri_size = 0; - size_t curve_key_size = 0; size_t curve_size = 0; + size_t curve_key_size = 0; + size_t curve_segment_size = 0; size_t patch_size = 0; size_t face_size = 0; size_t corner_size = 0; - size_t optix_prim_size = 0; - foreach (Geometry *geom, scene->geometry) { - if (geom->optix_prim_offset != optix_prim_size) { - /* Need to rebuild BVH in OptiX, since refit only allows modified mesh data there */ - const bool has_optix_bvh = bvh_layout == BVH_LAYOUT_OPTIX || - bvh_layout == BVH_LAYOUT_MULTI_OPTIX || - bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE; - geom->need_update_rebuild |= has_optix_bvh; - geom->need_update_bvh_for_offset = true; - } + bool prim_offset_changed = false; if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { Mesh *mesh = static_cast<Mesh *>(geom); + prim_offset_changed = (mesh->prim_offset != tri_size); + mesh->vert_offset = vert_size; mesh->prim_offset = tri_size; @@ -1017,27 +1004,35 @@ void GeometryManager::mesh_calc_offset(Scene *scene, BVHLayout bvh_layout) face_size += mesh->get_num_subd_faces(); corner_size += mesh->subd_face_corners.size(); - - mesh->optix_prim_offset = optix_prim_size; - optix_prim_size += mesh->num_triangles(); } else if (geom->is_hair()) { Hair *hair = static_cast<Hair *>(geom); - hair->curvekey_offset = curve_key_size; + prim_offset_changed = (hair->curve_segment_offset != curve_segment_size); + hair->curve_key_offset = curve_key_size; + hair->curve_segment_offset = curve_segment_size; hair->prim_offset = curve_size; - curve_key_size += hair->get_curve_keys().size(); curve_size += hair->num_curves(); + curve_key_size += hair->get_curve_keys().size(); + curve_segment_size += hair->num_segments(); + } - hair->optix_prim_offset = optix_prim_size; - optix_prim_size += hair->num_segments(); + if (prim_offset_changed) { + /* Need to rebuild BVH in OptiX, since refit only allows modified mesh data there */ + const bool has_optix_bvh = bvh_layout == BVH_LAYOUT_OPTIX || + bvh_layout == BVH_LAYOUT_MULTI_OPTIX || + bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE; + geom->need_update_rebuild |= has_optix_bvh; + geom->need_update_bvh_for_offset = true; } } } -void GeometryManager::device_update_mesh( - Device *, DeviceScene *dscene, Scene *scene, bool for_displacement, Progress &progress) +void GeometryManager::device_update_mesh(Device *, + DeviceScene *dscene, + Scene *scene, + Progress &progress) { /* Count. */ size_t vert_size = 0; @@ -1045,6 +1040,7 @@ void GeometryManager::device_update_mesh( size_t curve_key_size = 0; size_t curve_size = 0; + size_t curve_segment_size = 0; size_t patch_size = 0; @@ -1071,31 +1067,7 @@ void GeometryManager::device_update_mesh( curve_key_size += hair->get_curve_keys().size(); curve_size += hair->num_curves(); - } - } - - /* Create mapping from triangle to primitive triangle array. */ - vector<uint> tri_prim_index(tri_size); - if (for_displacement) { - /* For displacement kernels we do some trickery to make them believe - * we've got all required data ready. However, that data is different - * from final render kernels since we don't have BVH yet, so can't - * really use same semantic of arrays. - */ - foreach (Geometry *geom, scene->geometry) { - if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { - Mesh *mesh = static_cast<Mesh *>(geom); - for (size_t i = 0; i < mesh->num_triangles(); ++i) { - tri_prim_index[i + mesh->prim_offset] = 3 * (i + mesh->prim_offset); - } - } - } - } - else { - for (size_t i = 0; i < dscene->prim_index.size(); ++i) { - if ((dscene->prim_type[i] & PRIMITIVE_ALL_TRIANGLE) != 0) { - tri_prim_index[dscene->prim_index[i]] = dscene->prim_tri_index[i]; - } + curve_segment_size += hair->num_segments(); } } @@ -1104,6 +1076,7 @@ void GeometryManager::device_update_mesh( /* normals */ progress.set_status("Updating Mesh", "Computing normals"); + float4 *tri_verts = dscene->tri_verts.alloc(tri_size * 3); uint *tri_shader = dscene->tri_shader.alloc(tri_size); float4 *vnormal = dscene->tri_vnormal.alloc(vert_size); uint4 *tri_vindex = dscene->tri_vindex.alloc(tri_size); @@ -1129,13 +1102,12 @@ void GeometryManager::device_update_mesh( mesh->pack_normals(&vnormal[mesh->vert_offset]); } - if (mesh->triangles_is_modified() || mesh->vert_patch_uv_is_modified() || copy_all_data) { - mesh->pack_verts(tri_prim_index, + if (mesh->verts_is_modified() || mesh->triangles_is_modified() || + mesh->vert_patch_uv_is_modified() || copy_all_data) { + mesh->pack_verts(&tri_verts[mesh->prim_offset * 3], &tri_vindex[mesh->prim_offset], &tri_patch[mesh->prim_offset], - &tri_patch_uv[mesh->vert_offset], - mesh->vert_offset, - mesh->prim_offset); + &tri_patch_uv[mesh->vert_offset]); } if (progress.get_cancel()) @@ -1146,6 +1118,7 @@ void GeometryManager::device_update_mesh( /* vertex coordinates */ progress.set_status("Updating Mesh", "Copying Mesh to device"); + dscene->tri_verts.copy_to_device_if_modified(); dscene->tri_shader.copy_to_device_if_modified(); dscene->tri_vnormal.copy_to_device_if_modified(); dscene->tri_vindex.copy_to_device_if_modified(); @@ -1153,13 +1126,16 @@ void GeometryManager::device_update_mesh( dscene->tri_patch_uv.copy_to_device_if_modified(); } - if (curve_size != 0) { - progress.set_status("Updating Mesh", "Copying Strands to device"); + if (curve_segment_size != 0) { + progress.set_status("Updating Mesh", "Copying Curves to device"); float4 *curve_keys = dscene->curve_keys.alloc(curve_key_size); - float4 *curves = dscene->curves.alloc(curve_size); + KernelCurve *curves = dscene->curves.alloc(curve_size); + KernelCurveSegment *curve_segments = dscene->curve_segments.alloc(curve_segment_size); - const bool copy_all_data = dscene->curve_keys.need_realloc() || dscene->curves.need_realloc(); + const bool copy_all_data = dscene->curve_keys.need_realloc() || + dscene->curves.need_realloc() || + dscene->curve_segments.need_realloc(); foreach (Geometry *geom, scene->geometry) { if (geom->is_hair()) { @@ -1175,9 +1151,9 @@ void GeometryManager::device_update_mesh( } hair->pack_curves(scene, - &curve_keys[hair->curvekey_offset], + &curve_keys[hair->curve_key_offset], &curves[hair->prim_offset], - hair->curvekey_offset); + &curve_segments[hair->curve_segment_offset]); if (progress.get_cancel()) return; } @@ -1185,6 +1161,7 @@ void GeometryManager::device_update_mesh( dscene->curve_keys.copy_to_device_if_modified(); dscene->curves.copy_to_device_if_modified(); + dscene->curve_segments.copy_to_device_if_modified(); } if (patch_size != 0 && dscene->patches.need_realloc()) { @@ -1195,10 +1172,7 @@ void GeometryManager::device_update_mesh( foreach (Geometry *geom, scene->geometry) { if (geom->is_mesh()) { Mesh *mesh = static_cast<Mesh *>(geom); - mesh->pack_patches(&patch_data[mesh->patch_offset], - mesh->vert_offset, - mesh->face_offset, - mesh->corner_offset); + mesh->pack_patches(&patch_data[mesh->patch_offset]); if (mesh->patch_table) { mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset], @@ -1212,23 +1186,6 @@ void GeometryManager::device_update_mesh( dscene->patches.copy_to_device(); } - - if (for_displacement) { - float4 *prim_tri_verts = dscene->prim_tri_verts.alloc(tri_size * 3); - foreach (Geometry *geom, scene->geometry) { - if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { - Mesh *mesh = static_cast<Mesh *>(geom); - for (size_t i = 0; i < mesh->num_triangles(); ++i) { - Mesh::Triangle t = mesh->get_triangle(i); - size_t offset = 3 * (i + mesh->prim_offset); - prim_tri_verts[offset + 0] = float3_to_float4(mesh->verts[t.v[0]]); - prim_tri_verts[offset + 1] = float3_to_float4(mesh->verts[t.v[1]]); - prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]); - } - } - } - dscene->prim_tri_verts.copy_to_device(); - } } void GeometryManager::device_update_bvh(Device *device, @@ -1256,16 +1213,6 @@ void GeometryManager::device_update_bvh(Device *device, const bool can_refit = scene->bvh != nullptr && (bparams.bvh_layout == BVHLayout::BVH_LAYOUT_OPTIX); - PackFlags pack_flags = PackFlags::PACK_NONE; - - if (scene->bvh == nullptr) { - pack_flags |= PackFlags::PACK_ALL; - } - - if (dscene->prim_visibility.is_modified()) { - pack_flags |= PackFlags::PACK_VISIBILITY; - } - BVH *bvh = scene->bvh; if (!scene->bvh) { bvh = scene->bvh = BVH::create(bparams, scene->geometry, scene->objects, device); @@ -1284,77 +1231,7 @@ void GeometryManager::device_update_bvh(Device *device, pack = std::move(static_cast<BVH2 *>(bvh)->pack); } else { - progress.set_status("Updating Scene BVH", "Packing BVH primitives"); - - size_t num_prims = 0; - size_t num_tri_verts = 0; - foreach (Geometry *geom, scene->geometry) { - if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { - Mesh *mesh = static_cast<Mesh *>(geom); - num_prims += mesh->num_triangles(); - num_tri_verts += 3 * mesh->num_triangles(); - } - else if (geom->is_hair()) { - Hair *hair = static_cast<Hair *>(geom); - num_prims += hair->num_segments(); - } - } - pack.root_index = -1; - - if (pack_flags != PackFlags::PACK_ALL) { - /* if we do not need to recreate the BVH, then only the vertices are updated, so we can - * safely retake the memory */ - dscene->prim_tri_verts.give_data(pack.prim_tri_verts); - - if ((pack_flags & PackFlags::PACK_VISIBILITY) != 0) { - dscene->prim_visibility.give_data(pack.prim_visibility); - } - } - else { - /* It is not strictly necessary to skip those resizes we if do not have to repack, as the OS - * will not allocate pages if we do not touch them, however it does help catching bugs. */ - pack.prim_tri_index.resize(num_prims); - pack.prim_tri_verts.resize(num_tri_verts); - pack.prim_type.resize(num_prims); - pack.prim_index.resize(num_prims); - pack.prim_object.resize(num_prims); - pack.prim_visibility.resize(num_prims); - } - - // Merge visibility flags of all objects and find object index for non-instanced geometry - unordered_map<const Geometry *, pair<int, uint>> geometry_to_object_info; - geometry_to_object_info.reserve(scene->geometry.size()); - foreach (Object *ob, scene->objects) { - const Geometry *const geom = ob->get_geometry(); - pair<int, uint> &info = geometry_to_object_info[geom]; - info.second |= ob->visibility_for_tracing(); - if (!geom->is_instanced()) { - info.first = ob->get_device_index(); - } - } - - TaskPool pool; - // Iterate over scene mesh list instead of objects, since 'optix_prim_offset' was calculated - // based on that list, which may be ordered differently from the object list. - foreach (Geometry *geom, scene->geometry) { - /* Make a copy of the pack_flags so the current geometry's flags do not pollute the others'. - */ - PackFlags geom_pack_flags = pack_flags; - - if (geom->is_modified()) { - geom_pack_flags |= PackFlags::PACK_VERTICES; - } - - if (geom_pack_flags == PACK_NONE) { - continue; - } - - const pair<int, uint> &info = geometry_to_object_info[geom]; - pool.push(function_bind( - &Geometry::pack_primitives, geom, &pack, info.first, info.second, geom_pack_flags)); - } - pool.wait_work(); } /* copy to device */ @@ -1375,31 +1252,23 @@ void GeometryManager::device_update_bvh(Device *device, dscene->object_node.steal_data(pack.object_node); dscene->object_node.copy_to_device(); } - if (pack.prim_tri_index.size() && (dscene->prim_tri_index.need_realloc() || has_bvh2_layout)) { - dscene->prim_tri_index.steal_data(pack.prim_tri_index); - dscene->prim_tri_index.copy_to_device(); - } - if (pack.prim_tri_verts.size()) { - dscene->prim_tri_verts.steal_data(pack.prim_tri_verts); - dscene->prim_tri_verts.copy_to_device(); - } - if (pack.prim_type.size() && (dscene->prim_type.need_realloc() || has_bvh2_layout)) { + if (pack.prim_type.size()) { dscene->prim_type.steal_data(pack.prim_type); dscene->prim_type.copy_to_device(); } - if (pack.prim_visibility.size() && (dscene->prim_visibility.is_modified() || has_bvh2_layout)) { + if (pack.prim_visibility.size()) { dscene->prim_visibility.steal_data(pack.prim_visibility); dscene->prim_visibility.copy_to_device(); } - if (pack.prim_index.size() && (dscene->prim_index.need_realloc() || has_bvh2_layout)) { + if (pack.prim_index.size()) { dscene->prim_index.steal_data(pack.prim_index); dscene->prim_index.copy_to_device(); } - if (pack.prim_object.size() && (dscene->prim_object.need_realloc() || has_bvh2_layout)) { + if (pack.prim_object.size()) { dscene->prim_object.steal_data(pack.prim_object); dscene->prim_object.copy_to_device(); } - if (pack.prim_time.size() && (dscene->prim_time.need_realloc() || has_bvh2_layout)) { + if (pack.prim_time.size()) { dscene->prim_time.steal_data(pack.prim_time); dscene->prim_time.copy_to_device(); } @@ -1629,8 +1498,6 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro dscene->bvh_nodes.tag_realloc(); dscene->bvh_leaf_nodes.tag_realloc(); dscene->object_node.tag_realloc(); - dscene->prim_tri_verts.tag_realloc(); - dscene->prim_tri_index.tag_realloc(); dscene->prim_type.tag_realloc(); dscene->prim_visibility.tag_realloc(); dscene->prim_index.tag_realloc(); @@ -1649,6 +1516,7 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro if (device_update_flags & DEVICE_CURVE_DATA_NEEDS_REALLOC) { dscene->curves.tag_realloc(); dscene->curve_keys.tag_realloc(); + dscene->curve_segments.tag_realloc(); } } @@ -1691,6 +1559,7 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro if (device_update_flags & DEVICE_MESH_DATA_MODIFIED) { /* if anything else than vertices or shaders are modified, we would need to reallocate, so * these are the only arrays that can be updated */ + dscene->tri_verts.tag_modified(); dscene->tri_vnormal.tag_modified(); dscene->tri_shader.tag_modified(); } @@ -1698,6 +1567,7 @@ void GeometryManager::device_update_preprocess(Device *device, Scene *scene, Pro if (device_update_flags & DEVICE_CURVE_DATA_MODIFIED) { dscene->curve_keys.tag_modified(); dscene->curves.tag_modified(); + dscene->curve_segments.tag_modified(); } need_flags_update = false; @@ -1906,7 +1776,7 @@ void GeometryManager::device_update(Device *device, {"device_update (displacement: copy meshes to device)", time}); } }); - device_update_mesh(device, dscene, scene, true, progress); + device_update_mesh(device, dscene, scene, progress); } if (progress.get_cancel()) { return; @@ -2058,7 +1928,7 @@ void GeometryManager::device_update(Device *device, {"device_update (copy meshes to device)", time}); } }); - device_update_mesh(device, dscene, scene, false, progress); + device_update_mesh(device, dscene, scene, progress); if (progress.get_cancel()) { return; } @@ -2091,13 +1961,12 @@ void GeometryManager::device_update(Device *device, dscene->bvh_nodes.clear_modified(); dscene->bvh_leaf_nodes.clear_modified(); dscene->object_node.clear_modified(); - dscene->prim_tri_verts.clear_modified(); - dscene->prim_tri_index.clear_modified(); dscene->prim_type.clear_modified(); dscene->prim_visibility.clear_modified(); dscene->prim_index.clear_modified(); dscene->prim_object.clear_modified(); dscene->prim_time.clear_modified(); + dscene->tri_verts.clear_modified(); dscene->tri_shader.clear_modified(); dscene->tri_vindex.clear_modified(); dscene->tri_patch.clear_modified(); @@ -2105,6 +1974,7 @@ void GeometryManager::device_update(Device *device, dscene->tri_patch_uv.clear_modified(); dscene->curves.clear_modified(); dscene->curve_keys.clear_modified(); + dscene->curve_segments.clear_modified(); dscene->patches.clear_modified(); dscene->attributes_map.clear_modified(); dscene->attributes_float.clear_modified(); @@ -2118,13 +1988,12 @@ void GeometryManager::device_free(Device *device, DeviceScene *dscene, bool forc dscene->bvh_nodes.free_if_need_realloc(force_free); dscene->bvh_leaf_nodes.free_if_need_realloc(force_free); dscene->object_node.free_if_need_realloc(force_free); - dscene->prim_tri_verts.free_if_need_realloc(force_free); - dscene->prim_tri_index.free_if_need_realloc(force_free); dscene->prim_type.free_if_need_realloc(force_free); dscene->prim_visibility.free_if_need_realloc(force_free); dscene->prim_index.free_if_need_realloc(force_free); dscene->prim_object.free_if_need_realloc(force_free); dscene->prim_time.free_if_need_realloc(force_free); + dscene->tri_verts.free_if_need_realloc(force_free); dscene->tri_shader.free_if_need_realloc(force_free); dscene->tri_vnormal.free_if_need_realloc(force_free); dscene->tri_vindex.free_if_need_realloc(force_free); @@ -2132,6 +2001,7 @@ void GeometryManager::device_free(Device *device, DeviceScene *dscene, bool forc dscene->tri_patch_uv.free_if_need_realloc(force_free); dscene->curves.free_if_need_realloc(force_free); dscene->curve_keys.free_if_need_realloc(force_free); + dscene->curve_segments.free_if_need_realloc(force_free); dscene->patches.free_if_need_realloc(force_free); dscene->attributes_map.free_if_need_realloc(force_free); dscene->attributes_float.free_if_need_realloc(force_free); diff --git a/intern/cycles/render/geometry.h b/intern/cycles/render/geometry.h index 7db122f69cb..cd42f62c669 100644 --- a/intern/cycles/render/geometry.h +++ b/intern/cycles/render/geometry.h @@ -43,24 +43,6 @@ class Shader; class Volume; struct PackedBVH; -/* Flags used to determine which geometry data need to be packed. */ -enum PackFlags : uint32_t { - PACK_NONE = 0u, - - /* Pack the geometry information (e.g. triangle or curve keys indices). */ - PACK_GEOMETRY = (1u << 0), - - /* Pack the vertices, for Meshes and Volumes' bounding meshes. */ - PACK_VERTICES = (1u << 1), - - /* Pack the visibility flags for each triangle or curve. */ - PACK_VISIBILITY = (1u << 2), - - PACK_ALL = (PACK_GEOMETRY | PACK_VERTICES | PACK_VISIBILITY), -}; - -PackFlags operator|=(PackFlags &pack_flags, uint32_t value); - /* Geometry * * Base class for geometric types like Mesh and Hair. */ @@ -100,7 +82,6 @@ class Geometry : public Node { BVH *bvh; size_t attr_map_offset; size_t prim_offset; - size_t optix_prim_offset; /* Shader Properties */ bool has_volume; /* Set in the device_update_flags(). */ @@ -144,10 +125,7 @@ class Geometry : public Node { int n, int total); - virtual void pack_primitives(PackedBVH *pack, - int object, - uint visibility, - PackFlags pack_flags) = 0; + virtual PrimitiveType primitive_type() const = 0; /* Check whether the geometry should have own BVH built separately. Briefly, * own BVH is needed for geometry, if: @@ -260,11 +238,7 @@ class GeometryManager { void device_update_object(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); - void device_update_mesh(Device *device, - DeviceScene *dscene, - Scene *scene, - bool for_displacement, - Progress &progress); + void device_update_mesh(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); void device_update_attributes(Device *device, DeviceScene *dscene, diff --git a/intern/cycles/render/hair.cpp b/intern/cycles/render/hair.cpp index 72fc612c0c0..e104455f7dd 100644 --- a/intern/cycles/render/hair.cpp +++ b/intern/cycles/render/hair.cpp @@ -295,7 +295,8 @@ NODE_DEFINE(Hair) Hair::Hair() : Geometry(get_node_type(), Geometry::HAIR) { - curvekey_offset = 0; + curve_key_offset = 0; + curve_segment_offset = 0; curve_shape = CURVE_RIBBON; } @@ -462,8 +463,8 @@ void Hair::apply_transform(const Transform &tfm, const bool apply_to_motion) void Hair::pack_curves(Scene *scene, float4 *curve_key_co, - float4 *curve_data, - size_t curvekey_offset) + KernelCurve *curves, + KernelCurveSegment *curve_segments) { size_t curve_keys_size = curve_keys.size(); @@ -477,7 +478,10 @@ void Hair::pack_curves(Scene *scene, } /* pack curve segments */ + const PrimitiveType type = primitive_type(); + size_t curve_num = num_curves(); + size_t index = 0; for (size_t i = 0; i < curve_num; i++) { Curve curve = get_curve(i); @@ -487,56 +491,24 @@ void Hair::pack_curves(Scene *scene, scene->default_surface; shader_id = scene->shader_manager->get_shader_id(shader, false); - curve_data[i] = make_float4(__int_as_float(curve.first_key + curvekey_offset), - __int_as_float(curve.num_keys), - __int_as_float(shader_id), - 0.0f); - } -} + curves[i].shader_id = shader_id; + curves[i].first_key = curve_key_offset + curve.first_key; + curves[i].num_keys = curve.num_keys; + curves[i].type = type; -void Hair::pack_primitives(PackedBVH *pack, int object, uint visibility, PackFlags pack_flags) -{ - if (curve_first_key.empty()) - return; - - /* Separate loop as other arrays are not initialized if their packing is not required. */ - if ((pack_flags & PACK_VISIBILITY) != 0) { - unsigned int *prim_visibility = &pack->prim_visibility[optix_prim_offset]; - - size_t index = 0; - for (size_t j = 0; j < num_curves(); ++j) { - Curve curve = get_curve(j); - for (size_t k = 0; k < curve.num_segments(); ++k, ++index) { - prim_visibility[index] = visibility; - } + for (int k = 0; k < curve.num_segments(); ++k, ++index) { + curve_segments[index].prim = prim_offset + i; + curve_segments[index].type = PRIMITIVE_PACK_SEGMENT(type, k); } } +} - if ((pack_flags & PACK_GEOMETRY) != 0) { - unsigned int *prim_tri_index = &pack->prim_tri_index[optix_prim_offset]; - int *prim_type = &pack->prim_type[optix_prim_offset]; - int *prim_index = &pack->prim_index[optix_prim_offset]; - int *prim_object = &pack->prim_object[optix_prim_offset]; - // 'pack->prim_time' is unused by Embree and OptiX - - uint type = has_motion_blur() ? - ((curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON : - PRIMITIVE_MOTION_CURVE_THICK) : - ((curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : - PRIMITIVE_CURVE_THICK); - - size_t index = 0; - for (size_t j = 0; j < num_curves(); ++j) { - Curve curve = get_curve(j); - for (size_t k = 0; k < curve.num_segments(); ++k, ++index) { - prim_tri_index[index] = -1; - prim_type[index] = PRIMITIVE_PACK_SEGMENT(type, k); - // Each curve segment points back to its curve index - prim_index[index] = j + prim_offset; - prim_object[index] = object; - } - } - } +PrimitiveType Hair::primitive_type() const +{ + return has_motion_blur() ? + ((curve_shape == CURVE_RIBBON) ? PRIMITIVE_MOTION_CURVE_RIBBON : + PRIMITIVE_MOTION_CURVE_THICK) : + ((curve_shape == CURVE_RIBBON) ? PRIMITIVE_CURVE_RIBBON : PRIMITIVE_CURVE_THICK); } CCL_NAMESPACE_END diff --git a/intern/cycles/render/hair.h b/intern/cycles/render/hair.h index e4451d70767..920e9601b35 100644 --- a/intern/cycles/render/hair.h +++ b/intern/cycles/render/hair.h @@ -21,6 +21,8 @@ CCL_NAMESPACE_BEGIN +struct KernelCurveSegment; + class Hair : public Geometry { public: NODE_DECLARE @@ -95,7 +97,8 @@ class Hair : public Geometry { NODE_SOCKET_API_ARRAY(array<int>, curve_shader) /* BVH */ - size_t curvekey_offset; + size_t curve_key_offset; + size_t curve_segment_offset; CurveShapeType curve_shape; /* Constructor/Destructor */ @@ -144,12 +147,12 @@ class Hair : public Geometry { void get_uv_tiles(ustring map, unordered_set<int> &tiles) override; /* BVH */ - void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset); + void pack_curves(Scene *scene, + float4 *curve_key_co, + KernelCurve *curve, + KernelCurveSegment *curve_segments); - void pack_primitives(PackedBVH *pack, - int object, - uint visibility, - PackFlags pack_flags) override; + PrimitiveType primitive_type() const override; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index fd9879dd5dd..2ecea3101db 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -729,12 +729,7 @@ void Mesh::pack_normals(float4 *vnormal) } } -void Mesh::pack_verts(const vector<uint> &tri_prim_index, - uint4 *tri_vindex, - uint *tri_patch, - float2 *tri_patch_uv, - size_t vert_offset, - size_t tri_offset) +void Mesh::pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, float2 *tri_patch_uv) { size_t verts_size = verts.size(); @@ -749,17 +744,19 @@ void Mesh::pack_verts(const vector<uint> &tri_prim_index, size_t triangles_size = num_triangles(); for (size_t i = 0; i < triangles_size; i++) { - Triangle t = get_triangle(i); - tri_vindex[i] = make_uint4(t.v[0] + vert_offset, - t.v[1] + vert_offset, - t.v[2] + vert_offset, - tri_prim_index[i + tri_offset]); + const Triangle t = get_triangle(i); + tri_vindex[i] = make_uint4( + t.v[0] + vert_offset, t.v[1] + vert_offset, t.v[2] + vert_offset, 3 * (prim_offset + i)); tri_patch[i] = (!get_num_subd_faces()) ? -1 : (triangle_patch[i] * 8 + patch_offset); + + tri_verts[i * 3] = float3_to_float4(verts[t.v[0]]); + tri_verts[i * 3 + 1] = float3_to_float4(verts[t.v[1]]); + tri_verts[i * 3 + 2] = float3_to_float4(verts[t.v[2]]); } } -void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset) +void Mesh::pack_patches(uint *patch_data) { size_t num_faces = get_num_subd_faces(); int ngons = 0; @@ -805,53 +802,9 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui } } -void Mesh::pack_primitives(ccl::PackedBVH *pack, int object, uint visibility, PackFlags pack_flags) +PrimitiveType Mesh::primitive_type() const { - if (triangles.empty()) - return; - - const size_t num_prims = num_triangles(); - - /* Use prim_offset for indexing as it is computed per geometry type, and prim_tri_verts does not - * contain data for Hair geometries. */ - float4 *prim_tri_verts = &pack->prim_tri_verts[prim_offset * 3]; - // 'pack->prim_time' is unused by Embree and OptiX - - uint type = has_motion_blur() ? PRIMITIVE_MOTION_TRIANGLE : PRIMITIVE_TRIANGLE; - - /* Separate loop as other arrays are not initialized if their packing is not required. */ - if ((pack_flags & PackFlags::PACK_VISIBILITY) != 0) { - unsigned int *prim_visibility = &pack->prim_visibility[optix_prim_offset]; - for (size_t k = 0; k < num_prims; ++k) { - prim_visibility[k] = visibility; - } - } - - if ((pack_flags & PackFlags::PACK_GEOMETRY) != 0) { - /* Use optix_prim_offset for indexing as those arrays also contain data for Hair geometries. */ - unsigned int *prim_tri_index = &pack->prim_tri_index[optix_prim_offset]; - int *prim_type = &pack->prim_type[optix_prim_offset]; - int *prim_index = &pack->prim_index[optix_prim_offset]; - int *prim_object = &pack->prim_object[optix_prim_offset]; - - for (size_t k = 0; k < num_prims; ++k) { - if ((pack_flags & PackFlags::PACK_GEOMETRY) != 0) { - prim_tri_index[k] = (prim_offset + k) * 3; - prim_type[k] = type; - prim_index[k] = prim_offset + k; - prim_object[k] = object; - } - } - } - - if ((pack_flags & PackFlags::PACK_VERTICES) != 0) { - for (size_t k = 0; k < num_prims; ++k) { - const Mesh::Triangle t = get_triangle(k); - prim_tri_verts[k * 3] = float3_to_float4(verts[t.v[0]]); - prim_tri_verts[k * 3 + 1] = float3_to_float4(verts[t.v[1]]); - prim_tri_verts[k * 3 + 2] = float3_to_float4(verts[t.v[2]]); - } - } + return has_motion_blur() ? PRIMITIVE_MOTION_TRIANGLE : PRIMITIVE_TRIANGLE; } CCL_NAMESPACE_END diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index e9e79f7f20d..8258c18ddd1 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -224,18 +224,10 @@ class Mesh : public Geometry { void pack_shaders(Scene *scene, uint *shader); void pack_normals(float4 *vnormal); - void pack_verts(const vector<uint> &tri_prim_index, - uint4 *tri_vindex, - uint *tri_patch, - float2 *tri_patch_uv, - size_t vert_offset, - size_t tri_offset); - void pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset); - - void pack_primitives(PackedBVH *pack, - int object, - uint visibility, - PackFlags pack_flags) override; + void pack_verts(float4 *tri_verts, uint4 *tri_vindex, uint *tri_patch, float2 *tri_patch_uv); + void pack_patches(uint *patch_data); + + PrimitiveType primitive_type() const override; void tessellate(DiagSplit *split); diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 1320a5eb7a6..d3ea93ca8a5 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -60,6 +60,7 @@ struct UpdateObjectTransformState { /* Packed object arrays. Those will be filled in. */ uint *object_flag; + uint *object_visibility; KernelObject *objects; Transform *object_motion_pass; DecomposedTransform *object_motion; @@ -528,6 +529,9 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s (1.0f - 0.5f * ob->shadow_terminator_shading_offset); kobject.shadow_terminator_geometry_offset = ob->shadow_terminator_geometry_offset; + kobject.visibility = ob->visibility_for_tracing(); + kobject.primitive_type = geom->primitive_type(); + /* Object flag. */ if (ob->use_holdout) { flag |= SD_OBJECT_HOLDOUT_MASK; diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index ecd6946bbf8..eeb92122825 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -49,13 +49,12 @@ DeviceScene::DeviceScene(Device *device) : bvh_nodes(device, "__bvh_nodes", MEM_GLOBAL), bvh_leaf_nodes(device, "__bvh_leaf_nodes", MEM_GLOBAL), object_node(device, "__object_node", MEM_GLOBAL), - prim_tri_index(device, "__prim_tri_index", MEM_GLOBAL), - prim_tri_verts(device, "__prim_tri_verts", MEM_GLOBAL), prim_type(device, "__prim_type", MEM_GLOBAL), prim_visibility(device, "__prim_visibility", MEM_GLOBAL), prim_index(device, "__prim_index", MEM_GLOBAL), prim_object(device, "__prim_object", MEM_GLOBAL), prim_time(device, "__prim_time", MEM_GLOBAL), + tri_verts(device, "__tri_verts", MEM_GLOBAL), tri_shader(device, "__tri_shader", MEM_GLOBAL), tri_vnormal(device, "__tri_vnormal", MEM_GLOBAL), tri_vindex(device, "__tri_vindex", MEM_GLOBAL), @@ -63,6 +62,7 @@ DeviceScene::DeviceScene(Device *device) tri_patch_uv(device, "__tri_patch_uv", MEM_GLOBAL), curves(device, "__curves", MEM_GLOBAL), curve_keys(device, "__curve_keys", MEM_GLOBAL), + curve_segments(device, "__curve_segments", MEM_GLOBAL), patches(device, "__patches", MEM_GLOBAL), objects(device, "__objects", MEM_GLOBAL), object_motion_pass(device, "__object_motion_pass", MEM_GLOBAL), diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 8076d0dc09c..001da31e893 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -74,8 +74,6 @@ class DeviceScene { device_vector<int4> bvh_nodes; device_vector<int4> bvh_leaf_nodes; device_vector<int> object_node; - device_vector<uint> prim_tri_index; - device_vector<float4> prim_tri_verts; device_vector<int> prim_type; device_vector<uint> prim_visibility; device_vector<int> prim_index; @@ -83,14 +81,16 @@ class DeviceScene { device_vector<float2> prim_time; /* mesh */ + device_vector<float4> tri_verts; device_vector<uint> tri_shader; device_vector<float4> tri_vnormal; device_vector<uint4> tri_vindex; device_vector<uint> tri_patch; device_vector<float2> tri_patch_uv; - device_vector<float4> curves; + device_vector<KernelCurve> curves; device_vector<float4> curve_keys; + device_vector<KernelCurveSegment> curve_segments; device_vector<uint> patches; |