diff options
author | Patrick Mours <pmours@nvidia.com> | 2019-11-28 15:57:02 +0300 |
---|---|---|
committer | Patrick Mours <pmours@nvidia.com> | 2019-11-28 15:57:02 +0300 |
commit | 70a32adfeb41f9b3fc376a933fbdc2d11bb73c15 (patch) | |
tree | 5f1d6639c093b818022969ce776d5dabc24ed695 /intern | |
parent | d4835b88b23bce6c28d8514e35d771408d5ef18b (diff) |
Fix assert in Cycles memory statistics when using OptiX on multiple GPUs
The acceleration structure built by OptiX may differ between GPUs, so we cannot assume its memory size is the same on all of them.
This fixes that by making each device responsible for the memory management of all of its OptiX acceleration structures (this was already the case for BLAS, and now applies to TLAS too).
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/bvh/bvh_optix.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/device/device.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/device_multi.cpp | 12 | ||||
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 32 |
4 files changed, 22 insertions, 26 deletions
diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp index b3a9aab3266..86d755ab06a 100644 --- a/intern/cycles/bvh/bvh_optix.cpp +++ b/intern/cycles/bvh/bvh_optix.cpp @@ -49,7 +49,7 @@ void BVHOptiX::copy_to_device(Progress &progress, DeviceScene *dscene) progress.set_status("Updating Scene BVH", "Building OptiX acceleration structure"); Device *const device = dscene->bvh_nodes.device; - if (!device->build_optix_bvh(this, dscene->bvh_nodes)) + if (!device->build_optix_bvh(this)) progress.set_error("Failed to build OptiX acceleration structure"); } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 672d93c2581..66fcac921d3 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -407,7 +407,7 @@ class Device { const DeviceDrawParams &draw_params); /* acceleration structure building */ - virtual bool build_optix_bvh(BVH *, device_memory &) + virtual bool build_optix_bvh(BVH *) { return false; } diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index ac71be9dbea..b8587eb0a62 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -153,21 +153,13 @@ class MultiDevice : public Device { return result; } - bool build_optix_bvh(BVH *bvh, device_memory &mem) + bool build_optix_bvh(BVH *bvh) { - device_ptr key = unique_key++; - // Broadcast acceleration structure build to all devices foreach (SubDevice &sub, devices) { - mem.device = sub.device; - if (!sub.device->build_optix_bvh(bvh, mem)) + if (!sub.device->build_optix_bvh(bvh)) return false; - sub.ptr_map[key] = mem.device_pointer; } - - mem.device = this; - mem.device_pointer = key; - stats.mem_alloc(mem.device_size); return true; } diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index e10bab5a0d8..3ef954ab27f 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -174,7 
+174,7 @@ class OptiXDevice : public Device { device_vector<SbtRecord> sbt_data; device_vector<TextureInfo> texture_info; device_only_memory<KernelParams> launch_params; - vector<device_only_memory<uint8_t>> blas; + vector<device_only_memory<uint8_t>> as_mem; OptixTraversableHandle tlas_handle = 0; // TODO(pmours): This is copied from device_cuda.cpp, so move to common code eventually @@ -268,8 +268,8 @@ class OptiXDevice : public Device { // Stop processing any more tasks task_pool.stop(); - // Clean up all memory before destroying context - blas.clear(); + // Free all acceleration structures + as_mem.clear(); sbt_data.free(); texture_info.free(); @@ -881,15 +881,16 @@ class OptiXDevice : public Device { return true; } - bool build_optix_bvh(BVH *bvh, device_memory &out_data) override + bool build_optix_bvh(BVH *bvh) override { assert(bvh->params.top_level); unsigned int num_instances = 0; unordered_map<Mesh *, vector<OptixTraversableHandle>> meshes; + meshes.reserve(bvh->meshes.size()); - // Clear all previous AS - blas.clear(); + // Free all previous acceleration structure + as_mem.clear(); // Build bottom level acceleration structures (BLAS) // Note: Always keep this logic in sync with bvh_optix.cpp! 
@@ -900,6 +901,7 @@ class OptiXDevice : public Device { Mesh *const mesh = ob->mesh; vector<OptixTraversableHandle> handles; + handles.reserve(2); // Build BLAS for curve primitives if (bvh->params.primitive_mask & PRIMITIVE_ALL_CURVE && mesh->num_curves() > 0) { @@ -966,9 +968,9 @@ class OptiXDevice : public Device { build_input.aabbArray.primitiveIndexOffset = mesh->prim_offset; // Allocate memory for new BLAS and build it - blas.emplace_back(this, "blas"); + as_mem.emplace_back(this, "blas"); handles.emplace_back(); - if (!build_optix_bvh(build_input, num_motion_steps, blas.back(), handles.back())) + if (!build_optix_bvh(build_input, num_motion_steps, as_mem.back(), handles.back())) return false; } @@ -1032,9 +1034,9 @@ class OptiXDevice : public Device { build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset + mesh->num_segments(); // Allocate memory for new BLAS and build it - blas.emplace_back(this, "blas"); + as_mem.emplace_back(this, "blas"); handles.emplace_back(); - if (!build_optix_bvh(build_input, num_motion_steps, blas.back(), handles.back())) + if (!build_optix_bvh(build_input, num_motion_steps, as_mem.back(), handles.back())) return false; } @@ -1051,6 +1053,7 @@ class OptiXDevice : public Device { // Skip non-traceable objects if (!ob->is_traceable()) continue; + // Create separate instance for triangle/curve meshes of an object for (OptixTraversableHandle handle : meshes[ob->mesh]) { OptixAabb &aabb = aabbs[num_instances]; @@ -1078,8 +1081,8 @@ class OptiXDevice : public Device { // Insert motion traversable if object has motion if (motion_blur && ob->use_motion()) { - blas.emplace_back(this, "motion_transform"); - device_only_memory<uint8_t> &motion_transform_gpu = blas.back(); + as_mem.emplace_back(this, "motion_transform"); + device_only_memory<uint8_t> &motion_transform_gpu = as_mem.back(); motion_transform_gpu.alloc_to_device(sizeof(OptixSRTMotionTransform) + (max(ob->motion.size(), 2) - 2) * sizeof(OptixSRTData)); @@ -1157,7 
+1160,7 @@ class OptiXDevice : public Device { instances.resize(num_instances); instances.copy_to_device(); - // Build top-level acceleration structure + // Build top-level acceleration structure (TLAS) OptixBuildInput build_input = {}; build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES; build_input.instanceArray.instances = instances.device_pointer; @@ -1165,7 +1168,8 @@ class OptiXDevice : public Device { build_input.instanceArray.aabbs = aabbs.device_pointer; build_input.instanceArray.numAabbs = num_instances; - return build_optix_bvh(build_input, 0 /* TLAS has no motion itself */, out_data, tlas_handle); + as_mem.emplace_back(this, "tlas"); + return build_optix_bvh(build_input, 0, as_mem.back(), tlas_handle); } void update_texture_info() |