diff options
Diffstat (limited to 'intern/cycles/device/device_optix.cpp')
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 187 |
1 files changed, 109 insertions, 78 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index 43b1fb30baf..95234845f98 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -18,6 +18,7 @@ #ifdef WITH_OPTIX # include "bvh/bvh.h" +# include "bvh/bvh_optix.h" # include "device/cuda/device_cuda.h" # include "device/device_denoising.h" # include "device/device_intern.h" @@ -137,9 +138,6 @@ class OptiXDevice : public CUDADevice { PG_HITD_MOTION, PG_HITS_MOTION, # endif -# ifdef WITH_CYCLES_DEBUG - PG_EXCP, -# endif PG_BAKE, // kernel_bake_evaluate PG_DISP, // kernel_displace_evaluate PG_BACK, // kernel_background_evaluate @@ -232,6 +230,9 @@ class OptiXDevice : public CUDADevice { } }; # endif +# if OPTIX_ABI_VERSION >= 41 && defined(WITH_CYCLES_DEBUG) + options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL; +# endif check_result_optix(optixDeviceContextCreate(cuContext, &options, &context)); # ifdef WITH_CYCLES_LOGGING check_result_optix(optixDeviceContextSetLogCallback( @@ -368,6 +369,12 @@ class OptiXDevice : public CUDADevice { module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3; module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; # endif + +# if OPTIX_ABI_VERSION >= 41 + module_options.boundValues = nullptr; + module_options.numBoundValues = 0; +# endif + OptixPipelineCompileOptions pipeline_options; // Default to no motion blur and two-level graph, since it is the fastest option pipeline_options.usesMotionBlur = false; @@ -375,12 +382,7 @@ class OptiXDevice : public CUDADevice { OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING; pipeline_options.numPayloadValues = 6; pipeline_options.numAttributeValues = 2; // u, v -# ifdef WITH_CYCLES_DEBUG - pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW | - OPTIX_EXCEPTION_FLAG_TRACE_DEPTH; -# else pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; -# endif pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h # if OPTIX_ABI_VERSION >= 36 @@ -505,12 +507,6 @@ class OptiXDevice : public CUDADevice { group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit"; } -# ifdef WITH_CYCLES_DEBUG - group_descs[PG_EXCP].kind = OPTIX_PROGRAM_GROUP_KIND_EXCEPTION; - group_descs[PG_EXCP].exception.module = optix_module; - group_descs[PG_EXCP].exception.entryFunctionName = "__exception__kernel_optix_exception"; -# endif - if (requested_features.use_baking) { group_descs[PG_BAKE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; group_descs[PG_BAKE].raygen.module = optix_module; @@ -578,9 +574,6 @@ class OptiXDevice : public CUDADevice { groups[PG_HITD_MOTION], groups[PG_HITS_MOTION], # endif -# ifdef WITH_CYCLES_DEBUG - groups[PG_EXCP], -# endif }; check_result_optix_ret( optixPipelineCreate(context, @@ -618,9 +611,6 @@ class OptiXDevice : public CUDADevice { groups[PG_HITD_MOTION], groups[PG_HITS_MOTION], # endif -# ifdef WITH_CYCLES_DEBUG - groups[PG_EXCP], -# endif }; check_result_optix_ret( optixPipelineCreate(context, @@ -734,9 +724,6 @@ class OptiXDevice : public CUDADevice { OptixShaderBindingTable sbt_params = {}; sbt_params.raygenRecord = sbt_data.device_pointer + PG_RGEN * sizeof(SbtRecord); -# ifdef WITH_CYCLES_DEBUG - sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord); -# endif sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord); sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); sbt_params.missRecordCount = 1; @@ -1064,9 +1051,6 @@ class OptiXDevice : public CUDADevice { OptixShaderBindingTable sbt_params = {}; sbt_params.raygenRecord = sbt_data.device_pointer + rgen_index * sizeof(SbtRecord); -# ifdef WITH_CYCLES_DEBUG - sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord); -# endif sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord); sbt_params.missRecordStrideInBytes = sizeof(SbtRecord); sbt_params.missRecordCount = 1; @@ -1095,23 +1079,23 @@ class OptiXDevice : public CUDADevice { bool build_optix_bvh(const OptixBuildInput &build_input, uint16_t num_motion_steps, - OptixTraversableHandle &out_handle) + OptixTraversableHandle &out_handle, + CUdeviceptr &out_data, + OptixBuildOperation operation) { - out_handle = 0; - const CUDAContextScope scope(cuContext); // Compute memory usage OptixAccelBufferSizes sizes = {}; OptixAccelBuildOptions options; - options.operation = OPTIX_BUILD_OPERATION_BUILD; + options.operation = operation; if (background) { // Prefer best performance and lowest memory consumption in background options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION; } else { // Prefer fast updates in viewport - options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD; + options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE; } options.motionOptions.numKeys = num_motion_steps; @@ -1136,8 +1120,10 @@ class OptiXDevice : public CUDADevice { move_textures_to_host(size - free, false); } - CUdeviceptr out_data = 0; - check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes)); + if (operation == OPTIX_BUILD_OPERATION_BUILD) { + check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes)); + } + as_mem.push_back(out_data); // Finally build the acceleration structure @@ -1204,23 +1190,49 @@ class OptiXDevice : public CUDADevice { unordered_map<Geometry *, OptixTraversableHandle> geometry; geometry.reserve(bvh->geometry.size()); - // Free all previous acceleration structures + // Free all previous acceleration structures which can not be refit + std::set<CUdeviceptr> refit_mem; + + for (Geometry *geom : bvh->geometry) { + if (static_cast<BVHOptiX *>(geom->bvh)->do_refit) { + refit_mem.insert(static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle); + } + } + for (CUdeviceptr mem : as_mem) { - cuMemFree(mem); + if (refit_mem.find(mem) == refit_mem.end()) { + cuMemFree(mem); + } } + as_mem.clear(); // Build bottom level acceleration structures (BLAS) // Note: Always keep this logic in sync with bvh_optix.cpp! for (Object *ob : bvh->objects) { // Skip geometry for which acceleration structure already exists - Geometry *geom = ob->geometry; + Geometry *geom = ob->get_geometry(); if (geometry.find(geom) != geometry.end()) continue; - if (geom->type == Geometry::HAIR) { + OptixTraversableHandle handle; + OptixBuildOperation operation; + CUdeviceptr out_data; + // Refit is only possible in viewport for now. + if (static_cast<BVHOptiX *>(geom->bvh)->do_refit && !background) { + out_data = static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle; + handle = static_cast<BVHOptiX *>(geom->bvh)->optix_handle; + operation = OPTIX_BUILD_OPERATION_UPDATE; + } + else { + out_data = 0; + handle = 0; + operation = OPTIX_BUILD_OPERATION_BUILD; + } + + if (geom->geometry_type == Geometry::HAIR) { // Build BLAS for curve primitives - Hair *const hair = static_cast<Hair *const>(ob->geometry); + Hair *const hair = static_cast<Hair *const>(ob->get_geometry()); if (hair->num_curves() == 0) { continue; } @@ -1229,8 +1241,8 @@ class OptiXDevice : public CUDADevice { size_t num_motion_steps = 1; Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (motion_blur && hair->use_motion_blur && motion_keys) { - num_motion_steps = hair->motion_steps; + if (motion_blur && hair->get_use_motion_blur() && motion_keys) { + num_motion_steps = hair->get_motion_steps(); } device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY); @@ -1250,16 +1262,19 @@ class OptiXDevice : public CUDADevice { // Get AABBs for each motion step for (size_t step = 0; step < num_motion_steps; ++step) { // The center step for motion vertices is not stored in the attribute - const float3 *keys = hair->curve_keys.data(); + const float3 *keys = hair->get_curve_keys().data(); size_t center_step = (num_motion_steps - 1) / 2; if (step != center_step) { size_t attr_offset = (step > center_step) ? step - 1 : step; // Technically this is a float4 array, but sizeof(float3) is the same as sizeof(float4) - keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size(); + keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size(); } for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) { const Hair::Curve curve = hair->get_curve(j); +# if OPTIX_ABI_VERSION >= 36 + const array<float> &curve_radius = hair->get_curve_radius(); +# endif for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) { # if OPTIX_ABI_VERSION >= 36 @@ -1272,10 +1287,8 @@ class OptiXDevice : public CUDADevice { const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x); const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y); const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z); - const float4 pw = make_float4(hair->curve_radius[ka], - hair->curve_radius[k0], - hair->curve_radius[k1], - hair->curve_radius[kb]); + const float4 pw = make_float4( + curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]); // Convert Catmull-Rom data to Bezier spline static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; @@ -1298,7 +1311,7 @@ class OptiXDevice : public CUDADevice { # endif { BoundBox bounds = BoundBox::empty; - curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds); + curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds); const size_t index = step * num_segments + i; aabb_data[index].minX = bounds.min.x; @@ -1381,37 +1394,41 @@ class OptiXDevice : public CUDADevice { } // Allocate memory for new BLAS and build it - OptixTraversableHandle handle; - if (build_optix_bvh(build_input, num_motion_steps, handle)) { - geometry.insert({ob->geometry, handle}); + if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) { + geometry.insert({ob->get_geometry(), handle}); + static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle = out_data; + static_cast<BVHOptiX *>(geom->bvh)->optix_handle = handle; + static_cast<BVHOptiX *>(geom->bvh)->do_refit = false; } else { return false; } } - else if (geom->type == Geometry::MESH || geom->type == Geometry::VOLUME) { + else if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) { // Build BLAS for triangle primitives - Mesh *const mesh = static_cast<Mesh *const>(ob->geometry); + Mesh *const mesh = static_cast<Mesh *const>(ob->get_geometry()); if (mesh->num_triangles() == 0) { continue; } - const size_t num_verts = mesh->verts.size(); + const size_t num_verts = mesh->get_verts().size(); size_t num_motion_steps = 1; Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (motion_blur && mesh->use_motion_blur && motion_keys) { - num_motion_steps = mesh->motion_steps; + if (motion_blur && mesh->get_use_motion_blur() && motion_keys) { + num_motion_steps = mesh->get_motion_steps(); } device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY); - index_data.alloc(mesh->triangles.size()); - memcpy(index_data.data(), mesh->triangles.data(), mesh->triangles.size() * sizeof(int)); + index_data.alloc(mesh->get_triangles().size()); + memcpy(index_data.data(), + mesh->get_triangles().data(), + mesh->get_triangles().size() * sizeof(int)); device_vector<float3> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY); vertex_data.alloc(num_verts * num_motion_steps); for (size_t step = 0; step < num_motion_steps; ++step) { - const float3 *verts = mesh->verts.data(); + const float3 *verts = mesh->get_verts().data(); size_t center_step = (num_motion_steps - 1) / 2; // The center step for motion vertices is not stored in the attribute @@ -1453,9 +1470,11 @@ class OptiXDevice : public CUDADevice { build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset; // Allocate memory for new BLAS and build it - OptixTraversableHandle handle; - if (build_optix_bvh(build_input, num_motion_steps, handle)) { - geometry.insert({ob->geometry, handle}); + if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) { + geometry.insert({ob->get_geometry(), handle}); + static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle = out_data; + static_cast<BVHOptiX *>(geom->bvh)->optix_handle = handle; + static_cast<BVHOptiX *>(geom->bvh)->do_refit = false; } else { return false; @@ -1464,8 +1483,10 @@ class OptiXDevice : public CUDADevice { } // Fill instance descriptions +# if OPTIX_ABI_VERSION < 41 device_vector<OptixAabb> aabbs(this, "tlas_aabbs", MEM_READ_ONLY); aabbs.alloc(bvh->objects.size()); +# endif device_vector<OptixInstance> instances(this, "tlas_instances", MEM_READ_ONLY); instances.alloc(bvh->objects.size()); @@ -1475,12 +1496,13 @@ class OptiXDevice : public CUDADevice { continue; // Create separate instance for triangle/curve meshes of an object - auto handle_it = geometry.find(ob->geometry); + const auto handle_it = geometry.find(ob->get_geometry()); if (handle_it == geometry.end()) { continue; } OptixTraversableHandle handle = handle_it->second; +# if OPTIX_ABI_VERSION < 41 OptixAabb &aabb = aabbs[num_instances]; aabb.minX = ob->bounds.min.x; aabb.minY = ob->bounds.min.y; @@ -1488,6 +1510,7 @@ class OptiXDevice : public CUDADevice { aabb.maxX = ob->bounds.max.x; aabb.maxY = ob->bounds.max.y; aabb.maxZ = ob->bounds.max.z; +# endif OptixInstance &instance = instances[num_instances++]; memset(&instance, 0, sizeof(instance)); @@ -1503,18 +1526,19 @@ class OptiXDevice : public CUDADevice { // Have to have at least one bit in the mask, or else instance would always be culled instance.visibilityMask = 1; - if (ob->geometry->has_volume) { + if (ob->get_geometry()->has_volume) { // Volumes have a special bit set in the visibility mask so a trace can mask only volumes instance.visibilityMask |= 2; } - if (ob->geometry->type == Geometry::HAIR) { + if (ob->get_geometry()->geometry_type == Geometry::HAIR) { // Same applies to curves (so they can be skipped in local trace calls) instance.visibilityMask |= 4; # if OPTIX_ABI_VERSION >= 36 - if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api && - static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) { + if (motion_blur && ob->get_geometry()->has_motion_blur() && + DebugFlags().optix.curves_api && + static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) { // Select between motion blur and non-motion blur built-in intersection module instance.sbtOffset = PG_HITD_MOTION - PG_HITD; } @@ -1523,7 +1547,7 @@ class OptiXDevice : public CUDADevice { // Insert motion traversable if object has motion if (motion_blur && ob->use_motion()) { - size_t motion_keys = max(ob->motion.size(), 2) - 2; + size_t motion_keys = max(ob->get_motion().size(), 2) - 2; size_t motion_transform_size = sizeof(OptixSRTMotionTransform) + motion_keys * sizeof(OptixSRTData); @@ -1537,16 +1561,17 @@ class OptiXDevice : public CUDADevice { OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>( new uint8_t[motion_transform_size]); motion_transform.child = handle; - motion_transform.motionOptions.numKeys = ob->motion.size(); + motion_transform.motionOptions.numKeys = ob->get_motion().size(); motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE; motion_transform.motionOptions.timeBegin = 0.0f; motion_transform.motionOptions.timeEnd = 1.0f; OptixSRTData *const srt_data = motion_transform.srtData; - array<DecomposedTransform> decomp(ob->motion.size()); - transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size()); + array<DecomposedTransform> decomp(ob->get_motion().size()); + transform_motion_decompose( + decomp.data(), ob->get_motion().data(), ob->get_motion().size()); - for (size_t i = 0; i < ob->motion.size(); ++i) { + for (size_t i = 0; i < ob->get_motion().size(); ++i) { // Scale srt_data[i].sx = decomp[i].y.w; // scale.x.x srt_data[i].sy = decomp[i].z.w; // scale.y.y @@ -1593,9 +1618,9 @@ class OptiXDevice : public CUDADevice { else { instance.traversableHandle = handle; - if (ob->geometry->is_instanced()) { + if (ob->get_geometry()->is_instanced()) { // Set transform matrix - memcpy(instance.transform, &ob->tfm, sizeof(instance.transform)); + memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform)); } else { // Disable instance transform if geometry already has it applied to vertex data @@ -1608,20 +1633,26 @@ class OptiXDevice : public CUDADevice { } // Upload instance descriptions +# if OPTIX_ABI_VERSION < 41 aabbs.resize(num_instances); aabbs.copy_to_device(); +# endif instances.resize(num_instances); instances.copy_to_device(); // Build top-level acceleration structure (TLAS) OptixBuildInput build_input = {}; build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES; - build_input.instanceArray.instances = instances.device_pointer; - build_input.instanceArray.numInstances = num_instances; +# if OPTIX_ABI_VERSION < 41 // Instance AABBs no longer need to be set since OptiX 7.2 build_input.instanceArray.aabbs = aabbs.device_pointer; build_input.instanceArray.numAabbs = num_instances; +# endif + build_input.instanceArray.instances = instances.device_pointer; + build_input.instanceArray.numInstances = num_instances; - return build_optix_bvh(build_input, 0, tlas_handle); + CUdeviceptr out_data = 0; + tlas_handle = 0; + return build_optix_bvh(build_input, 0, tlas_handle, out_data, OPTIX_BUILD_OPERATION_BUILD); } void const_copy_to(const char *name, void *host, size_t size) override @@ -1725,8 +1756,8 @@ bool device_optix_init() const OptixResult result = optixInit(); if (result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) { - VLOG(1) << "OptiX initialization failed because driver does not support ABI version " - << OPTIX_ABI_VERSION; + VLOG(1) << "OptiX initialization failed because the installed NVIDIA driver is too old. " + "Please update to the latest driver first!"; return false; } else if (result != OPTIX_SUCCESS) { |