diff options
author | Patrick Mours <pmours@nvidia.com> | 2020-07-06 13:25:54 +0300 |
---|---|---|
committer | Patrick Mours <pmours@nvidia.com> | 2020-07-07 16:39:02 +0300 |
commit | 737bd549b6eeee81f0573ad8e305fb8d888d82ec (patch) | |
tree | fc8aaca871557d652287cb3d4f9ee3033ff06e46 /intern/cycles/device | |
parent | 95f0f312799e10e4a9f5c884f8f20ec76d0ff363 (diff) |
Cycles: Add support for native OptiX curve primitive
This patch adds support for the curve primitive from OptiX to Cycles. It's currently hidden
behind a debug option, since there can be some slight rendering differences still (because no
backface culling is performed and something seems off with endcaps). The curve primitive
was added with the OptiX 7.1 SDK and requires a r450 driver or newer, so this also updates
the codebase to be able to build with the new SDK.
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D8223
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 300 |
1 files changed, 247 insertions, 53 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index ececca3df53..266222c74c5 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -131,8 +131,12 @@ class OptiXDevice : public CUDADevice { PG_RGEN, PG_MISS, PG_HITD, // Default hit group - PG_HITL, // __BVH_LOCAL__ hit group PG_HITS, // __SHADOW_RECORD_ALL__ hit group + PG_HITL, // __BVH_LOCAL__ hit group (only used for triangles) +# if OPTIX_ABI_VERSION >= 36 + PG_HITD_MOTION, + PG_HITS_MOTION, +# endif # ifdef WITH_CYCLES_DEBUG PG_EXCP, # endif @@ -177,6 +181,7 @@ class OptiXDevice : public CUDADevice { OptixDeviceContext context = NULL; OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module + OptixModule builtin_modules[2] = {}; OptixPipeline pipelines[NUM_PIPELINES] = {}; bool motion_blur = false; @@ -264,6 +269,9 @@ class OptiXDevice : public CUDADevice { // Unload modules if (optix_module != NULL) optixModuleDestroy(optix_module); + for (unsigned int i = 0; i < 2; ++i) + if (builtin_modules[i] != NULL) + optixModuleDestroy(builtin_modules[i]); for (unsigned int i = 0; i < NUM_PIPELINES; ++i) if (pipelines[i] != NULL) optixPipelineDestroy(pipelines[i]); @@ -338,6 +346,12 @@ class OptiXDevice : public CUDADevice { optixModuleDestroy(optix_module); optix_module = NULL; } + for (unsigned int i = 0; i < 2; ++i) { + if (builtin_modules[i] != NULL) { + optixModuleDestroy(builtin_modules[i]); + builtin_modules[i] = NULL; + } + } for (unsigned int i = 0; i < NUM_PIPELINES; ++i) { if (pipelines[i] != NULL) { optixPipelineDestroy(pipelines[i]); @@ -369,6 +383,18 @@ class OptiXDevice : public CUDADevice { # endif pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h +# if OPTIX_ABI_VERSION >= 36 + pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; + if (requested_features.use_hair) { + if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) { + pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE; + } + else { + pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; + } + } +# endif + // Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds // This is necessary since objects may be reported to have motion if the Vector pass is // active, but may still need to be rendered without motion blur if that isn't active as well @@ -442,6 +468,34 @@ class OptiXDevice : public CUDADevice { group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; } + +# if OPTIX_ABI_VERSION >= 36 + if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) { + OptixBuiltinISOptions builtin_options; + builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; + builtin_options.usesMotionBlur = false; + + check_result_optix_ret(optixBuiltinISModuleGet( + context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0])); + + group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0]; + group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr; + group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0]; + group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr; + + if (motion_blur) { + builtin_options.usesMotionBlur = true; + + check_result_optix_ret(optixBuiltinISModuleGet( + context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1])); + + group_descs[PG_HITD_MOTION] = group_descs[PG_HITD]; + group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1]; + group_descs[PG_HITS_MOTION] = group_descs[PG_HITS]; + group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1]; + } + } +# endif } if (requested_features.use_subsurface || requested_features.use_shader_raytrace) { @@ -493,8 +547,14 @@ class OptiXDevice : public CUDADevice { unsigned int trace_css = stack_size[PG_HITD].cssCH; // This is based on the maximum of closest-hit and any-hit/intersection programs trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); - trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); + trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); +# if OPTIX_ABI_VERSION >= 36 + trace_css = std::max(trace_css, + stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH); + trace_css = std::max(trace_css, + stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH); +# endif OptixPipelineLinkOptions link_options; link_options.maxTraceDepth = 1; @@ -503,17 +563,23 @@ class OptiXDevice : public CUDADevice { # else link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; # endif - link_options.overrideUsesMotionBlur = pipeline_options.usesMotionBlur; +# if OPTIX_ABI_VERSION < 24 + link_options.overrideUsesMotionBlur = motion_blur; +# endif { // Create path tracing pipeline OptixProgramGroup pipeline_groups[] = { - groups[PG_RGEN], - groups[PG_MISS], - groups[PG_HITD], - groups[PG_HITS], - groups[PG_HITL], + groups[PG_RGEN], + groups[PG_MISS], + groups[PG_HITD], + groups[PG_HITS], + groups[PG_HITL], +# if OPTIX_ABI_VERSION >= 36 + groups[PG_HITD_MOTION], + groups[PG_HITS_MOTION], +# endif # ifdef WITH_CYCLES_DEBUG - groups[PG_EXCP], + groups[PG_EXCP], # endif }; check_result_optix_ret( @@ -530,8 +596,8 @@ class OptiXDevice : public CUDADevice { const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css; // Set stack size depending on pipeline options - check_result_optix_ret(optixPipelineSetStackSize( - pipelines[PIP_PATH_TRACE], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2))); + check_result_optix_ret( + optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2))); } // Only need to create shader evaluation pipeline if one of these features is used: @@ -541,15 +607,19 @@ class OptiXDevice : public CUDADevice { if (use_shader_eval_pipeline) { // Create shader evaluation pipeline OptixProgramGroup pipeline_groups[] = { - groups[PG_BAKE], - groups[PG_DISP], - groups[PG_BACK], - groups[PG_MISS], - groups[PG_HITD], - groups[PG_HITS], - groups[PG_HITL], + groups[PG_BAKE], + groups[PG_DISP], + groups[PG_BACK], + groups[PG_MISS], + groups[PG_HITD], + groups[PG_HITS], + groups[PG_HITL], +# if OPTIX_ABI_VERSION >= 36 + groups[PG_HITD_MOTION], + groups[PG_HITS_MOTION], +# endif # ifdef WITH_CYCLES_DEBUG - groups[PG_EXCP], + groups[PG_EXCP], # endif }; check_result_optix_ret( @@ -672,7 +742,11 @@ class OptiXDevice : public CUDADevice { sbt_params.missRecordCount = 1; sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord); sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); - sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS +# if OPTIX_ABI_VERSION >= 36 + sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL +# else + sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL +# endif // Launch the ray generation program check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE], @@ -836,7 +910,9 @@ class OptiXDevice : public CUDADevice { assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3); denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>( OPTIX_DENOISER_INPUT_RGB + (task.denoising.optix_input_passes - 1)); +# if OPTIX_ABI_VERSION < 28 denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3; +# endif check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser)); check_result_optix_ret( optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); @@ -849,7 +925,11 @@ class OptiXDevice : public CUDADevice { check_result_optix_ret( optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes)); +# if OPTIX_ABI_VERSION < 28 const size_t scratch_size = sizes.recommendedScratchSizeInBytes; +# else + const size_t scratch_size = sizes.withOverlapScratchSizeInBytes; +# endif const size_t scratch_offset = sizes.stateSizeInBytes; // Allocate denoiser state if tile size has changed since last setup @@ -993,7 +1073,11 @@ class OptiXDevice : public CUDADevice { sbt_params.missRecordCount = 1; sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord); sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); - sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS +# if OPTIX_ABI_VERSION >= 36 + sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL +# else + sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL +# endif check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL], cuda_stream[thread_index], @@ -1070,7 +1154,7 @@ class OptiXDevice : public CUDADevice { &build_input, 1, temp_mem.device_pointer, - temp_mem.device_size, + sizes.tempSizeInBytes, out_data, sizes.outputSizeInBytes, &out_handle, @@ -1142,7 +1226,6 @@ class OptiXDevice : public CUDADevice { continue; } - const size_t num_curves = hair->num_curves(); const size_t num_segments = hair->num_segments(); size_t num_motion_steps = 1; @@ -1152,7 +1235,18 @@ class OptiXDevice : public CUDADevice { } device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY); - aabb_data.alloc(num_segments * num_motion_steps); +# if OPTIX_ABI_VERSION >= 36 + device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY); + device_vector<float4> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY); + // Four control points for each curve segment + const size_t num_vertices = num_segments * 4; + if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { + index_data.alloc(num_segments); + vertex_data.alloc(num_vertices * num_motion_steps); + } + else +# endif + aabb_data.alloc(num_segments * num_motion_steps); // Get AABBs for each motion step for (size_t step = 0; step < num_motion_steps; ++step) { @@ -1165,44 +1259,127 @@ class OptiXDevice : public CUDADevice { keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size(); } - size_t i = step * num_segments; - for (size_t j = 0; j < num_curves; ++j) { - const Hair::Curve c = hair->get_curve(j); - - for (size_t k = 0; k < c.num_segments(); ++i, ++k) { - BoundBox bounds = BoundBox::empty; - c.bounds_grow(k, keys, hair->curve_radius.data(), bounds); - - aabb_data[i].minX = bounds.min.x; - aabb_data[i].minY = bounds.min.y; - aabb_data[i].minZ = bounds.min.z; - aabb_data[i].maxX = bounds.max.x; - aabb_data[i].maxY = bounds.max.y; - aabb_data[i].maxZ = bounds.max.z; + for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) { + const Hair::Curve curve = hair->get_curve(j); + + for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) { +# if OPTIX_ABI_VERSION >= 36 + if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { + int k0 = curve.first_key + segment; + int k1 = k0 + 1; + int ka = max(k0 - 1, curve.first_key); + int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1); + + const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x); + const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y); + const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z); + const float4 pw = make_float4(hair->curve_radius[ka], + hair->curve_radius[k0], + hair->curve_radius[k1], + hair->curve_radius[kb]); + + // Convert Catmull-Rom data to Bezier spline + static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; + static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f; + static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; + static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f; + + index_data[i] = i * 4; + float4 *const v = vertex_data.data() + step * num_vertices + index_data[i]; + v[0] = make_float4( + dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw)); + v[1] = make_float4( + dot(cr2bsp1, px), dot(cr2bsp1, py), dot(cr2bsp1, pz), dot(cr2bsp1, pw)); + v[2] = make_float4( + dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw)); + v[3] = make_float4( + dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw)); + } + else +# endif + { + BoundBox bounds = BoundBox::empty; + curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds); + + const size_t index = step * num_segments + i; + aabb_data[index].minX = bounds.min.x; + aabb_data[index].minY = bounds.min.y; + aabb_data[index].minZ = bounds.min.z; + aabb_data[index].maxX = bounds.max.x; + aabb_data[index].maxY = bounds.max.y; + aabb_data[index].maxZ = bounds.max.z; + } } } } // Upload AABB data to GPU aabb_data.copy_to_device(); +# if OPTIX_ABI_VERSION >= 36 + index_data.copy_to_device(); + vertex_data.copy_to_device(); +# endif vector<device_ptr> aabb_ptrs; aabb_ptrs.reserve(num_motion_steps); +# if OPTIX_ABI_VERSION >= 36 + vector<device_ptr> width_ptrs; + vector<device_ptr> vertex_ptrs; + width_ptrs.reserve(num_motion_steps); + vertex_ptrs.reserve(num_motion_steps); +# endif for (size_t step = 0; step < num_motion_steps; ++step) { aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb)); +# if OPTIX_ABI_VERSION >= 36 + const device_ptr base_ptr = vertex_data.device_pointer + + step * num_vertices * sizeof(float4); + width_ptrs.push_back(base_ptr + 3 * sizeof(float)); // Offset by vertex size + vertex_ptrs.push_back(base_ptr); +# endif } - // Disable visibility test anyhit program, since it is already checked during intersection - // Those trace calls that require anyhit can force it with OPTIX_RAY_FLAG_ENFORCE_ANYHIT - unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; + // Force a single any-hit call, so shadow record-all behavior works correctly + unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL; OptixBuildInput build_input = {}; - build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; - build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); - build_input.aabbArray.numPrimitives = num_segments; - build_input.aabbArray.strideInBytes = sizeof(OptixAabb); - build_input.aabbArray.flags = &build_flags; - build_input.aabbArray.numSbtRecords = 1; - build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset; +# if OPTIX_ABI_VERSION >= 36 + if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { + build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES; + build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; + build_input.curveArray.numPrimitives = num_segments; + build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data(); + build_input.curveArray.numVertices = num_vertices; + build_input.curveArray.vertexStrideInBytes = sizeof(float4); + build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data(); + build_input.curveArray.widthStrideInBytes = sizeof(float4); + build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer; + build_input.curveArray.indexStrideInBytes = sizeof(int); + build_input.curveArray.flag = build_flags; + build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset; + } + else +# endif + { + // Disable visibility test any-hit program, since it is already checked during + // intersection. Those trace calls that require anyhit can force it with a ray flag. + build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; + + build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; +# if OPTIX_ABI_VERSION < 23 + build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); + build_input.aabbArray.numPrimitives = num_segments; + build_input.aabbArray.strideInBytes = sizeof(OptixAabb); + build_input.aabbArray.flags = &build_flags; + build_input.aabbArray.numSbtRecords = 1; + build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset; +# else + build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); + build_input.customPrimitiveArray.numPrimitives = num_segments; + build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb); + build_input.customPrimitiveArray.flags = &build_flags; + build_input.customPrimitiveArray.numSbtRecords = 1; + build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset; +# endif + } // Allocate memory for new BLAS and build it OptixTraversableHandle handle; @@ -1257,8 +1434,8 @@ class OptiXDevice : public CUDADevice { vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3)); } - // No special build flags for triangle primitives - unsigned int build_flags = OPTIX_GEOMETRY_FLAG_NONE; + // Force a single any-hit call, so shadow record-all behavior works correctly + unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL; OptixBuildInput build_input = {}; build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES; build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data(); @@ -1324,9 +1501,26 @@ class OptiXDevice : public CUDADevice { // Set user instance ID to object index instance.instanceId = ob->get_device_index(); - // Volumes have a special bit set in the visibility mask so a trace can mask only volumes - // See 'scene_intersect_volume' in bvh.h - instance.visibilityMask = (ob->geometry->has_volume ? 3 : 1); + // Have to have at least one bit in the mask, or else instance would always be culled + instance.visibilityMask = 1; + + if (ob->geometry->has_volume) { + // Volumes have a special bit set in the visibility mask so a trace can mask only volumes + instance.visibilityMask |= 2; + } + + if (ob->geometry->type == Geometry::HAIR) { + // Same applies to curves (so they can be skipped in local trace calls) + instance.visibilityMask |= 4; + +# if OPTIX_ABI_VERSION >= 36 + if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api && + static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) { + // Select between motion blur and non-motion blur built-in intersection module + instance.sbtOffset = PG_HITD_MOTION - PG_HITD; + } +# endif + } // Insert motion traversable if object has motion if (motion_blur && ob->use_motion()) { |