diff options
-rw-r--r-- | intern/cycles/blender/addon/properties.py | 1 | ||||
-rw-r--r-- | intern/cycles/blender/addon/ui.py | 1 | ||||
-rw-r--r-- | intern/cycles/blender/blender_python.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 300 | ||||
-rw-r--r-- | intern/cycles/kernel/bvh/bvh.h | 23 | ||||
-rw-r--r-- | intern/cycles/kernel/geom/geom_curve_intersect.h | 9 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/optix/kernel_optix.cu | 48 | ||||
-rw-r--r-- | intern/cycles/util/util_debug.cpp | 1 | ||||
-rw-r--r-- | intern/cycles/util/util_debug.h | 3 |
9 files changed, 303 insertions, 84 deletions
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 0cc419c7025..def35573ec3 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -830,6 +830,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): debug_use_cuda_split_kernel: BoolProperty(name="Split Kernel", default=False) debug_optix_cuda_streams: IntProperty(name="CUDA Streams", default=1, min=1) + debug_optix_curves_api: BoolProperty(name="Native OptiX Curve Primitive", default=False) debug_opencl_kernel_type: EnumProperty( name="OpenCL Kernel Type", diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index bf81a360020..cce3f805b39 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -2031,6 +2031,7 @@ class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel): col = layout.column() col.label(text="OptiX Flags:") col.prop(cscene, "debug_optix_cuda_streams") + col.prop(cscene, "debug_optix_curves_api") col.separator() diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index 3e595c3ee52..25c77b74ce3 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -92,6 +92,7 @@ bool debug_flags_sync_from_scene(BL::Scene b_scene) flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel"); /* Synchronize OptiX flags. */ flags.optix.cuda_streams = get_int(cscene, "debug_optix_cuda_streams"); + flags.optix.curves_api = get_boolean(cscene, "debug_optix_curves_api"); /* Synchronize OpenCL device type. */ switch (get_enum(cscene, "debug_opencl_device_type")) { case 0: diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index ececca3df53..266222c74c5 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -131,8 +131,12 @@ class OptiXDevice : public CUDADevice { PG_RGEN, PG_MISS, PG_HITD, // Default hit group - PG_HITL, // __BVH_LOCAL__ hit group PG_HITS, // __SHADOW_RECORD_ALL__ hit group + PG_HITL, // __BVH_LOCAL__ hit group (only used for triangles) +# if OPTIX_ABI_VERSION >= 36 + PG_HITD_MOTION, + PG_HITS_MOTION, +# endif # ifdef WITH_CYCLES_DEBUG PG_EXCP, # endif @@ -177,6 +181,7 @@ class OptiXDevice : public CUDADevice { OptixDeviceContext context = NULL; OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module + OptixModule builtin_modules[2] = {}; OptixPipeline pipelines[NUM_PIPELINES] = {}; bool motion_blur = false; @@ -264,6 +269,9 @@ class OptiXDevice : public CUDADevice { // Unload modules if (optix_module != NULL) optixModuleDestroy(optix_module); + for (unsigned int i = 0; i < 2; ++i) + if (builtin_modules[i] != NULL) + optixModuleDestroy(builtin_modules[i]); for (unsigned int i = 0; i < NUM_PIPELINES; ++i) if (pipelines[i] != NULL) optixPipelineDestroy(pipelines[i]); @@ -338,6 +346,12 @@ class OptiXDevice : public CUDADevice { optixModuleDestroy(optix_module); optix_module = NULL; } + for (unsigned int i = 0; i < 2; ++i) { + if (builtin_modules[i] != NULL) { + optixModuleDestroy(builtin_modules[i]); + builtin_modules[i] = NULL; + } + } for (unsigned int i = 0; i < NUM_PIPELINES; ++i) { if (pipelines[i] != NULL) { optixPipelineDestroy(pipelines[i]); @@ -369,6 +383,18 @@ class OptiXDevice : public CUDADevice { # endif pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h +# if OPTIX_ABI_VERSION >= 36 + pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; + if (requested_features.use_hair) { + if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) { + pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE; + } + else { + pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; + } + } +# endif + // Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds // This is necessary since objects may be reported to have motion if the Vector pass is // active, but may still need to be rendered without motion blur if that isn't active as well @@ -442,6 +468,34 @@ class OptiXDevice : public CUDADevice { group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon"; } + +# if OPTIX_ABI_VERSION >= 36 + if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) { + OptixBuiltinISOptions builtin_options; + builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; + builtin_options.usesMotionBlur = false; + + check_result_optix_ret(optixBuiltinISModuleGet( + context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0])); + + group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0]; + group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr; + group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0]; + group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr; + + if (motion_blur) { + builtin_options.usesMotionBlur = true; + + check_result_optix_ret(optixBuiltinISModuleGet( + context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1])); + + group_descs[PG_HITD_MOTION] = group_descs[PG_HITD]; + group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1]; + group_descs[PG_HITS_MOTION] = group_descs[PG_HITS]; + group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1]; + } + } +# endif } if (requested_features.use_subsurface || requested_features.use_shader_raytrace) { @@ -493,8 +547,14 @@ class OptiXDevice : public CUDADevice { unsigned int trace_css = stack_size[PG_HITD].cssCH; // This is based on the maximum of closest-hit and any-hit/intersection programs trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH); - trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH); + trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH); +# if OPTIX_ABI_VERSION >= 36 + trace_css = std::max(trace_css, + stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH); + trace_css = std::max(trace_css, + stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH); +# endif OptixPipelineLinkOptions link_options; link_options.maxTraceDepth = 1; @@ -503,17 +563,23 @@ class OptiXDevice : public CUDADevice { # else link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; # endif - link_options.overrideUsesMotionBlur = pipeline_options.usesMotionBlur; +# if OPTIX_ABI_VERSION < 24 + link_options.overrideUsesMotionBlur = motion_blur; +# endif { // Create path tracing pipeline OptixProgramGroup pipeline_groups[] = { - groups[PG_RGEN], - groups[PG_MISS], - groups[PG_HITD], - groups[PG_HITS], - groups[PG_HITL], + groups[PG_RGEN], + groups[PG_MISS], + groups[PG_HITD], + groups[PG_HITS], + groups[PG_HITL], +# if OPTIX_ABI_VERSION >= 36 + groups[PG_HITD_MOTION], + groups[PG_HITS_MOTION], +# endif # ifdef WITH_CYCLES_DEBUG - groups[PG_EXCP], + groups[PG_EXCP], # endif }; check_result_optix_ret( @@ -530,8 +596,8 @@ class OptiXDevice : public CUDADevice { const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css; // Set stack size depending on pipeline options - check_result_optix_ret(optixPipelineSetStackSize( - pipelines[PIP_PATH_TRACE], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2))); + check_result_optix_ret( + optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2))); } // Only need to create shader evaluation pipeline if one of these features is used: @@ -541,15 +607,19 @@ class OptiXDevice : public CUDADevice { if (use_shader_eval_pipeline) { // Create shader evaluation pipeline OptixProgramGroup pipeline_groups[] = { - groups[PG_BAKE], - groups[PG_DISP], - groups[PG_BACK], - groups[PG_MISS], - groups[PG_HITD], - groups[PG_HITS], - groups[PG_HITL], + groups[PG_BAKE], + groups[PG_DISP], + groups[PG_BACK], + groups[PG_MISS], + groups[PG_HITD], + groups[PG_HITS], + groups[PG_HITL], +# if OPTIX_ABI_VERSION >= 36 + groups[PG_HITD_MOTION], + groups[PG_HITS_MOTION], +# endif # ifdef WITH_CYCLES_DEBUG - groups[PG_EXCP], + groups[PG_EXCP], # endif }; check_result_optix_ret( @@ -672,7 +742,11 @@ class OptiXDevice : public CUDADevice { sbt_params.missRecordCount = 1; sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord); sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); - sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS +# if OPTIX_ABI_VERSION >= 36 + sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL +# else + sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL +# endif // Launch the ray generation program check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE], @@ -836,7 +910,9 @@ class OptiXDevice : public CUDADevice { assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3); denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>( OPTIX_DENOISER_INPUT_RGB + (task.denoising.optix_input_passes - 1)); +# if OPTIX_ABI_VERSION < 28 denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3; +# endif check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser)); check_result_optix_ret( optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0)); @@ -849,7 +925,11 @@ class OptiXDevice : public CUDADevice { check_result_optix_ret( optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes)); +# if OPTIX_ABI_VERSION < 28 const size_t scratch_size = sizes.recommendedScratchSizeInBytes; +# else + const size_t scratch_size = sizes.withOverlapScratchSizeInBytes; +# endif const size_t scratch_offset = sizes.stateSizeInBytes; // Allocate denoiser state if tile size has changed since last setup @@ -993,7 +1073,11 @@ class OptiXDevice : public CUDADevice { sbt_params.missRecordCount = 1; sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord); sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord); - sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS +# if OPTIX_ABI_VERSION >= 36 + sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL +# else + sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL +# endif check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL], cuda_stream[thread_index], @@ -1070,7 +1154,7 @@ class OptiXDevice : public CUDADevice { &build_input, 1, temp_mem.device_pointer, - temp_mem.device_size, + sizes.tempSizeInBytes, out_data, sizes.outputSizeInBytes, &out_handle, @@ -1142,7 +1226,6 @@ class OptiXDevice : public CUDADevice { continue; } - const size_t num_curves = hair->num_curves(); const size_t num_segments = hair->num_segments(); size_t num_motion_steps = 1; @@ -1152,7 +1235,18 @@ class OptiXDevice : public CUDADevice { } device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY); - aabb_data.alloc(num_segments * num_motion_steps); +# if OPTIX_ABI_VERSION >= 36 + device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY); + device_vector<float4> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY); + // Four control points for each curve segment + const size_t num_vertices = num_segments * 4; + if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { + index_data.alloc(num_segments); + vertex_data.alloc(num_vertices * num_motion_steps); + } + else +# endif + aabb_data.alloc(num_segments * num_motion_steps); // Get AABBs for each motion step for (size_t step = 0; step < num_motion_steps; ++step) { @@ -1165,44 +1259,127 @@ class OptiXDevice : public CUDADevice { keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size(); } - size_t i = step * num_segments; - for (size_t j = 0; j < num_curves; ++j) { - const Hair::Curve c = hair->get_curve(j); - - for (size_t k = 0; k < c.num_segments(); ++i, ++k) { - BoundBox bounds = BoundBox::empty; - c.bounds_grow(k, keys, hair->curve_radius.data(), bounds); - - aabb_data[i].minX = bounds.min.x; - aabb_data[i].minY = bounds.min.y; - aabb_data[i].minZ = bounds.min.z; - aabb_data[i].maxX = bounds.max.x; - aabb_data[i].maxY = bounds.max.y; - aabb_data[i].maxZ = bounds.max.z; + for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) { + const Hair::Curve curve = hair->get_curve(j); + + for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) { +# if OPTIX_ABI_VERSION >= 36 + if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { + int k0 = curve.first_key + segment; + int k1 = k0 + 1; + int ka = max(k0 - 1, curve.first_key); + int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1); + + const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x); + const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y); + const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z); + const float4 pw = make_float4(hair->curve_radius[ka], + hair->curve_radius[k0], + hair->curve_radius[k1], + hair->curve_radius[kb]); + + // Convert Catmull-Rom data to Bezier spline + static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; + static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f; + static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; + static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f; + + index_data[i] = i * 4; + float4 *const v = vertex_data.data() + step * num_vertices + index_data[i]; + v[0] = make_float4( + dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw)); + v[1] = make_float4( + dot(cr2bsp1, px), dot(cr2bsp1, py), dot(cr2bsp1, pz), dot(cr2bsp1, pw)); + v[2] = make_float4( + dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw)); + v[3] = make_float4( + dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw)); + } + else +# endif + { + BoundBox bounds = BoundBox::empty; + curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds); + + const size_t index = step * num_segments + i; + aabb_data[index].minX = bounds.min.x; + aabb_data[index].minY = bounds.min.y; + aabb_data[index].minZ = bounds.min.z; + aabb_data[index].maxX = bounds.max.x; + aabb_data[index].maxY = bounds.max.y; + aabb_data[index].maxZ = bounds.max.z; + } } } } // Upload AABB data to GPU aabb_data.copy_to_device(); +# if OPTIX_ABI_VERSION >= 36 + index_data.copy_to_device(); + vertex_data.copy_to_device(); +# endif vector<device_ptr> aabb_ptrs; aabb_ptrs.reserve(num_motion_steps); +# if OPTIX_ABI_VERSION >= 36 + vector<device_ptr> width_ptrs; + vector<device_ptr> vertex_ptrs; + width_ptrs.reserve(num_motion_steps); + vertex_ptrs.reserve(num_motion_steps); +# endif for (size_t step = 0; step < num_motion_steps; ++step) { aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb)); +# if OPTIX_ABI_VERSION >= 36 + const device_ptr base_ptr = vertex_data.device_pointer + + step * num_vertices * sizeof(float4); + width_ptrs.push_back(base_ptr + 3 * sizeof(float)); // Offset by vertex size + vertex_ptrs.push_back(base_ptr); +# endif } - // Disable visibility test anyhit program, since it is already checked during intersection - // Those trace calls that require anyhit can force it with OPTIX_RAY_FLAG_ENFORCE_ANYHIT - unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; + // Force a single any-hit call, so shadow record-all behavior works correctly + unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL; OptixBuildInput build_input = {}; - build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; - build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); - build_input.aabbArray.numPrimitives = num_segments; - build_input.aabbArray.strideInBytes = sizeof(OptixAabb); - build_input.aabbArray.flags = &build_flags; - build_input.aabbArray.numSbtRecords = 1; - build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset; +# if OPTIX_ABI_VERSION >= 36 + if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) { + build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES; + build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; + build_input.curveArray.numPrimitives = num_segments; + build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data(); + build_input.curveArray.numVertices = num_vertices; + build_input.curveArray.vertexStrideInBytes = sizeof(float4); + build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data(); + build_input.curveArray.widthStrideInBytes = sizeof(float4); + build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer; + build_input.curveArray.indexStrideInBytes = sizeof(int); + build_input.curveArray.flag = build_flags; + build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset; + } + else +# endif + { + // Disable visibility test any-hit program, since it is already checked during + // intersection. Those trace calls that require anyhit can force it with a ray flag. + build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; + + build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; +# if OPTIX_ABI_VERSION < 23 + build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); + build_input.aabbArray.numPrimitives = num_segments; + build_input.aabbArray.strideInBytes = sizeof(OptixAabb); + build_input.aabbArray.flags = &build_flags; + build_input.aabbArray.numSbtRecords = 1; + build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset; +# else + build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); + build_input.customPrimitiveArray.numPrimitives = num_segments; + build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb); + build_input.customPrimitiveArray.flags = &build_flags; + build_input.customPrimitiveArray.numSbtRecords = 1; + build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset; +# endif + } // Allocate memory for new BLAS and build it OptixTraversableHandle handle; @@ -1257,8 +1434,8 @@ class OptiXDevice : public CUDADevice { vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3)); } - // No special build flags for triangle primitives - unsigned int build_flags = OPTIX_GEOMETRY_FLAG_NONE; + // Force a single any-hit call, so shadow record-all behavior works correctly + unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL; OptixBuildInput build_input = {}; build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES; build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data(); @@ -1324,9 +1501,26 @@ class OptiXDevice : public CUDADevice { // Set user instance ID to object index instance.instanceId = ob->get_device_index(); - // Volumes have a special bit set in the visibility mask so a trace can mask only volumes - // See 'scene_intersect_volume' in bvh.h - instance.visibilityMask = (ob->geometry->has_volume ? 3 : 1); + // Have to have at least one bit in the mask, or else instance would always be culled + instance.visibilityMask = 1; + + if (ob->geometry->has_volume) { + // Volumes have a special bit set in the visibility mask so a trace can mask only volumes + instance.visibilityMask |= 2; + } + + if (ob->geometry->type == Geometry::HAIR) { + // Same applies to curves (so they can be skipped in local trace calls) + instance.visibilityMask |= 4; + +# if OPTIX_ABI_VERSION >= 36 + if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api && + static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) { + // Select between motion blur and non-motion blur built-in intersection module + instance.sbtOffset = PG_HITD_MOTION - PG_HITD; + } +# endif + } // Insert motion traversable if object has motion if (motion_blur && ob->use_motion()) { diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index 80b58f46329..3049f243ae9 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -172,11 +172,11 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, 0.0f, ray->t, ray->time, - 0xFF, + 0xF, OPTIX_RAY_FLAG_NONE, + 0, // SBT offset for PG_HITD 0, 0, - 0, // SBT offset for PG_HITD p0, p1, p2, @@ -264,12 +264,13 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, 0.0f, ray->t, ray->time, + // Skip curves + 0x3, // Need to always call into __anyhit__kernel_optix_local_hit - 0xFF, OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 1, + 2, // SBT offset for PG_HITL + 0, 0, - 0, // SBT offset for PG_HITL p0, p1, p2, @@ -374,12 +375,12 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, 0.0f, ray->t, ray->time, + 0xF, // Need to always call into __anyhit__kernel_optix_shadow_all_hit - 0xFF, OPTIX_RAY_FLAG_ENFORCE_ANYHIT, - 2, + 1, // SBT offset for PG_HITS + 0, 0, - 0, // SBT offset for PG_HITS p0, p1, *num_hits, @@ -458,12 +459,12 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, 0.0f, ray->t, ray->time, - // Visibility mask set to only intersect objects with volumes - 0x02, + // Skip everything but volumes + 0x2, OPTIX_RAY_FLAG_NONE, + 0, // SBT offset for PG_HITD 0, 0, - 0, // SBT offset for PG_HITD p0, p1, p2, diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index c04dbee52cc..06d2c016f5b 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -734,7 +734,6 @@ ccl_device_inline void curve_shader_setup(KernelGlobals *kg, } sd->u = isect->u; - sd->v = isect->v; P = P + D * t; @@ -750,6 +749,7 @@ ccl_device_inline void curve_shader_setup(KernelGlobals *kg, sd->N = normalize(sine * bitangent - cosine * normalize(cross(tangent, bitangent))); sd->Ng = -D; + sd->v = isect->v; # if 0 /* This approximates the position and geometric normal of a thick curve too, @@ -764,8 +764,11 @@ ccl_device_inline void curve_shader_setup(KernelGlobals *kg, * This could be optimized by recording the normal in the intersection, * however for Optix this would go beyond the size of the payload. */ const float3 P_inside = float4_to_float3(catmull_rom_basis_eval(P_curve, isect->u)); - sd->Ng = normalize(P - P_inside); - sd->N = sd->Ng; + const float3 Ng = normalize(P - P_inside); + + sd->N = Ng; + sd->Ng = Ng; + sd->v = 0.0f; } # ifdef __DPDU__ diff --git a/intern/cycles/kernel/kernels/optix/kernel_optix.cu b/intern/cycles/kernel/kernels/optix/kernel_optix.cu index c730d952ed4..3b166e59dfd 100644 --- a/intern/cycles/kernel/kernels/optix/kernel_optix.cu +++ b/intern/cycles/kernel/kernels/optix/kernel_optix.cu @@ -15,6 +15,7 @@ * limitations under the License. */ +// clang-format off #include "kernel/kernel_compat_optix.h" #include "util/util_atomic.h" #include "kernel/kernel_types.h" @@ -23,6 +24,7 @@ #include "kernel/kernel_path.h" #include "kernel/kernel_bake.h" +// clang-format on template<typename T> ccl_device_forceinline T *get_payload_ptr_0() { @@ -139,8 +141,8 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() } else { if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) { - // Record closest intersection only (do not terminate ray here, since there is no guarantee - // about distance ordering in anyhit) + // Record closest intersection only + // Do not terminate ray here, since there is no guarantee about distance ordering in any-hit return optixIgnoreIntersection(); } @@ -153,15 +155,9 @@ extern "C" __global__ void __anyhit__kernel_optix_local_hit() isect->object = get_object_id(); isect->type = kernel_tex_fetch(__prim_type, isect->prim); - if (optixIsTriangleHit()) { - const float2 barycentrics = optixGetTriangleBarycentrics(); - isect->u = 1.0f - barycentrics.y - barycentrics.x; - isect->v = barycentrics.x; - } - else { - isect->u = __uint_as_float(optixGetAttribute_0()); - isect->v = __uint_as_float(optixGetAttribute_1()); - } + const float2 barycentrics = optixGetTriangleBarycentrics(); + isect->u = 1.0f - barycentrics.y - barycentrics.x; + isect->v = barycentrics.x; // Record geometric normal const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); @@ -198,10 +194,18 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() isect->u = 1.0f - barycentrics.y - barycentrics.x; isect->v = barycentrics.x; } +# ifdef __HAIR__ else { - isect->u = __uint_as_float(optixGetAttribute_0()); + const float u = __uint_as_float(optixGetAttribute_0()); + isect->u = u; isect->v = __uint_as_float(optixGetAttribute_1()); + + // Filter out curve endcaps + if (u == 0.0f || u == 1.0f) { + return optixIgnoreIntersection(); + } } +# endif # ifdef __TRANSPARENT_SHADOWS__ // Detect if this surface has a shader with transparent shadows @@ -213,7 +217,6 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() # ifdef __TRANSPARENT_SHADOWS__ } - // TODO(pmours): Do we need REQUIRE_UNIQUE_ANYHIT for this to work? optixSetPayload_2(optixGetPayload_2() + 1); // num_hits++ // Continue tracing @@ -227,13 +230,25 @@ extern "C" __global__ void __anyhit__kernel_optix_visibility_test() uint visibility = optixGetPayload_4(); #ifdef __VISIBILITY_FLAG__ const uint prim = optixGetPrimitiveIndex(); - if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) + if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { return optixIgnoreIntersection(); + } +#endif + +#ifdef __HAIR__ + if (!optixIsTriangleHit()) { + // Filter out curve endcaps + const float u = __uint_as_float(optixGetAttribute_0()); + if (u == 0.0f || u == 1.0f) { + return optixIgnoreIntersection(); + } + } #endif // Shadow ray early termination - if (visibility & PATH_RAY_SHADOW_OPAQUE) + if (visibility & PATH_RAY_SHADOW_OPAQUE) { return optixTerminateRay(); + } } extern "C" __global__ void __closesthit__kernel_optix_hit() @@ -250,7 +265,7 @@ extern "C" __global__ void __closesthit__kernel_optix_hit() optixSetPayload_2(__float_as_uint(barycentrics.x)); } else { - optixSetPayload_1(optixGetAttribute_0()); + optixSetPayload_1(optixGetAttribute_0()); // Same as 'optixGetCurveParameter()' optixSetPayload_2(optixGetAttribute_1()); } } @@ -286,7 +301,6 @@ ccl_device_inline void optix_intersection_curve(const uint prim, const uint type __float_as_int(isect.u), // Attribute_0 __float_as_int(isect.v)); // Attribute_1 } - } extern "C" __global__ void __intersection__curve_ribbon() diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp index 6ad4f709ab5..74ecefa1917 100644 --- a/intern/cycles/util/util_debug.cpp +++ b/intern/cycles/util/util_debug.cpp @@ -83,6 +83,7 @@ DebugFlags::OptiX::OptiX() void DebugFlags::OptiX::reset() { cuda_streams = 1; + curves_api = false; } DebugFlags::OpenCL::OpenCL() : device_type(DebugFlags::OpenCL::DEVICE_ALL), debug(false) diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h index da9f5408b59..6ac4beb55b8 100644 --- a/intern/cycles/util/util_debug.h +++ b/intern/cycles/util/util_debug.h @@ -108,6 +108,9 @@ class DebugFlags { /* Number of CUDA streams to launch kernels concurrently from. */ int cuda_streams; + + /* Use OptiX curves API for hair instead of custom implementation. */ + bool curves_api; }; /* Descriptor of OpenCL feature-set to be used. */ |