diff options
Diffstat (limited to 'intern/cycles/device/optix/device_impl.cpp')
-rw-r--r-- | intern/cycles/device/optix/device_impl.cpp | 361 |
1 files changed, 257 insertions, 104 deletions
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index bb690551c04..38cc3330ebd 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -28,6 +28,7 @@ # include "scene/mesh.h" # include "scene/object.h" # include "scene/pass.h" +# include "scene/pointcloud.h" # include "scene/scene.h" # include "util/debug.h" @@ -41,17 +42,19 @@ # define __KERNEL_OPTIX__ # include "kernel/device/optix/globals.h" +# include <optix_denoiser_tiling.h> + CCL_NAMESPACE_BEGIN OptiXDevice::Denoiser::Denoiser(OptiXDevice *device) - : device(device), queue(device), state(device, "__denoiser_state") + : device(device), queue(device), state(device, "__denoiser_state", true) { } OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler) : CUDADevice(info, stats, profiler), sbt_data(this, "__sbt", MEM_READ_ONLY), - launch_params(this, "__params"), + launch_params(this, "__params", false), denoiser_(this) { /* Make the CUDA context current. */ @@ -208,11 +211,15 @@ bool OptiXDevice::load_kernels(const uint kernel_features) } else { module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3; - module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; + module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; } module_options.boundValues = nullptr; module_options.numBoundValues = 0; +# if OPTIX_ABI_VERSION >= 55 + module_options.payloadTypes = nullptr; + module_options.numPayloadTypes = 0; +# endif OptixPipelineCompileOptions pipeline_options = {}; /* Default to no motion blur and two-level graph, since it is the fastest option. 
*/ @@ -227,11 +234,18 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; if (kernel_features & KERNEL_FEATURE_HAIR) { if (kernel_features & KERNEL_FEATURE_HAIR_THICK) { +# if OPTIX_ABI_VERSION >= 55 + pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM; +# else pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE; +# endif } else pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; } + if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { + pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; + } /* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds * This is necessary since objects may be reported to have motion if the Vector pass is @@ -324,7 +338,13 @@ bool OptiXDevice::load_kernels(const uint kernel_features) if (kernel_features & KERNEL_FEATURE_HAIR_THICK) { /* Built-in thick curve intersection. */ OptixBuiltinISOptions builtin_options = {}; +# if OPTIX_ABI_VERSION >= 55 + builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM; + builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE; + builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. 
*/ +# else builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; +# endif builtin_options.usesMotionBlur = false; optix_assert(optixBuiltinISModuleGet( @@ -356,6 +376,18 @@ bool OptiXDevice::load_kernels(const uint kernel_features) } } + /* Pointclouds */ + if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { + group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD]; + group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; + group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module; + group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point"; + group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS]; + group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; + group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module; + group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point"; + } + if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) { /* Add hit group for local intersections. 
*/ group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; @@ -403,6 +435,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features) stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH); trace_css = std::max(trace_css, stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH); + trace_css = std::max( + trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH); + trace_css = std::max( + trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH); OptixPipelineLinkOptions link_options = {}; link_options.maxTraceDepth = 1; @@ -411,7 +447,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; } else { - link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO; + link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; } if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) { @@ -428,6 +464,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipeline_groups.push_back(groups[PG_HITD_MOTION]); pipeline_groups.push_back(groups[PG_HITS_MOTION]); } + if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { + pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); + pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); + } pipeline_groups.push_back(groups[PG_CALL_SVM_AO]); pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]); @@ -467,6 +507,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipeline_groups.push_back(groups[PG_HITD_MOTION]); pipeline_groups.push_back(groups[PG_HITS_MOTION]); } + if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { + pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); + pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); + } optix_assert(optixPipelineCreate(context, &pipeline_options, @@ -507,7 +551,7 @@ class OptiXDevice::DenoiseContext { : denoise_params(task.params), render_buffers(task.render_buffers), buffer_params(task.buffer_params), - 
guiding_buffer(device, "denoiser guiding passes buffer"), + guiding_buffer(device, "denoiser guiding passes buffer", true), num_samples(task.num_samples) { num_input_passes = 1; @@ -522,9 +566,9 @@ class OptiXDevice::DenoiseContext { } } - const int num_guiding_passes = num_input_passes - 1; + use_guiding_passes = (num_input_passes - 1) > 0; - if (num_guiding_passes) { + if (use_guiding_passes) { if (task.allow_inplace_modification) { guiding_params.device_pointer = render_buffers->buffer.device_pointer; @@ -577,6 +621,7 @@ class OptiXDevice::DenoiseContext { /* Number of input passes. Including the color and extra auxiliary passes. */ int num_input_passes = 0; + bool use_guiding_passes = false; bool use_pass_albedo = false; bool use_pass_normal = false; @@ -653,22 +698,22 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context) const int work_size = buffer_params.width * buffer_params.height; - void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer), - const_cast<int *>(&context.guiding_params.pass_stride), - const_cast<int *>(&context.guiding_params.pass_albedo), - const_cast<int *>(&context.guiding_params.pass_normal), - &context.render_buffers->buffer.device_pointer, - const_cast<int *>(&buffer_params.offset), - const_cast<int *>(&buffer_params.stride), - const_cast<int *>(&buffer_params.pass_stride), - const_cast<int *>(&context.pass_sample_count), - const_cast<int *>(&context.pass_denoising_albedo), - const_cast<int *>(&context.pass_denoising_normal), - const_cast<int *>(&buffer_params.full_x), - const_cast<int *>(&buffer_params.full_y), - const_cast<int *>(&buffer_params.width), - const_cast<int *>(&buffer_params.height), - const_cast<int *>(&context.num_samples)}; + DeviceKernelArguments args(&context.guiding_params.device_pointer, + &context.guiding_params.pass_stride, + &context.guiding_params.pass_albedo, + &context.guiding_params.pass_normal, + &context.render_buffers->buffer.device_pointer, + 
&buffer_params.offset, + &buffer_params.stride, + &buffer_params.pass_stride, + &context.pass_sample_count, + &context.pass_denoising_albedo, + &context.pass_denoising_normal, + &buffer_params.full_x, + &buffer_params.full_y, + &buffer_params.width, + &buffer_params.height, + &context.num_samples); return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args); } @@ -679,11 +724,11 @@ bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context const int work_size = buffer_params.width * buffer_params.height; - void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer), - const_cast<int *>(&context.guiding_params.pass_stride), - const_cast<int *>(&context.guiding_params.pass_albedo), - const_cast<int *>(&buffer_params.width), - const_cast<int *>(&buffer_params.height)}; + DeviceKernelArguments args(&context.guiding_params.device_pointer, + &context.guiding_params.pass_stride, + &context.guiding_params.pass_albedo, + &buffer_params.width, + &buffer_params.height); return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args); } @@ -708,7 +753,7 @@ void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type) return; } } - else if (!context.albedo_replaced_with_fake) { + else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) { context.albedo_replaced_with_fake = true; if (!denoise_filter_guiding_set_fake_albedo(context)) { LOG(ERROR) << "Error replacing real albedo with the fake one."; @@ -779,15 +824,15 @@ bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const const int work_size = buffer_params.width * buffer_params.height; - void *args[] = {&context.render_buffers->buffer.device_pointer, - const_cast<int *>(&buffer_params.full_x), - const_cast<int *>(&buffer_params.full_y), - const_cast<int *>(&buffer_params.width), - const_cast<int *>(&buffer_params.height), - const_cast<int *>(&buffer_params.offset), - 
const_cast<int *>(&buffer_params.stride), - const_cast<int *>(&buffer_params.pass_stride), - const_cast<int *>(&pass.denoised_offset)}; + DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer, + &buffer_params.full_x, + &buffer_params.full_y, + &buffer_params.width, + &buffer_params.height, + &buffer_params.offset, + &buffer_params.stride, + &buffer_params.pass_stride, + &pass.denoised_offset); return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args); } @@ -799,20 +844,20 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context, const int work_size = buffer_params.width * buffer_params.height; - void *args[] = {&context.render_buffers->buffer.device_pointer, - const_cast<int *>(&buffer_params.full_x), - const_cast<int *>(&buffer_params.full_y), - const_cast<int *>(&buffer_params.width), - const_cast<int *>(&buffer_params.height), - const_cast<int *>(&buffer_params.offset), - const_cast<int *>(&buffer_params.stride), - const_cast<int *>(&buffer_params.pass_stride), - const_cast<int *>(&context.num_samples), - const_cast<int *>(&pass.noisy_offset), - const_cast<int *>(&pass.denoised_offset), - const_cast<int *>(&context.pass_sample_count), - const_cast<int *>(&pass.num_components), - const_cast<bool *>(&pass.use_compositing)}; + DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer, + &buffer_params.full_x, + &buffer_params.full_y, + &buffer_params.width, + &buffer_params.height, + &buffer_params.offset, + &buffer_params.stride, + &buffer_params.pass_stride, + &context.num_samples, + &pass.noisy_offset, + &pass.denoised_offset, + &context.pass_sample_count, + &pass.num_components, + &pass.use_compositing); return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args); } @@ -870,35 +915,33 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context) bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context) { - if 
(denoiser_.is_configured && (denoiser_.configured_size.x == context.buffer_params.width && - denoiser_.configured_size.y == context.buffer_params.height)) { + /* Limit maximum tile size denoiser can be invoked with. */ + const int2 tile_size = make_int2(min(context.buffer_params.width, 4096), + min(context.buffer_params.height, 4096)); + + if (denoiser_.is_configured && + (denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) { return true; } - const BufferParams &buffer_params = context.buffer_params; - - OptixDenoiserSizes sizes = {}; optix_assert(optixDenoiserComputeMemoryResources( - denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes)); - - /* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */ - denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes; - denoiser_.scratch_offset = sizes.stateSizeInBytes; + denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes)); /* Allocate denoiser state if tile size has changed since last setup. */ - denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size); + denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes + + denoiser_.sizes.withOverlapScratchSizeInBytes); /* Initialize denoiser state for the current tile size. 
*/ const OptixResult result = optixDenoiserSetup( denoiser_.optix_denoiser, 0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called on a stream that is not the default stream */ - buffer_params.width, - buffer_params.height, + tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2, + tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2, denoiser_.state.device_pointer, - denoiser_.scratch_offset, - denoiser_.state.device_pointer + denoiser_.scratch_offset, - denoiser_.scratch_size); + denoiser_.sizes.stateSizeInBytes, + denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes, + denoiser_.sizes.withOverlapScratchSizeInBytes); if (result != OPTIX_SUCCESS) { set_error("Failed to set up OptiX denoiser"); return false; @@ -907,8 +950,7 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context) cuda_assert(cuCtxSynchronize()); denoiser_.is_configured = true; - denoiser_.configured_size.x = buffer_params.width; - denoiser_.configured_size.y = buffer_params.height; + denoiser_.configured_size = tile_size; return true; } @@ -979,18 +1021,20 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass) guide_layers.albedo = albedo_layer; guide_layers.normal = normal_layer; - optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser, - denoiser_.queue.stream(), - &params, - denoiser_.state.device_pointer, - denoiser_.scratch_offset, - &guide_layers, - &image_layers, - 1, - 0, - 0, - denoiser_.state.device_pointer + denoiser_.scratch_offset, - denoiser_.scratch_size)); + optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser, + denoiser_.queue.stream(), + &params, + denoiser_.state.device_pointer, + denoiser_.sizes.stateSizeInBytes, + &guide_layers, + &image_layers, + 1, + denoiser_.state.device_pointer + + denoiser_.sizes.stateSizeInBytes, + denoiser_.sizes.withOverlapScratchSizeInBytes, + denoiser_.sizes.overlapWindowSizeInPixels, + denoiser_.configured_size.x, + 
denoiser_.configured_size.y)); return true; } @@ -1000,6 +1044,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, const OptixBuildInput &build_input, uint16_t num_motion_steps) { + /* Allocate and build acceleration structures only one at a time, to prevent parallel builds + * from running out of memory (since both original and compacted acceleration structure memory + * may be allocated at the same time for the duration of this function). The builds would + * otherwise happen on the same CUDA stream anyway. */ + static thread_mutex mutex; + thread_scoped_lock lock(mutex); + const CUDAContextScope scope(this); const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC); @@ -1025,14 +1076,15 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes)); /* Allocate required output buffers. */ - device_only_memory<char> temp_mem(this, "optix temp as build mem"); + device_only_memory<char> temp_mem(this, "optix temp as build mem", true); temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8); if (!temp_mem.device_pointer) { /* Make sure temporary memory allocation succeeded. */ return false; } - device_only_memory<char> &out_data = bvh->as_data; + /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */ + device_only_memory<char> &out_data = *bvh->as_data; if (operation == OPTIX_BUILD_OPERATION_BUILD) { assert(out_data.device == this); out_data.alloc_to_device(sizes.outputSizeInBytes); @@ -1080,12 +1132,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, /* There is no point compacting if the size does not change. 
*/ if (compacted_size < sizes.outputSizeInBytes) { - device_only_memory<char> compacted_data(this, "optix compacted as"); + device_only_memory<char> compacted_data(this, "optix compacted as", false); compacted_data.alloc_to_device(compacted_size); - if (!compacted_data.device_pointer) + if (!compacted_data.device_pointer) { /* Do not compact if memory allocation for compacted acceleration structure fails. * Can just use the uncompacted one then, so succeed here regardless. */ return !have_error(); + } optix_assert(optixAccelCompact( context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle)); @@ -1096,6 +1149,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh, std::swap(out_data.device_size, compacted_data.device_size); std::swap(out_data.device_pointer, compacted_data.device_pointer); + /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope. + */ } } @@ -1123,7 +1178,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) operation = OPTIX_BUILD_OPERATION_UPDATE; } else { - bvh_optix->as_data.free(); + bvh_optix->as_data->free(); bvh_optix->traversable_handle = 0; } @@ -1178,20 +1233,27 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) int ka = max(k0 - 1, curve.first_key); int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1); + index_data[i] = i * 4; + float4 *const v = vertex_data.data() + step * num_vertices + index_data[i]; + +# if OPTIX_ABI_VERSION >= 55 + v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]); + v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]); + v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]); + v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]); +# else const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x); const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y); const float4 pz = 
make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z); const float4 pw = make_float4( curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]); - /* Convert Catmull-Rom data to Bezier spline. */ + /* Convert Catmull-Rom data to B-spline. */ static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f; static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f; static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f; static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f; - index_data[i] = i * 4; - float4 *const v = vertex_data.data() + step * num_vertices + index_data[i]; v[0] = make_float4( dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw)); v[1] = make_float4( @@ -1200,6 +1262,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw)); v[3] = make_float4( dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw)); +# endif } else { BoundBox bounds = BoundBox::empty; @@ -1241,7 +1304,11 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) OptixBuildInput build_input = {}; if (hair->curve_shape == CURVE_THICK) { build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES; +# if OPTIX_ABI_VERSION >= 55 + build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM; +# else build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE; +# endif build_input.curveArray.numPrimitives = num_segments; build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data(); build_input.curveArray.numVertices = num_vertices; @@ -1255,7 +1322,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) } else { /* Disable visibility test any-hit program, since it is already checked during - * intersection. Those trace calls that require anyhit can force it with a ray flag. */ + * intersection. Those trace calls that require any-hit can force it with a ray flag. 
*/ build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT; build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; @@ -1339,14 +1406,94 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) progress.set_error("Failed to build OptiX acceleration structure"); } } + else if (geom->geometry_type == Geometry::POINTCLOUD) { + /* Build BLAS for points primitives. */ + PointCloud *const pointcloud = static_cast<PointCloud *const>(geom); + const size_t num_points = pointcloud->num_points(); + if (num_points == 0) { + return; + } + + size_t num_motion_steps = 1; + Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) { + num_motion_steps = pointcloud->get_motion_steps(); + } + + device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY); + aabb_data.alloc(num_points * num_motion_steps); + + /* Get AABBs for each motion step. */ + for (size_t step = 0; step < num_motion_steps; ++step) { + /* The center step for motion vertices is not stored in the attribute. */ + const float3 *points = pointcloud->get_points().data(); + const float *radius = pointcloud->get_radius().data(); + size_t center_step = (num_motion_steps - 1) / 2; + if (step != center_step) { + size_t attr_offset = (step > center_step) ? step - 1 : step; + /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). 
*/ + points = motion_points->data_float3() + attr_offset * num_points; + } + + for (size_t i = 0; i < num_points; ++i) { + const PointCloud::Point point = pointcloud->get_point(i); + BoundBox bounds = BoundBox::empty; + point.bounds_grow(points, radius, bounds); + + const size_t index = step * num_points + i; + aabb_data[index].minX = bounds.min.x; + aabb_data[index].minY = bounds.min.y; + aabb_data[index].minZ = bounds.min.z; + aabb_data[index].maxX = bounds.max.x; + aabb_data[index].maxY = bounds.max.y; + aabb_data[index].maxZ = bounds.max.z; + } + } + + /* Upload AABB data to GPU. */ + aabb_data.copy_to_device(); + + vector<device_ptr> aabb_ptrs; + aabb_ptrs.reserve(num_motion_steps); + for (size_t step = 0; step < num_motion_steps; ++step) { + aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points * sizeof(OptixAabb)); + } + + /* Disable visibility test any-hit program, since it is already checked during + * intersection. Those trace calls that require anyhit can force it with a ray flag. + * For those, force a single any-hit call, so shadow record-all behavior works correctly. 
*/ + unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT | + OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL; + OptixBuildInput build_input = {}; + build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES; +# if OPTIX_ABI_VERSION < 23 + build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); + build_input.aabbArray.numPrimitives = num_points; + build_input.aabbArray.strideInBytes = sizeof(OptixAabb); + build_input.aabbArray.flags = &build_flags; + build_input.aabbArray.numSbtRecords = 1; + build_input.aabbArray.primitiveIndexOffset = pointcloud->prim_offset; +# else + build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data(); + build_input.customPrimitiveArray.numPrimitives = num_points; + build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb); + build_input.customPrimitiveArray.flags = &build_flags; + build_input.customPrimitiveArray.numSbtRecords = 1; + build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->prim_offset; +# endif + + if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) { + progress.set_error("Failed to build OptiX acceleration structure"); + } + } } else { unsigned int num_instances = 0; unsigned int max_num_instances = 0xFFFFFFFF; - bvh_optix->as_data.free(); + bvh_optix->as_data->free(); bvh_optix->traversable_handle = 0; - bvh_optix->motion_transform_data.free(); + bvh_optix->motion_transform_data->free(); optixDeviceContextGetProperty(context, OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID, @@ -1379,8 +1526,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) } } - assert(bvh_optix->motion_transform_data.device == this); - bvh_optix->motion_transform_data.alloc_to_device(total_motion_transform_size); + assert(bvh_optix->motion_transform_data->device == this); + bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size); } for (Object *ob : bvh->objects) { @@ -1422,9 +1569,22 @@ void OptiXDevice::build_bvh(BVH 
*bvh, Progress &progress, bool refit) instance.sbtOffset = PG_HITD_MOTION - PG_HITD; } } - else { + else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) { + /* Use the hit group that has an intersection program for point clouds. */ + instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD; + + /* Also skip point clouds in local trace calls. */ + instance.visibilityMask |= 4; + } + +# if OPTIX_ABI_VERSION < 55 + /* Cannot disable any-hit program for thick curves, since it needs to filter out end-caps. */ + else +# endif + { /* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves, * since it needs to filter out end-caps there). + * It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit * programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT. */ @@ -1441,7 +1601,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) motion_transform_offset = align_up(motion_transform_offset, OPTIX_TRANSFORM_BYTE_ALIGNMENT); - CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data.device_pointer + + CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer + motion_transform_offset; motion_transform_offset += motion_transform_size; @@ -1494,9 +1654,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size); delete[] reinterpret_cast<uint8_t *>(&motion_transform); - /* Disable instance transform if object uses motion transform already. */ - instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; - /* Get traversable handle to motion transform. */ optixConvertPointerToTraversableHandle(context, motion_transform_gpu, @@ -1510,10 +1667,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) /* Set transform matrix. 
*/ memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform)); } - else { - /* Disable instance transform if geometry already has it applied to vertex data. */ - instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM; - } } } |