Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/optix/device_impl.cpp')
-rw-r--r-- intern/cycles/device/optix/device_impl.cpp | 361
1 files changed, 257 insertions, 104 deletions
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index bb690551c04..38cc3330ebd 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -28,6 +28,7 @@
# include "scene/mesh.h"
# include "scene/object.h"
# include "scene/pass.h"
+# include "scene/pointcloud.h"
# include "scene/scene.h"
# include "util/debug.h"
@@ -41,17 +42,19 @@
# define __KERNEL_OPTIX__
# include "kernel/device/optix/globals.h"
+# include <optix_denoiser_tiling.h>
+
CCL_NAMESPACE_BEGIN
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
- : device(device), queue(device), state(device, "__denoiser_state")
+ : device(device), queue(device), state(device, "__denoiser_state", true)
{
}
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: CUDADevice(info, stats, profiler),
sbt_data(this, "__sbt", MEM_READ_ONLY),
- launch_params(this, "__params"),
+ launch_params(this, "__params", false),
denoiser_(this)
{
/* Make the CUDA context current. */
@@ -208,11 +211,15 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
else {
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
- module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
+ module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
}
module_options.boundValues = nullptr;
module_options.numBoundValues = 0;
+# if OPTIX_ABI_VERSION >= 55
+ module_options.payloadTypes = nullptr;
+ module_options.numPayloadTypes = 0;
+# endif
OptixPipelineCompileOptions pipeline_options = {};
/* Default to no motion blur and two-level graph, since it is the fastest option. */
@@ -227,11 +234,18 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
if (kernel_features & KERNEL_FEATURE_HAIR) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
+# if OPTIX_ABI_VERSION >= 55
+ pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
+# else
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
+# endif
}
else
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
}
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
+ }
/* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
* This is necessary since objects may be reported to have motion if the Vector pass is
@@ -324,7 +338,13 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
/* Built-in thick curve intersection. */
OptixBuiltinISOptions builtin_options = {};
+# if OPTIX_ABI_VERSION >= 55
+ builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
+ builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
+ builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
+# else
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+# endif
builtin_options.usesMotionBlur = false;
optix_assert(optixBuiltinISModuleGet(
@@ -356,6 +376,18 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
}
+ /* Pointclouds */
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
+ group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+ group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
+ group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
+ group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
+ group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+ group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
+ group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
+ }
+
if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) {
/* Add hit group for local intersections. */
group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
@@ -403,6 +435,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
trace_css = std::max(trace_css,
stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
+ trace_css = std::max(
+ trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
+ trace_css = std::max(
+ trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
OptixPipelineLinkOptions link_options = {};
link_options.maxTraceDepth = 1;
@@ -411,7 +447,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
}
else {
- link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
+ link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
}
if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
@@ -428,6 +464,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
+ pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
+ }
pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
@@ -467,6 +507,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
+ pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
+ }
optix_assert(optixPipelineCreate(context,
&pipeline_options,
@@ -507,7 +551,7 @@ class OptiXDevice::DenoiseContext {
: denoise_params(task.params),
render_buffers(task.render_buffers),
buffer_params(task.buffer_params),
- guiding_buffer(device, "denoiser guiding passes buffer"),
+ guiding_buffer(device, "denoiser guiding passes buffer", true),
num_samples(task.num_samples)
{
num_input_passes = 1;
@@ -522,9 +566,9 @@ class OptiXDevice::DenoiseContext {
}
}
- const int num_guiding_passes = num_input_passes - 1;
+ use_guiding_passes = (num_input_passes - 1) > 0;
- if (num_guiding_passes) {
+ if (use_guiding_passes) {
if (task.allow_inplace_modification) {
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
@@ -577,6 +621,7 @@ class OptiXDevice::DenoiseContext {
/* Number of input passes. Including the color and extra auxiliary passes. */
int num_input_passes = 0;
+ bool use_guiding_passes = false;
bool use_pass_albedo = false;
bool use_pass_normal = false;
@@ -653,22 +698,22 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
- const_cast<int *>(&context.guiding_params.pass_stride),
- const_cast<int *>(&context.guiding_params.pass_albedo),
- const_cast<int *>(&context.guiding_params.pass_normal),
- &context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.offset),
- const_cast<int *>(&buffer_params.stride),
- const_cast<int *>(&buffer_params.pass_stride),
- const_cast<int *>(&context.pass_sample_count),
- const_cast<int *>(&context.pass_denoising_albedo),
- const_cast<int *>(&context.pass_denoising_normal),
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height),
- const_cast<int *>(&context.num_samples)};
+ DeviceKernelArguments args(&context.guiding_params.device_pointer,
+ &context.guiding_params.pass_stride,
+ &context.guiding_params.pass_albedo,
+ &context.guiding_params.pass_normal,
+ &context.render_buffers->buffer.device_pointer,
+ &buffer_params.offset,
+ &buffer_params.stride,
+ &buffer_params.pass_stride,
+ &context.pass_sample_count,
+ &context.pass_denoising_albedo,
+ &context.pass_denoising_normal,
+ &buffer_params.full_x,
+ &buffer_params.full_y,
+ &buffer_params.width,
+ &buffer_params.height,
+ &context.num_samples);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
@@ -679,11 +724,11 @@ bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
- const_cast<int *>(&context.guiding_params.pass_stride),
- const_cast<int *>(&context.guiding_params.pass_albedo),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height)};
+ DeviceKernelArguments args(&context.guiding_params.device_pointer,
+ &context.guiding_params.pass_stride,
+ &context.guiding_params.pass_albedo,
+ &buffer_params.width,
+ &buffer_params.height);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}
@@ -708,7 +753,7 @@ void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
return;
}
}
- else if (!context.albedo_replaced_with_fake) {
+ else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
context.albedo_replaced_with_fake = true;
if (!denoise_filter_guiding_set_fake_albedo(context)) {
LOG(ERROR) << "Error replacing real albedo with the fake one.";
@@ -779,15 +824,15 @@ bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {&context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height),
- const_cast<int *>(&buffer_params.offset),
- const_cast<int *>(&buffer_params.stride),
- const_cast<int *>(&buffer_params.pass_stride),
- const_cast<int *>(&pass.denoised_offset)};
+ DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
+ &buffer_params.full_x,
+ &buffer_params.full_y,
+ &buffer_params.width,
+ &buffer_params.height,
+ &buffer_params.offset,
+ &buffer_params.stride,
+ &buffer_params.pass_stride,
+ &pass.denoised_offset);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}
@@ -799,20 +844,20 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {&context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height),
- const_cast<int *>(&buffer_params.offset),
- const_cast<int *>(&buffer_params.stride),
- const_cast<int *>(&buffer_params.pass_stride),
- const_cast<int *>(&context.num_samples),
- const_cast<int *>(&pass.noisy_offset),
- const_cast<int *>(&pass.denoised_offset),
- const_cast<int *>(&context.pass_sample_count),
- const_cast<int *>(&pass.num_components),
- const_cast<bool *>(&pass.use_compositing)};
+ DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
+ &buffer_params.full_x,
+ &buffer_params.full_y,
+ &buffer_params.width,
+ &buffer_params.height,
+ &buffer_params.offset,
+ &buffer_params.stride,
+ &buffer_params.pass_stride,
+ &context.num_samples,
+ &pass.noisy_offset,
+ &pass.denoised_offset,
+ &context.pass_sample_count,
+ &pass.num_components,
+ &pass.use_compositing);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}
@@ -870,35 +915,33 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
{
- if (denoiser_.is_configured && (denoiser_.configured_size.x == context.buffer_params.width &&
- denoiser_.configured_size.y == context.buffer_params.height)) {
+ /* Limit maximum tile size denoiser can be invoked with. */
+ const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
+ min(context.buffer_params.height, 4096));
+
+ if (denoiser_.is_configured &&
+ (denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) {
return true;
}
- const BufferParams &buffer_params = context.buffer_params;
-
- OptixDenoiserSizes sizes = {};
optix_assert(optixDenoiserComputeMemoryResources(
- denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));
-
- /* Denoiser is invoked on whole images only, so no overlap needed (would be used for tiling). */
- denoiser_.scratch_size = sizes.withoutOverlapScratchSizeInBytes;
- denoiser_.scratch_offset = sizes.stateSizeInBytes;
+ denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));
/* Allocate denoiser state if tile size has changed since last setup. */
- denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);
+ denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
+ denoiser_.sizes.withOverlapScratchSizeInBytes);
/* Initialize denoiser state for the current tile size. */
const OptixResult result = optixDenoiserSetup(
denoiser_.optix_denoiser,
0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
on a stream that is not the default stream */
- buffer_params.width,
- buffer_params.height,
+ tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
+ tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
denoiser_.state.device_pointer,
- denoiser_.scratch_offset,
- denoiser_.state.device_pointer + denoiser_.scratch_offset,
- denoiser_.scratch_size);
+ denoiser_.sizes.stateSizeInBytes,
+ denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
+ denoiser_.sizes.withOverlapScratchSizeInBytes);
if (result != OPTIX_SUCCESS) {
set_error("Failed to set up OptiX denoiser");
return false;
@@ -907,8 +950,7 @@ bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
cuda_assert(cuCtxSynchronize());
denoiser_.is_configured = true;
- denoiser_.configured_size.x = buffer_params.width;
- denoiser_.configured_size.y = buffer_params.height;
+ denoiser_.configured_size = tile_size;
return true;
}
@@ -979,18 +1021,20 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
guide_layers.albedo = albedo_layer;
guide_layers.normal = normal_layer;
- optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser,
- denoiser_.queue.stream(),
- &params,
- denoiser_.state.device_pointer,
- denoiser_.scratch_offset,
- &guide_layers,
- &image_layers,
- 1,
- 0,
- 0,
- denoiser_.state.device_pointer + denoiser_.scratch_offset,
- denoiser_.scratch_size));
+ optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
+ denoiser_.queue.stream(),
+ &params,
+ denoiser_.state.device_pointer,
+ denoiser_.sizes.stateSizeInBytes,
+ &guide_layers,
+ &image_layers,
+ 1,
+ denoiser_.state.device_pointer +
+ denoiser_.sizes.stateSizeInBytes,
+ denoiser_.sizes.withOverlapScratchSizeInBytes,
+ denoiser_.sizes.overlapWindowSizeInPixels,
+ denoiser_.configured_size.x,
+ denoiser_.configured_size.y));
return true;
}
@@ -1000,6 +1044,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
const OptixBuildInput &build_input,
uint16_t num_motion_steps)
{
+ /* Allocate and build acceleration structures only one at a time, to prevent parallel builds
+ * from running out of memory (since both original and compacted acceleration structure memory
+ * may be allocated at the same time for the duration of this function). The builds would
+ * otherwise happen on the same CUDA stream anyway. */
+ static thread_mutex mutex;
+ thread_scoped_lock lock(mutex);
+
const CUDAContextScope scope(this);
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
@@ -1025,14 +1076,15 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
/* Allocate required output buffers. */
- device_only_memory<char> temp_mem(this, "optix temp as build mem");
+ device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
if (!temp_mem.device_pointer) {
/* Make sure temporary memory allocation succeeded. */
return false;
}
- device_only_memory<char> &out_data = bvh->as_data;
+ /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
+ device_only_memory<char> &out_data = *bvh->as_data;
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
assert(out_data.device == this);
out_data.alloc_to_device(sizes.outputSizeInBytes);
@@ -1080,12 +1132,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
/* There is no point compacting if the size does not change. */
if (compacted_size < sizes.outputSizeInBytes) {
- device_only_memory<char> compacted_data(this, "optix compacted as");
+ device_only_memory<char> compacted_data(this, "optix compacted as", false);
compacted_data.alloc_to_device(compacted_size);
- if (!compacted_data.device_pointer)
+ if (!compacted_data.device_pointer) {
/* Do not compact if memory allocation for compacted acceleration structure fails.
* Can just use the uncompacted one then, so succeed here regardless. */
return !have_error();
+ }
optix_assert(optixAccelCompact(
context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
@@ -1096,6 +1149,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
std::swap(out_data.device_size, compacted_data.device_size);
std::swap(out_data.device_pointer, compacted_data.device_pointer);
+ /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
+ */
}
}
@@ -1123,7 +1178,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
operation = OPTIX_BUILD_OPERATION_UPDATE;
}
else {
- bvh_optix->as_data.free();
+ bvh_optix->as_data->free();
bvh_optix->traversable_handle = 0;
}
@@ -1178,20 +1233,27 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
int ka = max(k0 - 1, curve.first_key);
int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
+ index_data[i] = i * 4;
+ float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
+
+# if OPTIX_ABI_VERSION >= 55
+ v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]);
+ v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]);
+ v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]);
+ v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]);
+# else
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
const float4 pw = make_float4(
curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
- /* Convert Catmull-Rom data to Bezier spline. */
+ /* Convert Catmull-Rom data to B-spline. */
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
- index_data[i] = i * 4;
- float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
v[0] = make_float4(
dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
v[1] = make_float4(
@@ -1200,6 +1262,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
v[3] = make_float4(
dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
+# endif
}
else {
BoundBox bounds = BoundBox::empty;
@@ -1241,7 +1304,11 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
OptixBuildInput build_input = {};
if (hair->curve_shape == CURVE_THICK) {
build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
+# if OPTIX_ABI_VERSION >= 55
+ build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
+# else
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+# endif
build_input.curveArray.numPrimitives = num_segments;
build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
build_input.curveArray.numVertices = num_vertices;
@@ -1255,7 +1322,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
else {
/* Disable visibility test any-hit program, since it is already checked during
- * intersection. Those trace calls that require anyhit can force it with a ray flag. */
+ * intersection. Those trace calls that require any-hit can force it with a ray flag. */
build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
@@ -1339,14 +1406,94 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
progress.set_error("Failed to build OptiX acceleration structure");
}
}
+ else if (geom->geometry_type == Geometry::POINTCLOUD) {
+ /* Build BLAS for point primitives. */
+ PointCloud *const pointcloud = static_cast<PointCloud *const>(geom);
+ const size_t num_points = pointcloud->num_points();
+ if (num_points == 0) {
+ return;
+ }
+
+ size_t num_motion_steps = 1;
+ Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) {
+ num_motion_steps = pointcloud->get_motion_steps();
+ }
+
+ device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
+ aabb_data.alloc(num_points * num_motion_steps);
+
+ /* Get AABBs for each motion step. */
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ /* The center step for motion vertices is not stored in the attribute. */
+ const float3 *points = pointcloud->get_points().data();
+ const float *radius = pointcloud->get_radius().data();
+ size_t center_step = (num_motion_steps - 1) / 2;
+ if (step != center_step) {
+ size_t attr_offset = (step > center_step) ? step - 1 : step;
+ /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
+ points = motion_points->data_float3() + attr_offset * num_points;
+ }
+
+ for (size_t i = 0; i < num_points; ++i) {
+ const PointCloud::Point point = pointcloud->get_point(i);
+ BoundBox bounds = BoundBox::empty;
+ point.bounds_grow(points, radius, bounds);
+
+ const size_t index = step * num_points + i;
+ aabb_data[index].minX = bounds.min.x;
+ aabb_data[index].minY = bounds.min.y;
+ aabb_data[index].minZ = bounds.min.z;
+ aabb_data[index].maxX = bounds.max.x;
+ aabb_data[index].maxY = bounds.max.y;
+ aabb_data[index].maxZ = bounds.max.z;
+ }
+ }
+
+ /* Upload AABB data to GPU. */
+ aabb_data.copy_to_device();
+
+ vector<device_ptr> aabb_ptrs;
+ aabb_ptrs.reserve(num_motion_steps);
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points * sizeof(OptixAabb));
+ }
+
+ /* Disable visibility test any-hit program, since it is already checked during
+ * intersection. Those trace calls that require any-hit can force it with a ray flag.
+ * For those, force a single any-hit call, so shadow record-all behavior works correctly. */
+ unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
+ OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
+ OptixBuildInput build_input = {};
+ build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
+# if OPTIX_ABI_VERSION < 23
+ build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+ build_input.aabbArray.numPrimitives = num_points;
+ build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
+ build_input.aabbArray.flags = &build_flags;
+ build_input.aabbArray.numSbtRecords = 1;
+ build_input.aabbArray.primitiveIndexOffset = pointcloud->prim_offset;
+# else
+ build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+ build_input.customPrimitiveArray.numPrimitives = num_points;
+ build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
+ build_input.customPrimitiveArray.flags = &build_flags;
+ build_input.customPrimitiveArray.numSbtRecords = 1;
+ build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->prim_offset;
+# endif
+
+ if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
+ progress.set_error("Failed to build OptiX acceleration structure");
+ }
+ }
}
else {
unsigned int num_instances = 0;
unsigned int max_num_instances = 0xFFFFFFFF;
- bvh_optix->as_data.free();
+ bvh_optix->as_data->free();
bvh_optix->traversable_handle = 0;
- bvh_optix->motion_transform_data.free();
+ bvh_optix->motion_transform_data->free();
optixDeviceContextGetProperty(context,
OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
@@ -1379,8 +1526,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
- assert(bvh_optix->motion_transform_data.device == this);
- bvh_optix->motion_transform_data.alloc_to_device(total_motion_transform_size);
+ assert(bvh_optix->motion_transform_data->device == this);
+ bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
}
for (Object *ob : bvh->objects) {
@@ -1422,9 +1569,22 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
}
- else {
+ else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) {
+ /* Use the hit group that has an intersection program for point clouds. */
+ instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
+
+ /* Also skip point clouds in local trace calls. */
+ instance.visibilityMask |= 4;
+ }
+
+# if OPTIX_ABI_VERSION < 55
+ /* Cannot disable any-hit program for thick curves, since it needs to filter out end-caps. */
+ else
+# endif
+ {
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
* since it needs to filter out end-caps there).
+ *
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
*/
@@ -1441,7 +1601,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
motion_transform_offset = align_up(motion_transform_offset,
OPTIX_TRANSFORM_BYTE_ALIGNMENT);
- CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data.device_pointer +
+ CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
motion_transform_offset;
motion_transform_offset += motion_transform_size;
@@ -1494,9 +1654,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
- /* Disable instance transform if object uses motion transform already. */
- instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
-
/* Get traversable handle to motion transform. */
optixConvertPointerToTraversableHandle(context,
motion_transform_gpu,
@@ -1510,10 +1667,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
/* Set transform matrix. */
memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
}
- else {
- /* Disable instance transform if geometry already has it applied to vertex data. */
- instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
- }
}
}