Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/optix/device_impl.cpp')
-rw-r--r--intern/cycles/device/optix/device_impl.cpp526
1 files changed, 374 insertions, 152 deletions
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 5f5eff53063..cb6c36d5ea6 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -20,45 +20,41 @@
# include "device/optix/device_impl.h"
# include "bvh/bvh.h"
-# include "bvh/bvh_optix.h"
+# include "bvh/optix.h"
+
# include "integrator/pass_accessor_gpu.h"
-# include "render/buffers.h"
-# include "render/hair.h"
-# include "render/mesh.h"
-# include "render/object.h"
-# include "render/pass.h"
-# include "render/scene.h"
-
-# include "util/util_debug.h"
-# include "util/util_logging.h"
-# include "util/util_md5.h"
-# include "util/util_path.h"
-# include "util/util_progress.h"
-# include "util/util_time.h"
+
+# include "scene/hair.h"
+# include "scene/mesh.h"
+# include "scene/object.h"
+# include "scene/pass.h"
+# include "scene/pointcloud.h"
+# include "scene/scene.h"
+
+# include "util/debug.h"
+# include "util/log.h"
+# include "util/md5.h"
+# include "util/path.h"
+# include "util/progress.h"
+# include "util/time.h"
# undef __KERNEL_CPU__
# define __KERNEL_OPTIX__
# include "kernel/device/optix/globals.h"
+# include <optix_denoiser_tiling.h>
+
CCL_NAMESPACE_BEGIN
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
- : device(device), queue(device), state(device, "__denoiser_state")
-{
-}
-
-OptiXDevice::Denoiser::~Denoiser()
+ : device(device), queue(device), state(device, "__denoiser_state", true)
{
- const CUDAContextScope scope(device);
- if (optix_denoiser != nullptr) {
- optixDenoiserDestroy(optix_denoiser);
- }
}
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: CUDADevice(info, stats, profiler),
sbt_data(this, "__sbt", MEM_READ_ONLY),
- launch_params(this, "__params"),
+ launch_params(this, "__params", false),
denoiser_(this)
{
/* Make the CUDA context current. */
@@ -90,6 +86,7 @@ OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
};
# endif
if (DebugFlags().optix.use_debug) {
+ VLOG(1) << "Using OptiX debug mode.";
options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
}
optix_assert(optixDeviceContextCreate(cuContext, &options, &context));
@@ -131,6 +128,11 @@ OptiXDevice::~OptiXDevice()
}
}
+ /* Make sure denoiser is destroyed before device context! */
+ if (denoiser_.optix_denoiser != nullptr) {
+ optixDenoiserDestroy(denoiser_.optix_denoiser);
+ }
+
optixDeviceContextDestroy(context);
}
@@ -209,18 +211,22 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
else {
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
- module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
+ module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
}
module_options.boundValues = nullptr;
module_options.numBoundValues = 0;
+# if OPTIX_ABI_VERSION >= 55
+ module_options.payloadTypes = nullptr;
+ module_options.numPayloadTypes = 0;
+# endif
OptixPipelineCompileOptions pipeline_options = {};
/* Default to no motion blur and two-level graph, since it is the fastest option. */
pipeline_options.usesMotionBlur = false;
pipeline_options.traversableGraphFlags =
OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
- pipeline_options.numPayloadValues = 6;
+ pipeline_options.numPayloadValues = 8;
pipeline_options.numAttributeValues = 2; /* u, v */
pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
pipeline_options.pipelineLaunchParamsVariableName = "__params"; /* See globals.h */
@@ -228,11 +234,18 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
if (kernel_features & KERNEL_FEATURE_HAIR) {
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
+# if OPTIX_ABI_VERSION >= 55
+ pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
+# else
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
+# endif
}
else
pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
}
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
+ }
/* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
* This is necessary since objects may be reported to have motion if the Vector pass is
@@ -325,7 +338,13 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
/* Built-in thick curve intersection. */
OptixBuiltinISOptions builtin_options = {};
+# if OPTIX_ABI_VERSION >= 55
+ builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
+ builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
+ builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
+# else
builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+# endif
builtin_options.usesMotionBlur = false;
optix_assert(optixBuiltinISModuleGet(
@@ -357,6 +376,18 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
}
+ /* Pointclouds */
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
+ group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+ group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
+ group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
+ group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
+ group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
+ group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
+ group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
+ }
+
if (kernel_features & (KERNEL_FEATURE_SUBSURFACE | KERNEL_FEATURE_NODE_RAYTRACE)) {
/* Add hit group for local intersections. */
group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
@@ -377,9 +408,6 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
"__direct_callable__svm_node_bevel";
- group_descs[PG_CALL_AO_PASS].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
- group_descs[PG_CALL_AO_PASS].callables.moduleDC = optix_module;
- group_descs[PG_CALL_AO_PASS].callables.entryFunctionNameDC = "__direct_callable__ao_pass";
}
optix_assert(optixProgramGroupCreate(
@@ -407,6 +435,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
trace_css = std::max(trace_css,
stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
+ trace_css = std::max(
+ trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
+ trace_css = std::max(
+ trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
OptixPipelineLinkOptions link_options = {};
link_options.maxTraceDepth = 1;
@@ -415,7 +447,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
}
else {
- link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
+ link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
}
if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
@@ -432,6 +464,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
+ pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
+ }
pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
@@ -471,6 +507,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipeline_groups.push_back(groups[PG_HITD_MOTION]);
pipeline_groups.push_back(groups[PG_HITS_MOTION]);
}
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
+ pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
+ }
optix_assert(optixPipelineCreate(context,
&pipeline_options,
@@ -511,7 +551,7 @@ class OptiXDevice::DenoiseContext {
: denoise_params(task.params),
render_buffers(task.render_buffers),
buffer_params(task.buffer_params),
- guiding_buffer(device, "denoiser guiding passes buffer"),
+ guiding_buffer(device, "denoiser guiding passes buffer", true),
num_samples(task.num_samples)
{
num_input_passes = 1;
@@ -526,14 +566,28 @@ class OptiXDevice::DenoiseContext {
}
}
- const int num_guiding_passes = num_input_passes - 1;
+ if (denoise_params.temporally_stable) {
+ prev_output.device_pointer = render_buffers->buffer.device_pointer;
+
+ prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
+
+ prev_output.stride = buffer_params.stride;
+ prev_output.pass_stride = buffer_params.pass_stride;
+
+ num_input_passes += 1;
+ use_pass_flow = true;
+ pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
+ }
+
+ use_guiding_passes = (num_input_passes - 1) > 0;
- if (num_guiding_passes) {
+ if (use_guiding_passes) {
if (task.allow_inplace_modification) {
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
guiding_params.pass_albedo = pass_denoising_albedo;
guiding_params.pass_normal = pass_denoising_normal;
+ guiding_params.pass_flow = pass_motion;
guiding_params.stride = buffer_params.stride;
guiding_params.pass_stride = buffer_params.pass_stride;
@@ -548,6 +602,10 @@ class OptiXDevice::DenoiseContext {
guiding_params.pass_normal = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
+ if (use_pass_flow) {
+ guiding_params.pass_flow = guiding_params.pass_stride;
+ guiding_params.pass_stride += 2;
+ }
guiding_params.stride = buffer_params.width;
@@ -565,6 +623,16 @@ class OptiXDevice::DenoiseContext {
RenderBuffers *render_buffers = nullptr;
const BufferParams &buffer_params;
+ /* Previous output. */
+ struct {
+ device_ptr device_pointer = 0;
+
+ int offset = PASS_UNUSED;
+
+ int stride = -1;
+ int pass_stride = -1;
+ } prev_output;
+
/* Device-side storage of the guiding passes. */
device_only_memory<float> guiding_buffer;
@@ -574,6 +642,7 @@ class OptiXDevice::DenoiseContext {
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_albedo = PASS_UNUSED;
int pass_normal = PASS_UNUSED;
+ int pass_flow = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
@@ -581,8 +650,10 @@ class OptiXDevice::DenoiseContext {
/* Number of input passes. Including the color and extra auxiliary passes. */
int num_input_passes = 0;
+ bool use_guiding_passes = false;
bool use_pass_albedo = false;
bool use_pass_normal = false;
+ bool use_pass_flow = false;
int num_samples = 0;
@@ -591,6 +662,7 @@ class OptiXDevice::DenoiseContext {
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_denoising_albedo = PASS_UNUSED;
int pass_denoising_normal = PASS_UNUSED;
+ int pass_motion = PASS_UNUSED;
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
@@ -657,22 +729,24 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
- const_cast<int *>(&context.guiding_params.pass_stride),
- const_cast<int *>(&context.guiding_params.pass_albedo),
- const_cast<int *>(&context.guiding_params.pass_normal),
- &context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.offset),
- const_cast<int *>(&buffer_params.stride),
- const_cast<int *>(&buffer_params.pass_stride),
- const_cast<int *>(&context.pass_sample_count),
- const_cast<int *>(&context.pass_denoising_albedo),
- const_cast<int *>(&context.pass_denoising_normal),
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height),
- const_cast<int *>(&context.num_samples)};
+ DeviceKernelArguments args(&context.guiding_params.device_pointer,
+ &context.guiding_params.pass_stride,
+ &context.guiding_params.pass_albedo,
+ &context.guiding_params.pass_normal,
+ &context.guiding_params.pass_flow,
+ &context.render_buffers->buffer.device_pointer,
+ &buffer_params.offset,
+ &buffer_params.stride,
+ &buffer_params.pass_stride,
+ &context.pass_sample_count,
+ &context.pass_denoising_albedo,
+ &context.pass_denoising_normal,
+ &context.pass_motion,
+ &buffer_params.full_x,
+ &buffer_params.full_y,
+ &buffer_params.width,
+ &buffer_params.height,
+ &context.num_samples);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
@@ -683,11 +757,11 @@ bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
- const_cast<int *>(&context.guiding_params.pass_stride),
- const_cast<int *>(&context.guiding_params.pass_albedo),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height)};
+ DeviceKernelArguments args(&context.guiding_params.device_pointer,
+ &context.guiding_params.pass_stride,
+ &context.guiding_params.pass_albedo,
+ &buffer_params.width,
+ &buffer_params.height);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}
@@ -712,7 +786,7 @@ void OptiXDevice::denoise_pass(DenoiseContext &context, PassType pass_type)
return;
}
}
- else if (!context.albedo_replaced_with_fake) {
+ else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
context.albedo_replaced_with_fake = true;
if (!denoise_filter_guiding_set_fake_albedo(context)) {
LOG(ERROR) << "Error replacing real albedo with the fake one.";
@@ -768,7 +842,13 @@ void OptiXDevice::denoise_color_read(DenoiseContext &context, const DenoisePass
destination.num_components = 3;
destination.pixel_stride = context.buffer_params.pass_stride;
- pass_accessor.get_render_tile_pixels(context.render_buffers, context.buffer_params, destination);
+ BufferParams buffer_params = context.buffer_params;
+ buffer_params.window_x = 0;
+ buffer_params.window_y = 0;
+ buffer_params.window_width = buffer_params.width;
+ buffer_params.window_height = buffer_params.height;
+
+ pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
}
bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const DenoisePass &pass)
@@ -777,15 +857,15 @@ bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {&context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height),
- const_cast<int *>(&buffer_params.offset),
- const_cast<int *>(&buffer_params.stride),
- const_cast<int *>(&buffer_params.pass_stride),
- const_cast<int *>(&pass.denoised_offset)};
+ DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
+ &buffer_params.full_x,
+ &buffer_params.full_y,
+ &buffer_params.width,
+ &buffer_params.height,
+ &buffer_params.offset,
+ &buffer_params.stride,
+ &buffer_params.pass_stride,
+ &pass.denoised_offset);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}
@@ -797,20 +877,20 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {&context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height),
- const_cast<int *>(&buffer_params.offset),
- const_cast<int *>(&buffer_params.stride),
- const_cast<int *>(&buffer_params.pass_stride),
- const_cast<int *>(&context.num_samples),
- const_cast<int *>(&pass.noisy_offset),
- const_cast<int *>(&pass.denoised_offset),
- const_cast<int *>(&context.pass_sample_count),
- const_cast<int *>(&pass.num_components),
- const_cast<bool *>(&pass.use_compositing)};
+ DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
+ &buffer_params.full_x,
+ &buffer_params.full_y,
+ &buffer_params.width,
+ &buffer_params.height,
+ &buffer_params.offset,
+ &buffer_params.stride,
+ &buffer_params.pass_stride,
+ &context.num_samples,
+ &pass.noisy_offset,
+ &pass.denoised_offset,
+ &context.pass_sample_count,
+ &pass.num_components,
+ &pass.use_compositing);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}
@@ -834,7 +914,8 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
{
const bool recreate_denoiser = (denoiser_.optix_denoiser == nullptr) ||
(denoiser_.use_pass_albedo != context.use_pass_albedo) ||
- (denoiser_.use_pass_normal != context.use_pass_normal);
+ (denoiser_.use_pass_normal != context.use_pass_normal) ||
+ (denoiser_.use_pass_flow != context.use_pass_flow);
if (!recreate_denoiser) {
return true;
}
@@ -848,8 +929,14 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
OptixDenoiserOptions denoiser_options = {};
denoiser_options.guideAlbedo = context.use_pass_albedo;
denoiser_options.guideNormal = context.use_pass_normal;
+
+ OptixDenoiserModelKind model = OPTIX_DENOISER_MODEL_KIND_HDR;
+ if (context.use_pass_flow) {
+ model = OPTIX_DENOISER_MODEL_KIND_TEMPORAL;
+ }
+
const OptixResult result = optixDenoiserCreate(
- this->context, OPTIX_DENOISER_MODEL_KIND_HDR, &denoiser_options, &denoiser_.optix_denoiser);
+ this->context, model, &denoiser_options, &denoiser_.optix_denoiser);
if (result != OPTIX_SUCCESS) {
set_error("Failed to create OptiX denoiser");
@@ -859,6 +946,7 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
/* OptiX denoiser handle was created with the requested number of input passes. */
denoiser_.use_pass_albedo = context.use_pass_albedo;
denoiser_.use_pass_normal = context.use_pass_normal;
+ denoiser_.use_pass_flow = context.use_pass_flow;
/* OptiX denoiser has been created, but it needs configuration. */
denoiser_.is_configured = false;
@@ -868,41 +956,42 @@ bool OptiXDevice::denoise_create_if_needed(DenoiseContext &context)
bool OptiXDevice::denoise_configure_if_needed(DenoiseContext &context)
{
- if (denoiser_.is_configured && (denoiser_.configured_size.x == context.buffer_params.width &&
- denoiser_.configured_size.y == context.buffer_params.height)) {
+ /* Limit maximum tile size denoiser can be invoked with. */
+ const int2 tile_size = make_int2(min(context.buffer_params.width, 4096),
+ min(context.buffer_params.height, 4096));
+
+ if (denoiser_.is_configured &&
+ (denoiser_.configured_size.x == tile_size.x && denoiser_.configured_size.y == tile_size.y)) {
return true;
}
- const BufferParams &buffer_params = context.buffer_params;
-
- OptixDenoiserSizes sizes = {};
optix_assert(optixDenoiserComputeMemoryResources(
- denoiser_.optix_denoiser, buffer_params.width, buffer_params.height, &sizes));
-
- denoiser_.scratch_size = sizes.withOverlapScratchSizeInBytes;
- denoiser_.scratch_offset = sizes.stateSizeInBytes;
+ denoiser_.optix_denoiser, tile_size.x, tile_size.y, &denoiser_.sizes));
/* Allocate denoiser state if tile size has changed since last setup. */
- denoiser_.state.alloc_to_device(denoiser_.scratch_offset + denoiser_.scratch_size);
+ denoiser_.state.alloc_to_device(denoiser_.sizes.stateSizeInBytes +
+ denoiser_.sizes.withOverlapScratchSizeInBytes);
/* Initialize denoiser state for the current tile size. */
- const OptixResult result = optixDenoiserSetup(denoiser_.optix_denoiser,
- denoiser_.queue.stream(),
- buffer_params.width,
- buffer_params.height,
- denoiser_.state.device_pointer,
- denoiser_.scratch_offset,
- denoiser_.state.device_pointer +
- denoiser_.scratch_offset,
- denoiser_.scratch_size);
+ const OptixResult result = optixDenoiserSetup(
+ denoiser_.optix_denoiser,
+ 0, /* Work around bug in r495 drivers that causes artifacts when denoiser setup is called
+ on a stream that is not the default stream */
+ tile_size.x + denoiser_.sizes.overlapWindowSizeInPixels * 2,
+ tile_size.y + denoiser_.sizes.overlapWindowSizeInPixels * 2,
+ denoiser_.state.device_pointer,
+ denoiser_.sizes.stateSizeInBytes,
+ denoiser_.state.device_pointer + denoiser_.sizes.stateSizeInBytes,
+ denoiser_.sizes.withOverlapScratchSizeInBytes);
if (result != OPTIX_SUCCESS) {
set_error("Failed to set up OptiX denoiser");
return false;
}
+ cuda_assert(cuCtxSynchronize());
+
denoiser_.is_configured = true;
- denoiser_.configured_size.x = buffer_params.width;
- denoiser_.configured_size.y = buffer_params.height;
+ denoiser_.configured_size = tile_size;
return true;
}
@@ -917,8 +1006,10 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
OptixImage2D color_layer = {0};
OptixImage2D albedo_layer = {0};
OptixImage2D normal_layer = {0};
+ OptixImage2D flow_layer = {0};
OptixImage2D output_layer = {0};
+ OptixImage2D prev_output_layer = {0};
/* Color pass. */
{
@@ -934,7 +1025,18 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
color_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
- device_vector<float> fake_albedo(this, "fake_albedo", MEM_READ_WRITE);
+ /* Previous output. */
+ if (context.prev_output.offset != PASS_UNUSED) {
+ const int64_t pass_stride_in_bytes = context.prev_output.pass_stride * sizeof(float);
+
+ prev_output_layer.data = context.prev_output.device_pointer +
+ context.prev_output.offset * sizeof(float);
+ prev_output_layer.width = width;
+ prev_output_layer.height = height;
+ prev_output_layer.rowStrideInBytes = pass_stride_in_bytes * context.prev_output.stride;
+ prev_output_layer.pixelStrideInBytes = pass_stride_in_bytes;
+ prev_output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
+ }
/* Optional albedo and color passes. */
if (context.num_input_passes > 1) {
@@ -959,33 +1061,47 @@ bool OptiXDevice::denoise_run(DenoiseContext &context, const DenoisePass &pass)
normal_layer.pixelStrideInBytes = pixel_stride_in_bytes;
normal_layer.format = OPTIX_PIXEL_FORMAT_FLOAT3;
}
+
+ if (context.use_pass_flow) {
+ flow_layer.data = d_guiding_buffer + context.guiding_params.pass_flow * sizeof(float);
+ flow_layer.width = width;
+ flow_layer.height = height;
+ flow_layer.rowStrideInBytes = row_stride_in_bytes;
+ flow_layer.pixelStrideInBytes = pixel_stride_in_bytes;
+ flow_layer.format = OPTIX_PIXEL_FORMAT_FLOAT2;
+ }
}
/* Denoise in-place of the noisy input in the render buffers. */
output_layer = color_layer;
- /* Finally run denoising. */
- OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
+ OptixDenoiserGuideLayer guide_layers = {};
+ guide_layers.albedo = albedo_layer;
+ guide_layers.normal = normal_layer;
+ guide_layers.flow = flow_layer;
+
OptixDenoiserLayer image_layers = {};
image_layers.input = color_layer;
+ image_layers.previousOutput = prev_output_layer;
image_layers.output = output_layer;
- OptixDenoiserGuideLayer guide_layers = {};
- guide_layers.albedo = albedo_layer;
- guide_layers.normal = normal_layer;
+ /* Finally run denoising. */
+ OptixDenoiserParams params = {}; /* All parameters are disabled/zero. */
- optix_assert(optixDenoiserInvoke(denoiser_.optix_denoiser,
- denoiser_.queue.stream(),
- &params,
- denoiser_.state.device_pointer,
- denoiser_.scratch_offset,
- &guide_layers,
- &image_layers,
- 1,
- 0,
- 0,
- denoiser_.state.device_pointer + denoiser_.scratch_offset,
- denoiser_.scratch_size));
+ optix_assert(optixUtilDenoiserInvokeTiled(denoiser_.optix_denoiser,
+ denoiser_.queue.stream(),
+ &params,
+ denoiser_.state.device_pointer,
+ denoiser_.sizes.stateSizeInBytes,
+ &guide_layers,
+ &image_layers,
+ 1,
+ denoiser_.state.device_pointer +
+ denoiser_.sizes.stateSizeInBytes,
+ denoiser_.sizes.withOverlapScratchSizeInBytes,
+ denoiser_.sizes.overlapWindowSizeInPixels,
+ denoiser_.configured_size.x,
+ denoiser_.configured_size.y));
return true;
}
@@ -995,6 +1111,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
const OptixBuildInput &build_input,
uint16_t num_motion_steps)
{
+ /* Allocate and build acceleration structures only one at a time, to prevent parallel builds
+ * from running out of memory (since both original and compacted acceleration structure memory
+ * may be allocated at the same time for the duration of this function). The builds would
+ * otherwise happen on the same CUDA stream anyway. */
+ static thread_mutex mutex;
+ thread_scoped_lock lock(mutex);
+
const CUDAContextScope scope(this);
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
@@ -1020,14 +1143,15 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
/* Allocate required output buffers. */
- device_only_memory<char> temp_mem(this, "optix temp as build mem");
+ device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
if (!temp_mem.device_pointer) {
/* Make sure temporary memory allocation succeeded. */
return false;
}
- device_only_memory<char> &out_data = bvh->as_data;
+ /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
+ device_only_memory<char> &out_data = *bvh->as_data;
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
assert(out_data.device == this);
out_data.alloc_to_device(sizes.outputSizeInBytes);
@@ -1075,12 +1199,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
/* There is no point compacting if the size does not change. */
if (compacted_size < sizes.outputSizeInBytes) {
- device_only_memory<char> compacted_data(this, "optix compacted as");
+ device_only_memory<char> compacted_data(this, "optix compacted as", false);
compacted_data.alloc_to_device(compacted_size);
- if (!compacted_data.device_pointer)
+ if (!compacted_data.device_pointer) {
/* Do not compact if memory allocation for compacted acceleration structure fails.
* Can just use the uncompacted one then, so succeed here regardless. */
return !have_error();
+ }
optix_assert(optixAccelCompact(
context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
@@ -1091,6 +1216,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
std::swap(out_data.device_size, compacted_data.device_size);
std::swap(out_data.device_pointer, compacted_data.device_pointer);
+ /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
+ */
}
}
@@ -1118,7 +1245,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
operation = OPTIX_BUILD_OPERATION_UPDATE;
}
else {
- bvh_optix->as_data.free();
+ bvh_optix->as_data->free();
bvh_optix->traversable_handle = 0;
}
@@ -1173,20 +1300,27 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
int ka = max(k0 - 1, curve.first_key);
int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
+ index_data[i] = i * 4;
+ float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
+
+# if OPTIX_ABI_VERSION >= 55
+ v[0] = make_float4(keys[ka].x, keys[ka].y, keys[ka].z, curve_radius[ka]);
+ v[1] = make_float4(keys[k0].x, keys[k0].y, keys[k0].z, curve_radius[k0]);
+ v[2] = make_float4(keys[k1].x, keys[k1].y, keys[k1].z, curve_radius[k1]);
+ v[3] = make_float4(keys[kb].x, keys[kb].y, keys[kb].z, curve_radius[kb]);
+# else
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
const float4 pw = make_float4(
curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
- /* Convert Catmull-Rom data to Bezier spline. */
+ /* Convert Catmull-Rom data to B-spline. */
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
- index_data[i] = i * 4;
- float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
v[0] = make_float4(
dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
v[1] = make_float4(
@@ -1195,6 +1329,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
v[3] = make_float4(
dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
+# endif
}
else {
BoundBox bounds = BoundBox::empty;
@@ -1236,7 +1371,11 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
OptixBuildInput build_input = {};
if (hair->curve_shape == CURVE_THICK) {
build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
+# if OPTIX_ABI_VERSION >= 55
+ build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
+# else
build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+# endif
build_input.curveArray.numPrimitives = num_segments;
build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
build_input.curveArray.numVertices = num_vertices;
@@ -1246,11 +1385,11 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
build_input.curveArray.indexStrideInBytes = sizeof(int);
build_input.curveArray.flag = build_flags;
- build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset;
+ build_input.curveArray.primitiveIndexOffset = hair->curve_segment_offset;
}
else {
/* Disable visibility test any-hit program, since it is already checked during
- * intersection. Those trace calls that require anyhit can force it with a ray flag. */
+ * intersection. Those trace calls that require any-hit can force it with a ray flag. */
build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
@@ -1259,7 +1398,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
build_input.customPrimitiveArray.flags = &build_flags;
build_input.customPrimitiveArray.numSbtRecords = 1;
- build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset;
+ build_input.customPrimitiveArray.primitiveIndexOffset = hair->curve_segment_offset;
}
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
@@ -1328,7 +1467,87 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
* buffers for that purpose. OptiX does not allow this to be zero though, so just pass in
* one and rely on that having the same meaning in this case. */
build_input.triangleArray.numSbtRecords = 1;
- build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
+ build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
+
+ if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
+ progress.set_error("Failed to build OptiX acceleration structure");
+ }
+ }
+ else if (geom->geometry_type == Geometry::POINTCLOUD) {
+ /* Build BLAS for points primitives. */
+ PointCloud *const pointcloud = static_cast<PointCloud *const>(geom);
+ const size_t num_points = pointcloud->num_points();
+ if (num_points == 0) {
+ return;
+ }
+
+ size_t num_motion_steps = 1;
+ Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (motion_blur && pointcloud->get_use_motion_blur() && motion_points) {
+ num_motion_steps = pointcloud->get_motion_steps();
+ }
+
+ device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
+ aabb_data.alloc(num_points * num_motion_steps);
+
+ /* Get AABBs for each motion step. */
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ /* The center step for motion vertices is not stored in the attribute. */
+ const float3 *points = pointcloud->get_points().data();
+ const float *radius = pointcloud->get_radius().data();
+ size_t center_step = (num_motion_steps - 1) / 2;
+ if (step != center_step) {
+ size_t attr_offset = (step > center_step) ? step - 1 : step;
+ /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
+ points = motion_points->data_float3() + attr_offset * num_points;
+ }
+
+ for (size_t i = 0; i < num_points; ++i) {
+ const PointCloud::Point point = pointcloud->get_point(i);
+ BoundBox bounds = BoundBox::empty;
+ point.bounds_grow(points, radius, bounds);
+
+ const size_t index = step * num_points + i;
+ aabb_data[index].minX = bounds.min.x;
+ aabb_data[index].minY = bounds.min.y;
+ aabb_data[index].minZ = bounds.min.z;
+ aabb_data[index].maxX = bounds.max.x;
+ aabb_data[index].maxY = bounds.max.y;
+ aabb_data[index].maxZ = bounds.max.z;
+ }
+ }
+
+ /* Upload AABB data to GPU. */
+ aabb_data.copy_to_device();
+
+ vector<device_ptr> aabb_ptrs;
+ aabb_ptrs.reserve(num_motion_steps);
+ for (size_t step = 0; step < num_motion_steps; ++step) {
+ aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points * sizeof(OptixAabb));
+ }
+
+ /* Disable visibility test any-hit program, since it is already checked during
+ * intersection. Those trace calls that require anyhit can force it with a ray flag.
+ * For those, force a single any-hit call, so shadow record-all behavior works correctly. */
+ unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
+ OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
+ OptixBuildInput build_input = {};
+ build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
+# if OPTIX_ABI_VERSION < 23
+ build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+ build_input.aabbArray.numPrimitives = num_points;
+ build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
+ build_input.aabbArray.flags = &build_flags;
+ build_input.aabbArray.numSbtRecords = 1;
+ build_input.aabbArray.primitiveIndexOffset = pointcloud->prim_offset;
+# else
+ build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+ build_input.customPrimitiveArray.numPrimitives = num_points;
+ build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
+ build_input.customPrimitiveArray.flags = &build_flags;
+ build_input.customPrimitiveArray.numSbtRecords = 1;
+ build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->prim_offset;
+# endif
if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
progress.set_error("Failed to build OptiX acceleration structure");
@@ -1339,9 +1558,9 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
unsigned int num_instances = 0;
unsigned int max_num_instances = 0xFFFFFFFF;
- bvh_optix->as_data.free();
+ bvh_optix->as_data->free();
bvh_optix->traversable_handle = 0;
- bvh_optix->motion_transform_data.free();
+ bvh_optix->motion_transform_data->free();
optixDeviceContextGetProperty(context,
OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
@@ -1374,8 +1593,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
- assert(bvh_optix->motion_transform_data.device == this);
- bvh_optix->motion_transform_data.alloc_to_device(total_motion_transform_size);
+ assert(bvh_optix->motion_transform_data->device == this);
+ bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
}
for (Object *ob : bvh->objects) {
@@ -1395,8 +1614,8 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
instance.transform[5] = 1.0f;
instance.transform[10] = 1.0f;
- /* Set user instance ID to object index (but leave low bit blank). */
- instance.instanceId = ob->get_device_index() << 1;
+ /* Set user instance ID to object index. */
+ instance.instanceId = ob->get_device_index();
/* Add some of the object visibility bits to the mask.
* __prim_visibility contains the combined visibility bits of all instances, so is not
@@ -1417,9 +1636,22 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
}
- else {
+ else if (ob->get_geometry()->geometry_type == Geometry::POINTCLOUD) {
+ /* Use the hit group that has an intersection program for point clouds. */
+ instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
+
+ /* Also skip point clouds in local trace calls. */
+ instance.visibilityMask |= 4;
+ }
+
+# if OPTIX_ABI_VERSION < 55
+ /* Cannot disable any-hit program for thick curves, since it needs to filter out end-caps. */
+ else
+# endif
+ {
/* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
- * since it needs to filter out endcaps there).
+ * since it needs to filter out end-caps there).
+
* It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
* programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
*/
@@ -1436,7 +1668,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
motion_transform_offset = align_up(motion_transform_offset,
OPTIX_TRANSFORM_BYTE_ALIGNMENT);
- CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data.device_pointer +
+ CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
motion_transform_offset;
motion_transform_offset += motion_transform_size;
@@ -1489,9 +1721,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
cuMemcpyHtoD(motion_transform_gpu, &motion_transform, motion_transform_size);
delete[] reinterpret_cast<uint8_t *>(&motion_transform);
- /* Disable instance transform if object uses motion transform already. */
- instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
-
/* Get traversable handle to motion transform. */
optixConvertPointerToTraversableHandle(context,
motion_transform_gpu,
@@ -1505,13 +1734,6 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
/* Set transform matrix. */
memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
}
- else {
- /* Disable instance transform if geometry already has it applied to vertex data. */
- instance.flags |= OPTIX_INSTANCE_FLAG_DISABLE_TRANSFORM;
- /* Non-instanced objects read ID from 'prim_object', so distinguish
- * them from instanced objects with the low bit set. */
- instance.instanceId |= 1;
- }
}
}
@@ -1573,7 +1795,7 @@ void OptiXDevice::const_copy_to(const char *name, void *host, size_t size)
return; \
}
KERNEL_TEX(IntegratorStateGPU, __integrator_state)
-# include "kernel/kernel_textures.h"
+# include "kernel/textures.h"
# undef KERNEL_TEX
}