Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/device_optix.cpp')
-rw-r--r--intern/cycles/device/device_optix.cpp395
1 files changed, 294 insertions, 101 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index ececca3df53..1cc45983565 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -131,8 +131,12 @@ class OptiXDevice : public CUDADevice {
PG_RGEN,
PG_MISS,
PG_HITD, // Default hit group
- PG_HITL, // __BVH_LOCAL__ hit group
PG_HITS, // __SHADOW_RECORD_ALL__ hit group
+ PG_HITL, // __BVH_LOCAL__ hit group (only used for triangles)
+# if OPTIX_ABI_VERSION >= 36
+ PG_HITD_MOTION,
+ PG_HITS_MOTION,
+# endif
# ifdef WITH_CYCLES_DEBUG
PG_EXCP,
# endif
@@ -177,6 +181,7 @@ class OptiXDevice : public CUDADevice {
OptixDeviceContext context = NULL;
OptixModule optix_module = NULL; // All necessary OptiX kernels are in one module
+ OptixModule builtin_modules[2] = {};
OptixPipeline pipelines[NUM_PIPELINES] = {};
bool motion_blur = false;
@@ -264,6 +269,9 @@ class OptiXDevice : public CUDADevice {
// Unload modules
if (optix_module != NULL)
optixModuleDestroy(optix_module);
+ for (unsigned int i = 0; i < 2; ++i)
+ if (builtin_modules[i] != NULL)
+ optixModuleDestroy(builtin_modules[i]);
for (unsigned int i = 0; i < NUM_PIPELINES; ++i)
if (pipelines[i] != NULL)
optixPipelineDestroy(pipelines[i]);
@@ -338,6 +346,12 @@ class OptiXDevice : public CUDADevice {
optixModuleDestroy(optix_module);
optix_module = NULL;
}
+ for (unsigned int i = 0; i < 2; ++i) {
+ if (builtin_modules[i] != NULL) {
+ optixModuleDestroy(builtin_modules[i]);
+ builtin_modules[i] = NULL;
+ }
+ }
for (unsigned int i = 0; i < NUM_PIPELINES; ++i) {
if (pipelines[i] != NULL) {
optixPipelineDestroy(pipelines[i]);
@@ -369,6 +383,18 @@ class OptiXDevice : public CUDADevice {
# endif
pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h
+# if OPTIX_ABI_VERSION >= 36
+ pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
+ if (requested_features.use_hair) {
+ if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
+ pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
+ }
+ else {
+ pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
+ }
+ }
+# endif
+
// Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
// This is necessary since objects may be reported to have motion if the Vector pass is
// active, but may still need to be rendered without motion blur if that isn't active as well
@@ -442,6 +468,34 @@ class OptiXDevice : public CUDADevice {
group_descs[PG_HITD].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
group_descs[PG_HITS].hitgroup.entryFunctionNameIS = "__intersection__curve_ribbon";
}
+
+# if OPTIX_ABI_VERSION >= 36
+ if (DebugFlags().optix.curves_api && requested_features.use_hair_thick) {
+ OptixBuiltinISOptions builtin_options;
+ builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+ builtin_options.usesMotionBlur = false;
+
+ check_result_optix_ret(optixBuiltinISModuleGet(
+ context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
+
+ group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
+ group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr;
+ group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
+ group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr;
+
+ if (motion_blur) {
+ builtin_options.usesMotionBlur = true;
+
+ check_result_optix_ret(optixBuiltinISModuleGet(
+ context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
+
+ group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
+ group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
+ group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
+ group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
+ }
+ }
+# endif
}
if (requested_features.use_subsurface || requested_features.use_shader_raytrace) {
@@ -493,8 +547,14 @@ class OptiXDevice : public CUDADevice {
unsigned int trace_css = stack_size[PG_HITD].cssCH;
// This is based on the maximum of closest-hit and any-hit/intersection programs
trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
- trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
+ trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
+# if OPTIX_ABI_VERSION >= 36
+ trace_css = std::max(trace_css,
+ stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
+ trace_css = std::max(trace_css,
+ stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
+# endif
OptixPipelineLinkOptions link_options;
link_options.maxTraceDepth = 1;
@@ -503,17 +563,23 @@ class OptiXDevice : public CUDADevice {
# else
link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
# endif
- link_options.overrideUsesMotionBlur = pipeline_options.usesMotionBlur;
+# if OPTIX_ABI_VERSION < 24
+ link_options.overrideUsesMotionBlur = motion_blur;
+# endif
{ // Create path tracing pipeline
OptixProgramGroup pipeline_groups[] = {
- groups[PG_RGEN],
- groups[PG_MISS],
- groups[PG_HITD],
- groups[PG_HITS],
- groups[PG_HITL],
+ groups[PG_RGEN],
+ groups[PG_MISS],
+ groups[PG_HITD],
+ groups[PG_HITS],
+ groups[PG_HITL],
+# if OPTIX_ABI_VERSION >= 36
+ groups[PG_HITD_MOTION],
+ groups[PG_HITS_MOTION],
+# endif
# ifdef WITH_CYCLES_DEBUG
- groups[PG_EXCP],
+ groups[PG_EXCP],
# endif
};
check_result_optix_ret(
@@ -530,8 +596,8 @@ class OptiXDevice : public CUDADevice {
const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css;
// Set stack size depending on pipeline options
- check_result_optix_ret(optixPipelineSetStackSize(
- pipelines[PIP_PATH_TRACE], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2)));
+ check_result_optix_ret(
+ optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2)));
}
// Only need to create shader evaluation pipeline if one of these features is used:
@@ -541,15 +607,19 @@ class OptiXDevice : public CUDADevice {
if (use_shader_eval_pipeline) { // Create shader evaluation pipeline
OptixProgramGroup pipeline_groups[] = {
- groups[PG_BAKE],
- groups[PG_DISP],
- groups[PG_BACK],
- groups[PG_MISS],
- groups[PG_HITD],
- groups[PG_HITS],
- groups[PG_HITL],
+ groups[PG_BAKE],
+ groups[PG_DISP],
+ groups[PG_BACK],
+ groups[PG_MISS],
+ groups[PG_HITD],
+ groups[PG_HITS],
+ groups[PG_HITL],
+# if OPTIX_ABI_VERSION >= 36
+ groups[PG_HITD_MOTION],
+ groups[PG_HITS_MOTION],
+# endif
# ifdef WITH_CYCLES_DEBUG
- groups[PG_EXCP],
+ groups[PG_EXCP],
# endif
};
check_result_optix_ret(
@@ -672,7 +742,11 @@ class OptiXDevice : public CUDADevice {
sbt_params.missRecordCount = 1;
sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord);
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
- sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS
+# if OPTIX_ABI_VERSION >= 36
+ sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL
+# else
+ sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
+# endif
// Launch the ray generation program
check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE],
@@ -727,19 +801,18 @@ class OptiXDevice : public CUDADevice {
// 0 1 2
// 3 4 5
// 6 7 8 9
- RenderTile rtiles[10];
- rtiles[4] = rtile;
- task.map_neighbor_tiles(rtiles, this);
- rtile = rtiles[4]; // Tile may have been modified by mapping code
+ RenderTileNeighbors neighbors(rtile);
+ task.map_neighbor_tiles(neighbors, this);
+ RenderTile &center_tile = neighbors.tiles[RenderTileNeighbors::CENTER];
+ RenderTile &target_tile = neighbors.target;
+ rtile = center_tile; // Tile may have been modified by mapping code
// Calculate size of the tile to denoise (including overlap)
- int4 rect = make_int4(
- rtiles[4].x, rtiles[4].y, rtiles[4].x + rtiles[4].w, rtiles[4].y + rtiles[4].h);
+ int4 rect = center_tile.bounds();
// Overlap between tiles has to be at least 64 pixels
// TODO(pmours): Query this value from OptiX
rect = rect_expand(rect, 64);
- int4 clip_rect = make_int4(
- rtiles[3].x, rtiles[1].y, rtiles[5].x + rtiles[5].w, rtiles[7].y + rtiles[7].h);
+ int4 clip_rect = neighbors.bounds();
rect = rect_clip(rect, clip_rect);
int2 rect_size = make_int2(rect.z - rect.x, rect.w - rect.y);
int2 overlap_offset = make_int2(rtile.x - rect.x, rtile.y - rect.y);
@@ -760,14 +833,14 @@ class OptiXDevice : public CUDADevice {
device_only_memory<float> input(this, "denoiser input");
device_vector<TileInfo> tile_info_mem(this, "denoiser tile info", MEM_READ_WRITE);
- if ((!rtiles[0].buffer || rtiles[0].buffer == rtile.buffer) &&
- (!rtiles[1].buffer || rtiles[1].buffer == rtile.buffer) &&
- (!rtiles[2].buffer || rtiles[2].buffer == rtile.buffer) &&
- (!rtiles[3].buffer || rtiles[3].buffer == rtile.buffer) &&
- (!rtiles[5].buffer || rtiles[5].buffer == rtile.buffer) &&
- (!rtiles[6].buffer || rtiles[6].buffer == rtile.buffer) &&
- (!rtiles[7].buffer || rtiles[7].buffer == rtile.buffer) &&
- (!rtiles[8].buffer || rtiles[8].buffer == rtile.buffer)) {
+ bool contiguous_memory = true;
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ if (neighbors.tiles[i].buffer && neighbors.tiles[i].buffer != rtile.buffer) {
+ contiguous_memory = false;
+ }
+ }
+
+ if (contiguous_memory) {
// Tiles are in continous memory, so can just subtract overlap offset
input_ptr -= (overlap_offset.x + overlap_offset.y * rtile.stride) * pixel_stride;
// Stride covers the whole width of the image and not just a single tile
@@ -782,19 +855,19 @@ class OptiXDevice : public CUDADevice {
input_stride *= rect_size.x;
TileInfo *tile_info = tile_info_mem.alloc(1);
- for (int i = 0; i < 9; i++) {
- tile_info->offsets[i] = rtiles[i].offset;
- tile_info->strides[i] = rtiles[i].stride;
- tile_info->buffers[i] = rtiles[i].buffer;
+ for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
+ tile_info->offsets[i] = neighbors.tiles[i].offset;
+ tile_info->strides[i] = neighbors.tiles[i].stride;
+ tile_info->buffers[i] = neighbors.tiles[i].buffer;
}
- tile_info->x[0] = rtiles[3].x;
- tile_info->x[1] = rtiles[4].x;
- tile_info->x[2] = rtiles[5].x;
- tile_info->x[3] = rtiles[5].x + rtiles[5].w;
- tile_info->y[0] = rtiles[1].y;
- tile_info->y[1] = rtiles[4].y;
- tile_info->y[2] = rtiles[7].y;
- tile_info->y[3] = rtiles[7].y + rtiles[7].h;
+ tile_info->x[0] = neighbors.tiles[3].x;
+ tile_info->x[1] = neighbors.tiles[4].x;
+ tile_info->x[2] = neighbors.tiles[5].x;
+ tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
+ tile_info->y[0] = neighbors.tiles[1].y;
+ tile_info->y[1] = neighbors.tiles[4].y;
+ tile_info->y[2] = neighbors.tiles[7].y;
+ tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
tile_info_mem.copy_to_device();
void *args[] = {
@@ -804,7 +877,7 @@ class OptiXDevice : public CUDADevice {
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
device_only_memory<float> input_rgb(this, "denoiser input rgb");
- input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.optix_input_passes);
+ input_rgb.alloc_to_device(rect_size.x * rect_size.y * 3 * task.denoising.input_passes);
void *input_args[] = {&input_rgb.device_pointer,
&input_ptr,
@@ -813,7 +886,7 @@ class OptiXDevice : public CUDADevice {
&input_stride,
&task.pass_stride,
const_cast<int *>(pass_offset),
- &task.denoising.optix_input_passes,
+ &task.denoising.input_passes,
&rtile.sample};
launch_filter_kernel(
"kernel_cuda_filter_convert_to_rgb", rect_size.x, rect_size.y, input_args);
@@ -824,7 +897,7 @@ class OptiXDevice : public CUDADevice {
# endif
const bool recreate_denoiser = (denoiser == NULL) ||
- (task.denoising.optix_input_passes != denoiser_input_passes);
+ (task.denoising.input_passes != denoiser_input_passes);
if (recreate_denoiser) {
// Destroy existing handle before creating new one
if (denoiser != NULL) {
@@ -833,23 +906,29 @@ class OptiXDevice : public CUDADevice {
// Create OptiX denoiser handle on demand when it is first used
OptixDenoiserOptions denoiser_options;
- assert(task.denoising.optix_input_passes >= 1 && task.denoising.optix_input_passes <= 3);
+ assert(task.denoising.input_passes >= 1 && task.denoising.input_passes <= 3);
denoiser_options.inputKind = static_cast<OptixDenoiserInputKind>(
- OPTIX_DENOISER_INPUT_RGB + (task.denoising.optix_input_passes - 1));
+ OPTIX_DENOISER_INPUT_RGB + (task.denoising.input_passes - 1));
+# if OPTIX_ABI_VERSION < 28
denoiser_options.pixelFormat = OPTIX_PIXEL_FORMAT_FLOAT3;
+# endif
check_result_optix_ret(optixDenoiserCreate(context, &denoiser_options, &denoiser));
check_result_optix_ret(
optixDenoiserSetModel(denoiser, OPTIX_DENOISER_MODEL_KIND_HDR, NULL, 0));
// OptiX denoiser handle was created with the requested number of input passes
- denoiser_input_passes = task.denoising.optix_input_passes;
+ denoiser_input_passes = task.denoising.input_passes;
}
OptixDenoiserSizes sizes = {};
check_result_optix_ret(
optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes));
+# if OPTIX_ABI_VERSION < 28
const size_t scratch_size = sizes.recommendedScratchSizeInBytes;
+# else
+ const size_t scratch_size = sizes.withOverlapScratchSizeInBytes;
+# endif
const size_t scratch_offset = sizes.stateSizeInBytes;
// Allocate denoiser state if tile size has changed since last setup
@@ -897,10 +976,10 @@ class OptiXDevice : public CUDADevice {
int2 output_offset = overlap_offset;
overlap_offset = make_int2(0, 0); // Not supported by denoiser API, so apply manually
# else
- output_layers[0].data = rtiles[9].buffer + pixel_offset;
- output_layers[0].width = rtiles[9].w;
- output_layers[0].height = rtiles[9].h;
- output_layers[0].rowStrideInBytes = rtiles[9].stride * pixel_stride;
+ output_layers[0].data = target_tile.buffer + pixel_offset;
+ output_layers[0].width = target_tile.w;
+ output_layers[0].height = target_tile.h;
+ output_layers[0].rowStrideInBytes = target_tile.stride * pixel_stride;
output_layers[0].pixelStrideInBytes = pixel_stride;
# endif
output_layers[0].format = OPTIX_PIXEL_FORMAT_FLOAT3;
@@ -913,7 +992,7 @@ class OptiXDevice : public CUDADevice {
denoiser_state.device_pointer,
scratch_offset,
input_layers,
- task.denoising.optix_input_passes,
+ task.denoising.input_passes,
overlap_offset.x,
overlap_offset.y,
output_layers,
@@ -922,26 +1001,26 @@ class OptiXDevice : public CUDADevice {
# if OPTIX_DENOISER_NO_PIXEL_STRIDE
void *output_args[] = {&input_ptr,
- &rtiles[9].buffer,
+ &target_tile.buffer,
&output_offset.x,
&output_offset.y,
&rect_size.x,
&rect_size.y,
- &rtiles[9].x,
- &rtiles[9].y,
- &rtiles[9].w,
- &rtiles[9].h,
- &rtiles[9].offset,
- &rtiles[9].stride,
+ &target_tile.x,
+ &target_tile.y,
+ &target_tile.w,
+ &target_tile.h,
+ &target_tile.offset,
+ &target_tile.stride,
&task.pass_stride,
&rtile.sample};
launch_filter_kernel(
- "kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args);
+ "kernel_cuda_filter_convert_from_rgb", target_tile.w, target_tile.h, output_args);
# endif
check_result_cuda_ret(cuStreamSynchronize(0));
- task.unmap_neighbor_tiles(rtiles, this);
+ task.unmap_neighbor_tiles(neighbors, this);
}
else {
// Run CUDA denoising kernels
@@ -993,7 +1072,11 @@ class OptiXDevice : public CUDADevice {
sbt_params.missRecordCount = 1;
sbt_params.hitgroupRecordBase = sbt_data.device_pointer + PG_HITD * sizeof(SbtRecord);
sbt_params.hitgroupRecordStrideInBytes = sizeof(SbtRecord);
- sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITL, PG_HITS
+# if OPTIX_ABI_VERSION >= 36
+ sbt_params.hitgroupRecordCount = 5; // PG_HITD(_MOTION), PG_HITS(_MOTION), PG_HITL
+# else
+ sbt_params.hitgroupRecordCount = 3; // PG_HITD, PG_HITS, PG_HITL
+# endif
check_result_optix(optixLaunch(pipelines[PIP_SHADER_EVAL],
cuda_stream[thread_index],
@@ -1070,7 +1153,7 @@ class OptiXDevice : public CUDADevice {
&build_input,
1,
temp_mem.device_pointer,
- temp_mem.device_size,
+ sizes.tempSizeInBytes,
out_data,
sizes.outputSizeInBytes,
&out_handle,
@@ -1142,7 +1225,6 @@ class OptiXDevice : public CUDADevice {
continue;
}
- const size_t num_curves = hair->num_curves();
const size_t num_segments = hair->num_segments();
size_t num_motion_steps = 1;
@@ -1152,7 +1234,18 @@ class OptiXDevice : public CUDADevice {
}
device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY);
- aabb_data.alloc(num_segments * num_motion_steps);
+# if OPTIX_ABI_VERSION >= 36
+ device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY);
+ device_vector<float4> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY);
+ // Four control points for each curve segment
+ const size_t num_vertices = num_segments * 4;
+ if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+ index_data.alloc(num_segments);
+ vertex_data.alloc(num_vertices * num_motion_steps);
+ }
+ else
+# endif
+ aabb_data.alloc(num_segments * num_motion_steps);
// Get AABBs for each motion step
for (size_t step = 0; step < num_motion_steps; ++step) {
@@ -1165,44 +1258,127 @@ class OptiXDevice : public CUDADevice {
keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size();
}
- size_t i = step * num_segments;
- for (size_t j = 0; j < num_curves; ++j) {
- const Hair::Curve c = hair->get_curve(j);
-
- for (size_t k = 0; k < c.num_segments(); ++i, ++k) {
- BoundBox bounds = BoundBox::empty;
- c.bounds_grow(k, keys, hair->curve_radius.data(), bounds);
-
- aabb_data[i].minX = bounds.min.x;
- aabb_data[i].minY = bounds.min.y;
- aabb_data[i].minZ = bounds.min.z;
- aabb_data[i].maxX = bounds.max.x;
- aabb_data[i].maxY = bounds.max.y;
- aabb_data[i].maxZ = bounds.max.z;
+ for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
+ const Hair::Curve curve = hair->get_curve(j);
+
+ for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
+# if OPTIX_ABI_VERSION >= 36
+ if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+ int k0 = curve.first_key + segment;
+ int k1 = k0 + 1;
+ int ka = max(k0 - 1, curve.first_key);
+ int kb = min(k1 + 1, curve.first_key + curve.num_keys - 1);
+
+ const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
+ const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
+ const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
+ const float4 pw = make_float4(hair->curve_radius[ka],
+ hair->curve_radius[k0],
+ hair->curve_radius[k1],
+ hair->curve_radius[kb]);
+
+ // Convert Catmull-Rom data to Bezier spline
+ static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
+ static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
+ static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
+ static const float4 cr2bsp3 = make_float4(-2, +5, -4, +7) / 6.f;
+
+ index_data[i] = i * 4;
+ float4 *const v = vertex_data.data() + step * num_vertices + index_data[i];
+ v[0] = make_float4(
+ dot(cr2bsp0, px), dot(cr2bsp0, py), dot(cr2bsp0, pz), dot(cr2bsp0, pw));
+ v[1] = make_float4(
+ dot(cr2bsp1, px), dot(cr2bsp1, py), dot(cr2bsp1, pz), dot(cr2bsp1, pw));
+ v[2] = make_float4(
+ dot(cr2bsp2, px), dot(cr2bsp2, py), dot(cr2bsp2, pz), dot(cr2bsp2, pw));
+ v[3] = make_float4(
+ dot(cr2bsp3, px), dot(cr2bsp3, py), dot(cr2bsp3, pz), dot(cr2bsp3, pw));
+ }
+ else
+# endif
+ {
+ BoundBox bounds = BoundBox::empty;
+ curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds);
+
+ const size_t index = step * num_segments + i;
+ aabb_data[index].minX = bounds.min.x;
+ aabb_data[index].minY = bounds.min.y;
+ aabb_data[index].minZ = bounds.min.z;
+ aabb_data[index].maxX = bounds.max.x;
+ aabb_data[index].maxY = bounds.max.y;
+ aabb_data[index].maxZ = bounds.max.z;
+ }
}
}
}
// Upload AABB data to GPU
aabb_data.copy_to_device();
+# if OPTIX_ABI_VERSION >= 36
+ index_data.copy_to_device();
+ vertex_data.copy_to_device();
+# endif
vector<device_ptr> aabb_ptrs;
aabb_ptrs.reserve(num_motion_steps);
+# if OPTIX_ABI_VERSION >= 36
+ vector<device_ptr> width_ptrs;
+ vector<device_ptr> vertex_ptrs;
+ width_ptrs.reserve(num_motion_steps);
+ vertex_ptrs.reserve(num_motion_steps);
+# endif
for (size_t step = 0; step < num_motion_steps; ++step) {
aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb));
+# if OPTIX_ABI_VERSION >= 36
+ const device_ptr base_ptr = vertex_data.device_pointer +
+ step * num_vertices * sizeof(float4);
+ width_ptrs.push_back(base_ptr + 3 * sizeof(float)); // Offset by vertex size
+ vertex_ptrs.push_back(base_ptr);
+# endif
}
- // Disable visibility test anyhit program, since it is already checked during intersection
- // Those trace calls that require anyhit can force it with OPTIX_RAY_FLAG_ENFORCE_ANYHIT
- unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
+ // Force a single any-hit call, so shadow record-all behavior works correctly
+ unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
OptixBuildInput build_input = {};
- build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
- build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
- build_input.aabbArray.numPrimitives = num_segments;
- build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
- build_input.aabbArray.flags = &build_flags;
- build_input.aabbArray.numSbtRecords = 1;
- build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset;
+# if OPTIX_ABI_VERSION >= 36
+ if (DebugFlags().optix.curves_api && hair->curve_shape == CURVE_THICK) {
+ build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
+ build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
+ build_input.curveArray.numPrimitives = num_segments;
+ build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
+ build_input.curveArray.numVertices = num_vertices;
+ build_input.curveArray.vertexStrideInBytes = sizeof(float4);
+ build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
+ build_input.curveArray.widthStrideInBytes = sizeof(float4);
+ build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
+ build_input.curveArray.indexStrideInBytes = sizeof(int);
+ build_input.curveArray.flag = build_flags;
+ build_input.curveArray.primitiveIndexOffset = hair->optix_prim_offset;
+ }
+ else
+# endif
+ {
+ // Disable visibility test any-hit program, since it is already checked during
+ // intersection. Those trace calls that require anyhit can force it with a ray flag.
+ build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
+
+ build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
+# if OPTIX_ABI_VERSION < 23
+ build_input.aabbArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+ build_input.aabbArray.numPrimitives = num_segments;
+ build_input.aabbArray.strideInBytes = sizeof(OptixAabb);
+ build_input.aabbArray.flags = &build_flags;
+ build_input.aabbArray.numSbtRecords = 1;
+ build_input.aabbArray.primitiveIndexOffset = hair->optix_prim_offset;
+# else
+ build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
+ build_input.customPrimitiveArray.numPrimitives = num_segments;
+ build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
+ build_input.customPrimitiveArray.flags = &build_flags;
+ build_input.customPrimitiveArray.numSbtRecords = 1;
+ build_input.customPrimitiveArray.primitiveIndexOffset = hair->optix_prim_offset;
+# endif
+ }
// Allocate memory for new BLAS and build it
OptixTraversableHandle handle;
@@ -1257,8 +1433,8 @@ class OptiXDevice : public CUDADevice {
vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3));
}
- // No special build flags for triangle primitives
- unsigned int build_flags = OPTIX_GEOMETRY_FLAG_NONE;
+ // Force a single any-hit call, so shadow record-all behavior works correctly
+ unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
OptixBuildInput build_input = {};
build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
@@ -1324,9 +1500,26 @@ class OptiXDevice : public CUDADevice {
// Set user instance ID to object index
instance.instanceId = ob->get_device_index();
- // Volumes have a special bit set in the visibility mask so a trace can mask only volumes
- // See 'scene_intersect_volume' in bvh.h
- instance.visibilityMask = (ob->geometry->has_volume ? 3 : 1);
+ // Have to have at least one bit in the mask, or else instance would always be culled
+ instance.visibilityMask = 1;
+
+ if (ob->geometry->has_volume) {
+ // Volumes have a special bit set in the visibility mask so a trace can mask only volumes
+ instance.visibilityMask |= 2;
+ }
+
+ if (ob->geometry->type == Geometry::HAIR) {
+ // Same applies to curves (so they can be skipped in local trace calls)
+ instance.visibilityMask |= 4;
+
+# if OPTIX_ABI_VERSION >= 36
+ if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api &&
+ static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) {
+ // Select between motion blur and non-motion blur built-in intersection module
+ instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
+ }
+# endif
+ }
// Insert motion traversable if object has motion
if (motion_blur && ob->use_motion()) {