Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/device_optix.cpp')
-rw-r--r-- intern/cycles/device/device_optix.cpp | 187
1 files changed, 109 insertions, 78 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 43b1fb30baf..95234845f98 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -18,6 +18,7 @@
#ifdef WITH_OPTIX
# include "bvh/bvh.h"
+# include "bvh/bvh_optix.h"
# include "device/cuda/device_cuda.h"
# include "device/device_denoising.h"
# include "device/device_intern.h"
@@ -137,9 +138,6 @@ class OptiXDevice : public CUDADevice {
PG_HITD_MOTION,
PG_HITS_MOTION,
# endif
-# ifdef WITH_CYCLES_DEBUG
- PG_EXCP,
-# endif
PG_BAKE, // kernel_bake_evaluate
PG_DISP, // kernel_displace_evaluate
PG_BACK, // kernel_background_evaluate
@@ -232,6 +230,9 @@ class OptiXDevice : public CUDADevice {
}
};
# endif
+# if OPTIX_ABI_VERSION >= 41 && defined(WITH_CYCLES_DEBUG)
+ options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
+# endif
check_result_optix(optixDeviceContextCreate(cuContext, &options, &context));
# ifdef WITH_CYCLES_LOGGING
check_result_optix(optixDeviceContextSetLogCallback(
@@ -368,6 +369,12 @@ class OptiXDevice : public CUDADevice {
module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
# endif
+
+# if OPTIX_ABI_VERSION >= 41
+ module_options.boundValues = nullptr;
+ module_options.numBoundValues = 0;
+# endif
+
OptixPipelineCompileOptions pipeline_options;
// Default to no motion blur and two-level graph, since it is the fastest option
pipeline_options.usesMotionBlur = false;
@@ -375,12 +382,7 @@ class OptiXDevice : public CUDADevice {
OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
pipeline_options.numPayloadValues = 6;
pipeline_options.numAttributeValues = 2; // u, v
-# ifdef WITH_CYCLES_DEBUG
- pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW |
- OPTIX_EXCEPTION_FLAG_TRACE_DEPTH;
-# else
pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
-# endif
pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h
# if OPTIX_ABI_VERSION >= 36
@@ -505,12 +507,6 @@ class OptiXDevice : public CUDADevice {
group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit";
}
-# ifdef WITH_CYCLES_DEBUG
- group_descs[PG_EXCP].kind = OPTIX_PROGRAM_GROUP_KIND_EXCEPTION;
- group_descs[PG_EXCP].exception.module = optix_module;
- group_descs[PG_EXCP].exception.entryFunctionName = "__exception__kernel_optix_exception";
-# endif
-
if (requested_features.use_baking) {
group_descs[PG_BAKE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
group_descs[PG_BAKE].raygen.module = optix_module;
@@ -578,9 +574,6 @@ class OptiXDevice : public CUDADevice {
groups[PG_HITD_MOTION],
groups[PG_HITS_MOTION],
# endif
-# ifdef WITH_CYCLES_DEBUG
- groups[PG_EXCP],
-# endif
};
check_result_optix_ret(
optixPipelineCreate(context,
@@ -618,9 +611,6 @@ class OptiXDevice : public CUDADevice {
groups[PG_HITD_MOTION],
groups[PG_HITS_MOTION],
# endif
-# ifdef WITH_CYCLES_DEBUG
- groups[PG_EXCP],
-# endif
};
check_result_optix_ret(
optixPipelineCreate(context,
@@ -734,9 +724,6 @@ class OptiXDevice : public CUDADevice {
OptixShaderBindingTable sbt_params = {};
sbt_params.raygenRecord = sbt_data.device_pointer + PG_RGEN * sizeof(SbtRecord);
-# ifdef WITH_CYCLES_DEBUG
- sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord);
-# endif
sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord);
sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
sbt_params.missRecordCount = 1;
@@ -1064,9 +1051,6 @@ class OptiXDevice : public CUDADevice {
OptixShaderBindingTable sbt_params = {};
sbt_params.raygenRecord = sbt_data.device_pointer + rgen_index * sizeof(SbtRecord);
-# ifdef WITH_CYCLES_DEBUG
- sbt_params.exceptionRecord = sbt_data.device_pointer + PG_EXCP * sizeof(SbtRecord);
-# endif
sbt_params.missRecordBase = sbt_data.device_pointer + PG_MISS * sizeof(SbtRecord);
sbt_params.missRecordStrideInBytes = sizeof(SbtRecord);
sbt_params.missRecordCount = 1;
@@ -1095,23 +1079,23 @@ class OptiXDevice : public CUDADevice {
bool build_optix_bvh(const OptixBuildInput &build_input,
uint16_t num_motion_steps,
- OptixTraversableHandle &out_handle)
+ OptixTraversableHandle &out_handle,
+ CUdeviceptr &out_data,
+ OptixBuildOperation operation)
{
- out_handle = 0;
-
const CUDAContextScope scope(cuContext);
// Compute memory usage
OptixAccelBufferSizes sizes = {};
OptixAccelBuildOptions options;
- options.operation = OPTIX_BUILD_OPERATION_BUILD;
+ options.operation = operation;
if (background) {
// Prefer best performance and lowest memory consumption in background
options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
}
else {
// Prefer fast updates in viewport
- options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD;
+ options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
}
options.motionOptions.numKeys = num_motion_steps;
@@ -1136,8 +1120,10 @@ class OptiXDevice : public CUDADevice {
move_textures_to_host(size - free, false);
}
- CUdeviceptr out_data = 0;
- check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+ if (operation == OPTIX_BUILD_OPERATION_BUILD) {
+ check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+ }
+
as_mem.push_back(out_data);
// Finally build the acceleration structure
@@ -1204,23 +1190,49 @@ class OptiXDevice : public CUDADevice {
unordered_map<Geometry *, OptixTraversableHandle> geometry;
geometry.reserve(bvh->geometry.size());
- // Free all previous acceleration structures
+ // Free all previous acceleration structures which can not be refit
+ std::set<CUdeviceptr> refit_mem;
+
+ for (Geometry *geom : bvh->geometry) {
+ if (static_cast<BVHOptiX *>(geom->bvh)->do_refit) {
+ refit_mem.insert(static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle);
+ }
+ }
+
for (CUdeviceptr mem : as_mem) {
- cuMemFree(mem);
+ if (refit_mem.find(mem) == refit_mem.end()) {
+ cuMemFree(mem);
+ }
}
+
as_mem.clear();
// Build bottom level acceleration structures (BLAS)
// Note: Always keep this logic in sync with bvh_optix.cpp!
for (Object *ob : bvh->objects) {
// Skip geometry for which acceleration structure already exists
- Geometry *geom = ob->geometry;
+ Geometry *geom = ob->get_geometry();
if (geometry.find(geom) != geometry.end())
continue;
- if (geom->type == Geometry::HAIR) {
+ OptixTraversableHandle handle;
+ OptixBuildOperation operation;
+ CUdeviceptr out_data;
+ // Refit is only possible in viewport for now.
+ if (static_cast<BVHOptiX *>(geom->bvh)->do_refit && !background) {
+ out_data = static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle;
+ handle = static_cast<BVHOptiX *>(geom->bvh)->optix_handle;
+ operation = OPTIX_BUILD_OPERATION_UPDATE;
+ }
+ else {
+ out_data = 0;
+ handle = 0;
+ operation = OPTIX_BUILD_OPERATION_BUILD;
+ }
+
+ if (geom->geometry_type == Geometry::HAIR) {
// Build BLAS for curve primitives
- Hair *const hair = static_cast<Hair *const>(ob->geometry);
+ Hair *const hair = static_cast<Hair *const>(ob->get_geometry());
if (hair->num_curves() == 0) {
continue;
}
@@ -1229,8 +1241,8 @@ class OptiXDevice : public CUDADevice {
size_t num_motion_steps = 1;
Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- if (motion_blur && hair->use_motion_blur && motion_keys) {
- num_motion_steps = hair->motion_steps;
+ if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
+ num_motion_steps = hair->get_motion_steps();
}
device_vector<OptixAabb> aabb_data(this, "temp_aabb_data", MEM_READ_ONLY);
@@ -1250,16 +1262,19 @@ class OptiXDevice : public CUDADevice {
// Get AABBs for each motion step
for (size_t step = 0; step < num_motion_steps; ++step) {
// The center step for motion vertices is not stored in the attribute
- const float3 *keys = hair->curve_keys.data();
+ const float3 *keys = hair->get_curve_keys().data();
size_t center_step = (num_motion_steps - 1) / 2;
if (step != center_step) {
size_t attr_offset = (step > center_step) ? step - 1 : step;
// Technically this is a float4 array, but sizeof(float3) is the same as sizeof(float4)
- keys = motion_keys->data_float3() + attr_offset * hair->curve_keys.size();
+ keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size();
}
for (size_t j = 0, i = 0; j < hair->num_curves(); ++j) {
const Hair::Curve curve = hair->get_curve(j);
+# if OPTIX_ABI_VERSION >= 36
+ const array<float> &curve_radius = hair->get_curve_radius();
+# endif
for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
# if OPTIX_ABI_VERSION >= 36
@@ -1272,10 +1287,8 @@ class OptiXDevice : public CUDADevice {
const float4 px = make_float4(keys[ka].x, keys[k0].x, keys[k1].x, keys[kb].x);
const float4 py = make_float4(keys[ka].y, keys[k0].y, keys[k1].y, keys[kb].y);
const float4 pz = make_float4(keys[ka].z, keys[k0].z, keys[k1].z, keys[kb].z);
- const float4 pw = make_float4(hair->curve_radius[ka],
- hair->curve_radius[k0],
- hair->curve_radius[k1],
- hair->curve_radius[kb]);
+ const float4 pw = make_float4(
+ curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
// Convert Catmull-Rom data to Bezier spline
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
@@ -1298,7 +1311,7 @@ class OptiXDevice : public CUDADevice {
# endif
{
BoundBox bounds = BoundBox::empty;
- curve.bounds_grow(segment, keys, hair->curve_radius.data(), bounds);
+ curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);
const size_t index = step * num_segments + i;
aabb_data[index].minX = bounds.min.x;
@@ -1381,37 +1394,41 @@ class OptiXDevice : public CUDADevice {
}
// Allocate memory for new BLAS and build it
- OptixTraversableHandle handle;
- if (build_optix_bvh(build_input, num_motion_steps, handle)) {
- geometry.insert({ob->geometry, handle});
+ if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) {
+ geometry.insert({ob->get_geometry(), handle});
+ static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle = out_data;
+ static_cast<BVHOptiX *>(geom->bvh)->optix_handle = handle;
+ static_cast<BVHOptiX *>(geom->bvh)->do_refit = false;
}
else {
return false;
}
}
- else if (geom->type == Geometry::MESH || geom->type == Geometry::VOLUME) {
+ else if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
// Build BLAS for triangle primitives
- Mesh *const mesh = static_cast<Mesh *const>(ob->geometry);
+ Mesh *const mesh = static_cast<Mesh *const>(ob->get_geometry());
if (mesh->num_triangles() == 0) {
continue;
}
- const size_t num_verts = mesh->verts.size();
+ const size_t num_verts = mesh->get_verts().size();
size_t num_motion_steps = 1;
Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
- if (motion_blur && mesh->use_motion_blur && motion_keys) {
- num_motion_steps = mesh->motion_steps;
+ if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
+ num_motion_steps = mesh->get_motion_steps();
}
device_vector<int> index_data(this, "temp_index_data", MEM_READ_ONLY);
- index_data.alloc(mesh->triangles.size());
- memcpy(index_data.data(), mesh->triangles.data(), mesh->triangles.size() * sizeof(int));
+ index_data.alloc(mesh->get_triangles().size());
+ memcpy(index_data.data(),
+ mesh->get_triangles().data(),
+ mesh->get_triangles().size() * sizeof(int));
device_vector<float3> vertex_data(this, "temp_vertex_data", MEM_READ_ONLY);
vertex_data.alloc(num_verts * num_motion_steps);
for (size_t step = 0; step < num_motion_steps; ++step) {
- const float3 *verts = mesh->verts.data();
+ const float3 *verts = mesh->get_verts().data();
size_t center_step = (num_motion_steps - 1) / 2;
// The center step for motion vertices is not stored in the attribute
@@ -1453,9 +1470,11 @@ class OptiXDevice : public CUDADevice {
build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
// Allocate memory for new BLAS and build it
- OptixTraversableHandle handle;
- if (build_optix_bvh(build_input, num_motion_steps, handle)) {
- geometry.insert({ob->geometry, handle});
+ if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) {
+ geometry.insert({ob->get_geometry(), handle});
+ static_cast<BVHOptiX *>(geom->bvh)->optix_data_handle = out_data;
+ static_cast<BVHOptiX *>(geom->bvh)->optix_handle = handle;
+ static_cast<BVHOptiX *>(geom->bvh)->do_refit = false;
}
else {
return false;
@@ -1464,8 +1483,10 @@ class OptiXDevice : public CUDADevice {
}
// Fill instance descriptions
+# if OPTIX_ABI_VERSION < 41
device_vector<OptixAabb> aabbs(this, "tlas_aabbs", MEM_READ_ONLY);
aabbs.alloc(bvh->objects.size());
+# endif
device_vector<OptixInstance> instances(this, "tlas_instances", MEM_READ_ONLY);
instances.alloc(bvh->objects.size());
@@ -1475,12 +1496,13 @@ class OptiXDevice : public CUDADevice {
continue;
// Create separate instance for triangle/curve meshes of an object
- auto handle_it = geometry.find(ob->geometry);
+ const auto handle_it = geometry.find(ob->get_geometry());
if (handle_it == geometry.end()) {
continue;
}
OptixTraversableHandle handle = handle_it->second;
+# if OPTIX_ABI_VERSION < 41
OptixAabb &aabb = aabbs[num_instances];
aabb.minX = ob->bounds.min.x;
aabb.minY = ob->bounds.min.y;
@@ -1488,6 +1510,7 @@ class OptiXDevice : public CUDADevice {
aabb.maxX = ob->bounds.max.x;
aabb.maxY = ob->bounds.max.y;
aabb.maxZ = ob->bounds.max.z;
+# endif
OptixInstance &instance = instances[num_instances++];
memset(&instance, 0, sizeof(instance));
@@ -1503,18 +1526,19 @@ class OptiXDevice : public CUDADevice {
// Have to have at least one bit in the mask, or else instance would always be culled
instance.visibilityMask = 1;
- if (ob->geometry->has_volume) {
+ if (ob->get_geometry()->has_volume) {
// Volumes have a special bit set in the visibility mask so a trace can mask only volumes
instance.visibilityMask |= 2;
}
- if (ob->geometry->type == Geometry::HAIR) {
+ if (ob->get_geometry()->geometry_type == Geometry::HAIR) {
// Same applies to curves (so they can be skipped in local trace calls)
instance.visibilityMask |= 4;
# if OPTIX_ABI_VERSION >= 36
- if (motion_blur && ob->geometry->has_motion_blur() && DebugFlags().optix.curves_api &&
- static_cast<const Hair *>(ob->geometry)->curve_shape == CURVE_THICK) {
+ if (motion_blur && ob->get_geometry()->has_motion_blur() &&
+ DebugFlags().optix.curves_api &&
+ static_cast<const Hair *>(ob->get_geometry())->curve_shape == CURVE_THICK) {
// Select between motion blur and non-motion blur built-in intersection module
instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
}
@@ -1523,7 +1547,7 @@ class OptiXDevice : public CUDADevice {
// Insert motion traversable if object has motion
if (motion_blur && ob->use_motion()) {
- size_t motion_keys = max(ob->motion.size(), 2) - 2;
+ size_t motion_keys = max(ob->get_motion().size(), 2) - 2;
size_t motion_transform_size = sizeof(OptixSRTMotionTransform) +
motion_keys * sizeof(OptixSRTData);
@@ -1537,16 +1561,17 @@ class OptiXDevice : public CUDADevice {
OptixSRTMotionTransform &motion_transform = *reinterpret_cast<OptixSRTMotionTransform *>(
new uint8_t[motion_transform_size]);
motion_transform.child = handle;
- motion_transform.motionOptions.numKeys = ob->motion.size();
+ motion_transform.motionOptions.numKeys = ob->get_motion().size();
motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
motion_transform.motionOptions.timeBegin = 0.0f;
motion_transform.motionOptions.timeEnd = 1.0f;
OptixSRTData *const srt_data = motion_transform.srtData;
- array<DecomposedTransform> decomp(ob->motion.size());
- transform_motion_decompose(decomp.data(), ob->motion.data(), ob->motion.size());
+ array<DecomposedTransform> decomp(ob->get_motion().size());
+ transform_motion_decompose(
+ decomp.data(), ob->get_motion().data(), ob->get_motion().size());
- for (size_t i = 0; i < ob->motion.size(); ++i) {
+ for (size_t i = 0; i < ob->get_motion().size(); ++i) {
// Scale
srt_data[i].sx = decomp[i].y.w; // scale.x.x
srt_data[i].sy = decomp[i].z.w; // scale.y.y
@@ -1593,9 +1618,9 @@ class OptiXDevice : public CUDADevice {
else {
instance.traversableHandle = handle;
- if (ob->geometry->is_instanced()) {
+ if (ob->get_geometry()->is_instanced()) {
// Set transform matrix
- memcpy(instance.transform, &ob->tfm, sizeof(instance.transform));
+ memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
}
else {
// Disable instance transform if geometry already has it applied to vertex data
@@ -1608,20 +1633,26 @@ class OptiXDevice : public CUDADevice {
}
// Upload instance descriptions
+# if OPTIX_ABI_VERSION < 41
aabbs.resize(num_instances);
aabbs.copy_to_device();
+# endif
instances.resize(num_instances);
instances.copy_to_device();
// Build top-level acceleration structure (TLAS)
OptixBuildInput build_input = {};
build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
- build_input.instanceArray.instances = instances.device_pointer;
- build_input.instanceArray.numInstances = num_instances;
+# if OPTIX_ABI_VERSION < 41 // Instance AABBs no longer need to be set since OptiX 7.2
build_input.instanceArray.aabbs = aabbs.device_pointer;
build_input.instanceArray.numAabbs = num_instances;
+# endif
+ build_input.instanceArray.instances = instances.device_pointer;
+ build_input.instanceArray.numInstances = num_instances;
- return build_optix_bvh(build_input, 0, tlas_handle);
+ CUdeviceptr out_data = 0;
+ tlas_handle = 0;
+ return build_optix_bvh(build_input, 0, tlas_handle, out_data, OPTIX_BUILD_OPERATION_BUILD);
}
void const_copy_to(const char *name, void *host, size_t size) override
@@ -1725,8 +1756,8 @@ bool device_optix_init()
const OptixResult result = optixInit();
if (result == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) {
- VLOG(1) << "OptiX initialization failed because driver does not support ABI version "
- << OPTIX_ABI_VERSION;
+ VLOG(1) << "OptiX initialization failed because the installed NVIDIA driver is too old. "
+ "Please update to the latest driver first!";
return false;
}
else if (result != OPTIX_SUCCESS) {