Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Brecht Van Lommel <brecht@blender.org> 2022-05-30 19:04:14 +0300
committer Brecht Van Lommel <brecht@blender.org> 2022-05-31 18:24:43 +0300
commit f2cd7e08fed02fdf02060c17c943e15e85638cb5 (patch)
tree 27e4ea7864f1716979ab9e27e60cdcdf361566f2 /intern/cycles/device
parent 52cb24a7796c1043dab076d3ef025b4c8545c1ef (diff)
Fix Cycles MNEE not working for Metal
Move MNEE to own kernel, separate from shader ray-tracing. This does introduce the limitation that a shader can't use both MNEE and AO/bevel, but that seems like the better trade-off for now. We can experiment with bigger kernel organization changes later.

Differential Revision: https://developer.blender.org/D15070
Diffstat (limited to 'intern/cycles/device')
-rw-r--r-- intern/cycles/device/cuda/device_impl.cpp 2
-rw-r--r-- intern/cycles/device/hip/device_impl.cpp 2
-rw-r--r-- intern/cycles/device/kernel.cpp 2
-rw-r--r-- intern/cycles/device/metal/kernel.mm 3
-rw-r--r-- intern/cycles/device/metal/queue.mm 1
-rw-r--r-- intern/cycles/device/optix/device_impl.cpp 59
-rw-r--r-- intern/cycles/device/optix/device_impl.h 3
-rw-r--r-- intern/cycles/device/optix/queue.cpp 8
8 files changed, 73 insertions, 7 deletions
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index 6908ae5ead3..c9326a62f48 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -457,6 +457,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
/* Use the biggest kernel for estimation. */
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
+ (kernel_features & KERNEL_FEATURE_MNEE) ?
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
/* Launch kernel, using just 1 block appears sufficient to reserve memory for all
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index 7159277b325..d27e9ddbedf 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -420,6 +420,8 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
/* Use the biggest kernel for estimation. */
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
+ (kernel_features & KERNEL_FEATURE_MNEE) ?
+ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
/* Launch kernel, using just 1 block appears sufficient to reserve memory for all
diff --git a/intern/cycles/device/kernel.cpp b/intern/cycles/device/kernel.cpp
index 072731a2af5..96a99cd62cd 100644
--- a/intern/cycles/device/kernel.cpp
+++ b/intern/cycles/device/kernel.cpp
@@ -33,6 +33,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
return "integrator_shade_surface";
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
return "integrator_shade_surface_raytrace";
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
+ return "integrator_shade_surface_mnee";
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
return "integrator_shade_volume";
case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 9555ca03c8e..a3c4839c21f 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -489,7 +489,8 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
- i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+ i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
kernel_function_list = function_list;
}
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 1686ab95ffa..df2b3321cf6 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -265,6 +265,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
break;
default:
bvhMetalRT = nil;
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 8830d8c44ac..9576643cff6 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -432,9 +432,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
{ /* Load and compile PTX module with OptiX kernels. */
- string ptx_data, ptx_filename = path_get((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
- "lib/kernel_optix_shader_raytrace.ptx" :
- "lib/kernel_optix.ptx");
+ string ptx_data, ptx_filename = path_get(
+ (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
+ "lib/kernel_optix_shader_raytrace.ptx" :
+ "lib/kernel_optix.ptx");
if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
if (!getenv("OPTIX_ROOT_DIR")) {
set_error(
@@ -444,7 +445,9 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
}
ptx_filename = compile_kernel(
kernel_features,
- (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? "kernel_shader_raytrace" : "kernel",
+ (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
+ "kernel_shader_raytrace" :
+ "kernel",
"optix",
true);
}
@@ -582,6 +585,14 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
"__direct_callable__svm_node_bevel";
}
+ /* MNEE. */
+ if (kernel_features & KERNEL_FEATURE_MNEE) {
+ group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+ group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
+ group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
+ "__raygen__kernel_optix_integrator_shade_surface_mnee";
+ }
+
optix_assert(optixProgramGroupCreate(
context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
@@ -663,6 +674,46 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2));
}
+ if (kernel_features & KERNEL_FEATURE_MNEE) {
+ /* Create MNEE pipeline. */
+ vector<OptixProgramGroup> pipeline_groups;
+ pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+ pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
+ pipeline_groups.push_back(groups[PG_MISS]);
+ pipeline_groups.push_back(groups[PG_HITD]);
+ pipeline_groups.push_back(groups[PG_HITS]);
+ pipeline_groups.push_back(groups[PG_HITL]);
+ pipeline_groups.push_back(groups[PG_HITV]);
+ if (motion_blur) {
+ pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+ pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+ }
+ if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+ pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
+ pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
+ }
+ pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
+ pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
+
+ optix_assert(optixPipelineCreate(context,
+ &pipeline_options,
+ &link_options,
+ pipeline_groups.data(),
+ pipeline_groups.size(),
+ nullptr,
+ 0,
+ &pipelines[PIP_SHADE_MNEE]));
+
+ /* Combine ray generation and trace continuation stack size. */
+ const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG +
+ link_options.maxTraceDepth * trace_css;
+ const unsigned int dss = 0;
+
+ /* Set stack size depending on pipeline options. */
+ optix_assert(
+ optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2));
+ }
+
{ /* Create intersection-only pipeline. */
vector<OptixProgramGroup> pipeline_groups;
pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h
index 1f53c729c3f..817afdc8384 100644
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -24,6 +24,7 @@ enum {
PG_RGEN_INTERSECT_SUBSURFACE,
PG_RGEN_INTERSECT_VOLUME_STACK,
PG_RGEN_SHADE_SURFACE_RAYTRACE,
+ PG_RGEN_SHADE_SURFACE_MNEE,
PG_MISS,
PG_HITD, /* Default hit group. */
PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
@@ -46,7 +47,7 @@ static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
/* List of OptiX pipelines. */
-enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
+enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES };
/* A single shader binding table entry. */
struct SbtRecord {
diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp
index d635512c58a..366bf95269d 100644
--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@@ -28,6 +28,7 @@ void OptiXDeviceQueue::init_execution()
static bool is_optix_specific_kernel(DeviceKernel kernel)
{
return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
@@ -63,7 +64,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
cuda_stream_));
if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
- kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@@ -82,6 +84,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
break;
+ case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
+ pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
+ sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
+ break;
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
pipeline = optix_device->pipelines[PIP_INTERSECT];
sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);