Fix Cycles MNEE not working for Metal

Move MNEE to own kernel, separate from shader ray-tracing. This does introduce the limitation that a shader can't use both MNEE and AO/bevel, but that seems like the better trade-off for now. We can experiment with bigger kernel organization changes later. Differential Revision: https://developer.blender.org/D15070
author: Brecht Van Lommel <brecht@blender.org> 2022-05-30 19:04:14 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2022-05-31 18:24:43 +0300
commit: f2cd7e08fed02fdf02060c17c943e15e85638cb5 (patch)
tree: 27e4ea7864f1716979ab9e27e60cdcdf361566f2 /intern/cycles/device
parent: 52cb24a7796c1043dab076d3ef025b4c8545c1ef (diff)
8 files changed, 73 insertions, 7 deletions
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index 6908ae5ead3..c9326a62f48 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -457,6 +457,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
     /* Use the biggest kernel for estimation. */
     const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
                                          DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
+                                     (kernel_features & KERNEL_FEATURE_MNEE) ?
+                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
                                          DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
 
     /* Launch kernel, using just 1 block appears sufficient to reserve memory for all
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index 7159277b325..d27e9ddbedf 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -420,6 +420,8 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
     /* Use the biggest kernel for estimation. */
     const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
                                          DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
+                                     (kernel_features & KERNEL_FEATURE_MNEE) ?
+                                         DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
                                          DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
 
     /* Launch kernel, using just 1 block appears sufficient to reserve memory for all
diff --git a/intern/cycles/device/kernel.cpp b/intern/cycles/device/kernel.cpp
index 072731a2af5..96a99cd62cd 100644
--- a/intern/cycles/device/kernel.cpp
+++ b/intern/cycles/device/kernel.cpp
@@ -33,6 +33,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
       return "integrator_shade_surface";
     case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
       return "integrator_shade_surface_raytrace";
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
+      return "integrator_shade_surface_mnee";
     case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
       return "integrator_shade_volume";
     case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 9555ca03c8e..a3c4839c21f 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -489,7 +489,8 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
             i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
             i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
             i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK ||
-            i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+            i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+            i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
           kernel_function_list = function_list;
         }
 
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 1686ab95ffa..df2b3321cf6 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -265,6 +265,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
         case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
         case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
         case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
+        case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
           break;
         default:
           bvhMetalRT = nil;
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 8830d8c44ac..9576643cff6 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -432,9 +432,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   }
 
   { /* Load and compile PTX module with OptiX kernels. */
-    string ptx_data, ptx_filename = path_get((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
-                                                 "lib/kernel_optix_shader_raytrace.ptx" :
-                                                 "lib/kernel_optix.ptx");
+    string ptx_data, ptx_filename = path_get(
+                         (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
+                             "lib/kernel_optix_shader_raytrace.ptx" :
+                             "lib/kernel_optix.ptx");
     if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
       if (!getenv("OPTIX_ROOT_DIR")) {
         set_error(
@@ -444,7 +445,9 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
       }
       ptx_filename = compile_kernel(
           kernel_features,
-          (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? "kernel_shader_raytrace" : "kernel",
+          (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
+              "kernel_shader_raytrace" :
+              "kernel",
           "optix",
           true);
     }
@@ -582,6 +585,14 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
         "__direct_callable__svm_node_bevel";
   }
 
+  /* MNEE. */
+  if (kernel_features & KERNEL_FEATURE_MNEE) {
+    group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+    group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
+    group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
+        "__raygen__kernel_optix_integrator_shade_surface_mnee";
+  }
+
   optix_assert(optixProgramGroupCreate(
       context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
 
@@ -663,6 +674,46 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
         pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2));
   }
 
+  if (kernel_features & KERNEL_FEATURE_MNEE) {
+    /* Create MNEE pipeline. */
+    vector<OptixProgramGroup> pipeline_groups;
+    pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+    pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
+    pipeline_groups.push_back(groups[PG_MISS]);
+    pipeline_groups.push_back(groups[PG_HITD]);
+    pipeline_groups.push_back(groups[PG_HITS]);
+    pipeline_groups.push_back(groups[PG_HITL]);
+    pipeline_groups.push_back(groups[PG_HITV]);
+    if (motion_blur) {
+      pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+      pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+    }
+    if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+      pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
+      pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
+    }
+    pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
+    pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
+
+    optix_assert(optixPipelineCreate(context,
+                                     &pipeline_options,
+                                     &link_options,
+                                     pipeline_groups.data(),
+                                     pipeline_groups.size(),
+                                     nullptr,
+                                     0,
+                                     &pipelines[PIP_SHADE_MNEE]));
+
+    /* Combine ray generation and trace continuation stack size. */
+    const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG +
+                             link_options.maxTraceDepth * trace_css;
+    const unsigned int dss = 0;
+
+    /* Set stack size depending on pipeline options. */
+    optix_assert(
+        optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2));
+  }
+
   { /* Create intersection-only pipeline. */
     vector<OptixProgramGroup> pipeline_groups;
     pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h
index 1f53c729c3f..817afdc8384 100644
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -24,6 +24,7 @@ enum {
   PG_RGEN_INTERSECT_SUBSURFACE,
   PG_RGEN_INTERSECT_VOLUME_STACK,
   PG_RGEN_SHADE_SURFACE_RAYTRACE,
+  PG_RGEN_SHADE_SURFACE_MNEE,
   PG_MISS,
   PG_HITD, /* Default hit group. */
   PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
@@ -46,7 +47,7 @@ static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
 static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
 
 /* List of OptiX pipelines. */
-enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
+enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES };
 
 /* A single shader binding table entry. */
 struct SbtRecord {
diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp
index d635512c58a..366bf95269d 100644
--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@@ -28,6 +28,7 @@ void OptiXDeviceQueue::init_execution()
 static bool is_optix_specific_kernel(DeviceKernel kernel)
 {
   return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+          kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE ||
           kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
           kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
           kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
@@ -63,7 +64,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
                         cuda_stream_));
 
   if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
-      kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+      kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+      kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) {
     cuda_device_assert(
         cuda_device_,
         cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
@@ -82,6 +84,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel,
       pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
       sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
       break;
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE:
+      pipeline = optix_device->pipelines[PIP_SHADE_MNEE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord);
+      break;
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
       pipeline = optix_device->pipelines[PIP_INTERSECT];
       sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
author	Brecht Van Lommel <brecht@blender.org>	2022-05-30 19:04:14 +0300
committer	Brecht Van Lommel <brecht@blender.org>	2022-05-31 18:24:43 +0300
commit	f2cd7e08fed02fdf02060c17c943e15e85638cb5 (patch)
tree	27e4ea7864f1716979ab9e27e60cdcdf361566f2 /intern/cycles/device
parent	52cb24a7796c1043dab076d3ef025b4c8545c1ef (diff)