From f2cd7e08fed02fdf02060c17c943e15e85638cb5 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Mon, 30 May 2022 18:04:14 +0200 Subject: Fix Cycles MNEE not working for Metal Move MNEE to own kernel, separate from shader ray-tracing. This does introduce the limitation that a shader can't use both MNEE and AO/bevel, but that seems like the better trade-off for now. We can experiment with bigger kernel organization changes later. Differential Revision: https://developer.blender.org/D15070 --- intern/cycles/device/cuda/device_impl.cpp | 2 + intern/cycles/device/hip/device_impl.cpp | 2 + intern/cycles/device/kernel.cpp | 2 + intern/cycles/device/metal/kernel.mm | 3 +- intern/cycles/device/metal/queue.mm | 1 + intern/cycles/device/optix/device_impl.cpp | 59 ++++++++++++++++++++-- intern/cycles/device/optix/device_impl.h | 3 +- intern/cycles/device/optix/queue.cpp | 8 ++- intern/cycles/integrator/path_trace_work_gpu.cpp | 16 +++++- intern/cycles/integrator/path_trace_work_gpu.h | 1 + intern/cycles/kernel/device/gpu/kernel.h | 15 ++++++ .../kernel/device/optix/kernel_shader_raytrace.cu | 8 +++ intern/cycles/kernel/integrator/init_from_bake.h | 7 ++- .../cycles/kernel/integrator/intersect_closest.h | 31 +++++++++--- intern/cycles/kernel/integrator/megakernel.h | 3 ++ intern/cycles/kernel/integrator/shade_surface.h | 10 +++- intern/cycles/kernel/integrator/subsurface.h | 9 +++- intern/cycles/kernel/types.h | 7 +++ intern/cycles/scene/scene.cpp | 3 +- 19 files changed, 167 insertions(+), 23 deletions(-) (limited to 'intern/cycles') diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 6908ae5ead3..c9326a62f48 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -457,6 +457,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features) /* Use the biggest kernel for estimation. */ const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE : + (kernel_features & KERNEL_FEATURE_MNEE) ? + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE : DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE; /* Launch kernel, using just 1 block appears sufficient to reserve memory for all diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 7159277b325..d27e9ddbedf 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -420,6 +420,8 @@ void HIPDevice::reserve_local_memory(const uint kernel_features) /* Use the biggest kernel for estimation. */ const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE : + (kernel_features & KERNEL_FEATURE_MNEE) ? + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE : DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE; /* Launch kernel, using just 1 block appears sufficient to reserve memory for all diff --git a/intern/cycles/device/kernel.cpp b/intern/cycles/device/kernel.cpp index 072731a2af5..96a99cd62cd 100644 --- a/intern/cycles/device/kernel.cpp +++ b/intern/cycles/device/kernel.cpp @@ -33,6 +33,8 @@ const char *device_kernel_as_string(DeviceKernel kernel) return "integrator_shade_surface"; case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: return "integrator_shade_surface_raytrace"; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: + return "integrator_shade_surface_mnee"; case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: return "integrator_shade_volume"; case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL: diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 9555ca03c8e..a3c4839c21f 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -489,7 +489,8 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type) i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || i == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK || - i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) { + i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + i == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) { kernel_function_list = function_list; } diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index 1686ab95ffa..df2b3321cf6 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -265,6 +265,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: break; default: bvhMetalRT = nil; diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 8830d8c44ac..9576643cff6 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -432,9 +432,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features) } { /* Load and compile PTX module with OptiX kernels. */ - string ptx_data, ptx_filename = path_get((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? - "lib/kernel_optix_shader_raytrace.ptx" : - "lib/kernel_optix.ptx"); + string ptx_data, ptx_filename = path_get( + (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ? + "lib/kernel_optix_shader_raytrace.ptx" : + "lib/kernel_optix.ptx"); if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { if (!getenv("OPTIX_ROOT_DIR")) { set_error( @@ -444,7 +445,9 @@ bool OptiXDevice::load_kernels(const uint kernel_features) } ptx_filename = compile_kernel( kernel_features, - (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? "kernel_shader_raytrace" : "kernel", + (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ? + "kernel_shader_raytrace" : + "kernel", "optix", true); } @@ -582,6 +585,14 @@ bool OptiXDevice::load_kernels(const uint kernel_features) "__direct_callable__svm_node_bevel"; } + /* MNEE. */ + if (kernel_features & KERNEL_FEATURE_MNEE) { + group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; + group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module; + group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName = + "__raygen__kernel_optix_integrator_shade_surface_mnee"; + } + optix_assert(optixProgramGroupCreate( context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups)); @@ -663,6 +674,46 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2)); } + if (kernel_features & KERNEL_FEATURE_MNEE) { + /* Create MNEE pipeline. */ + vector pipeline_groups; + pipeline_groups.reserve(NUM_PROGRAM_GROUPS); + pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]); + pipeline_groups.push_back(groups[PG_MISS]); + pipeline_groups.push_back(groups[PG_HITD]); + pipeline_groups.push_back(groups[PG_HITS]); + pipeline_groups.push_back(groups[PG_HITL]); + pipeline_groups.push_back(groups[PG_HITV]); + if (motion_blur) { + pipeline_groups.push_back(groups[PG_HITD_MOTION]); + pipeline_groups.push_back(groups[PG_HITS_MOTION]); + } + if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { + pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); + pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); + } + pipeline_groups.push_back(groups[PG_CALL_SVM_AO]); + pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]); + + optix_assert(optixPipelineCreate(context, + &pipeline_options, + &link_options, + pipeline_groups.data(), + pipeline_groups.size(), + nullptr, + 0, + &pipelines[PIP_SHADE_MNEE])); + + /* Combine ray generation and trace continuation stack size. */ + const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG + + link_options.maxTraceDepth * trace_css; + const unsigned int dss = 0; + + /* Set stack size depending on pipeline options. */ + optix_assert( + optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2)); + } + { /* Create intersection-only pipeline. */ vector pipeline_groups; pipeline_groups.reserve(NUM_PROGRAM_GROUPS); diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h index 1f53c729c3f..817afdc8384 100644 --- a/intern/cycles/device/optix/device_impl.h +++ b/intern/cycles/device/optix/device_impl.h @@ -24,6 +24,7 @@ enum { PG_RGEN_INTERSECT_SUBSURFACE, PG_RGEN_INTERSECT_VOLUME_STACK, PG_RGEN_SHADE_SURFACE_RAYTRACE, + PG_RGEN_SHADE_SURFACE_MNEE, PG_MISS, PG_HITD, /* Default hit group. */ PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */ @@ -46,7 +47,7 @@ static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO; static const int NUM_CALLABLE_PROGRAM_GROUPS = 2; /* List of OptiX pipelines. */ -enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES }; +enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES }; /* A single shader binding table entry. */ struct SbtRecord { diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp index d635512c58a..366bf95269d 100644 --- a/intern/cycles/device/optix/queue.cpp +++ b/intern/cycles/device/optix/queue.cpp @@ -28,6 +28,7 @@ void OptiXDeviceQueue::init_execution() static bool is_optix_specific_kernel(DeviceKernel kernel) { return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE || kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || @@ -63,7 +64,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, cuda_stream_)); if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) { + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) { cuda_device_assert( cuda_device_, cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer), @@ -82,6 +84,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE]; sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord); break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: + pipeline = optix_device->pipelines[PIP_SHADE_MNEE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord); + break; case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: pipeline = optix_device->pipelines[PIP_INTERSECT]; sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord); diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index 8306460d607..ede81705ae8 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -65,6 +65,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device, integrator_shader_sort_counter_(device, "integrator_shader_sort_counter", MEM_READ_WRITE), integrator_shader_raytrace_sort_counter_( device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE), + integrator_shader_mnee_sort_counter_( + device, "integrator_shader_mnee_sort_counter", MEM_READ_WRITE), integrator_shader_sort_prefix_sum_( device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE), integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE), @@ -188,6 +190,9 @@ void PathTraceWorkGPU::alloc_integrator_sorting() integrator_shader_raytrace_sort_counter_.alloc(max_shaders); integrator_shader_raytrace_sort_counter_.zero_to_device(); + integrator_shader_mnee_sort_counter_.alloc(max_shaders); + integrator_shader_mnee_sort_counter_.zero_to_device(); + integrator_shader_sort_prefix_sum_.alloc(max_shaders); integrator_shader_sort_prefix_sum_.zero_to_device(); @@ -195,6 +200,8 @@ void PathTraceWorkGPU::alloc_integrator_sorting() (int *)integrator_shader_sort_counter_.device_pointer; integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] = (int *)integrator_shader_raytrace_sort_counter_.device_pointer; + integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] = + (int *)integrator_shader_mnee_sort_counter_.device_pointer; } } @@ -327,6 +334,7 @@ void PathTraceWorkGPU::enqueue_reset() queue_->zero_to_device(integrator_queue_counter_); queue_->zero_to_device(integrator_shader_sort_counter_); queue_->zero_to_device(integrator_shader_raytrace_sort_counter_); + queue_->zero_to_device(integrator_shader_mnee_sort_counter_); /* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the * counter on the host side because `zero_to_device()` is not doing it. */ @@ -450,6 +458,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: { /* Shading kernels with integrator state and render buffer. */ DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size); @@ -1080,13 +1089,15 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits() bool PathTraceWorkGPU::kernel_uses_sorting(DeviceKernel kernel) { return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE); } bool PathTraceWorkGPU::kernel_creates_shadow_paths(DeviceKernel kernel) { return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE || kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); } @@ -1094,7 +1105,8 @@ bool PathTraceWorkGPU::kernel_creates_ao_paths(DeviceKernel kernel) { return (device_scene_->data.kernel_features & KERNEL_FEATURE_AO) && (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE); } bool PathTraceWorkGPU::kernel_is_shadow_path(DeviceKernel kernel) diff --git a/intern/cycles/integrator/path_trace_work_gpu.h b/intern/cycles/integrator/path_trace_work_gpu.h index 90f8b8a4509..4c10a221a30 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.h +++ b/intern/cycles/integrator/path_trace_work_gpu.h @@ -133,6 +133,7 @@ class PathTraceWorkGPU : public PathTraceWork { /* Shader sorting. */ device_vector integrator_shader_sort_counter_; device_vector integrator_shader_raytrace_sort_counter_; + device_vector integrator_shader_mnee_sort_counter_; device_vector integrator_shader_sort_prefix_sum_; /* Path split. */ device_vector integrator_next_main_path_index_; diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 328c58e7905..6405e365847 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -269,6 +269,21 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) } ccl_gpu_kernel_postfix +ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) + ccl_gpu_kernel_signature(integrator_shade_surface_mnee, + ccl_global const int *path_index_array, + ccl_global float *render_buffer, + const int work_size) +{ + const int global_index = ccl_gpu_global_id_x(); + + if (global_index < work_size) { + const int state = (path_index_array) ? path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_shade_surface_mnee(NULL, state, render_buffer)); + } +} +ccl_gpu_kernel_postfix + ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_volume, ccl_global const int *path_index_array, diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu index e2c5d2ff024..3bd57bc0f1a 100644 --- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu +++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu @@ -15,3 +15,11 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytr global_index; integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer); } + +extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee() +{ + const int global_index = optixGetLaunchIndex().x; + const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : + global_index; + integrator_shade_surface_mnee(nullptr, path_index, __params.render_buffer); +} diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index 293c1d243f8..0db4241b6e3 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -243,9 +243,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Setup next kernel to execute. */ const bool use_caustics = kernel_data.integrator.use_caustics && (object_flag & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); } else { diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index b8ce625c11b..2dfac44b414 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -125,9 +125,12 @@ ccl_device_forceinline void integrator_split_shadow_catcher( const int flags = kernel_tex_fetch(__shaders, shader).flags; const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { @@ -150,9 +153,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche const int object_flags = intersection_get_object_flags(kg, &isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } @@ -222,8 +229,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel( const int object_flags = intersection_get_object_flags(kg, isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; - if (use_raytrace_kernel) { + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } @@ -272,9 +283,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( const int object_flags = intersection_get_object_flags(kg, isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } diff --git a/intern/cycles/kernel/integrator/megakernel.h b/intern/cycles/kernel/integrator/megakernel.h index a0c15794470..17ae13ad23f 100644 --- a/intern/cycles/kernel/integrator/megakernel.h +++ b/intern/cycles/kernel/integrator/megakernel.h @@ -77,6 +77,9 @@ ccl_device void integrator_megakernel(KernelGlobals kg, case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: integrator_shade_surface_raytrace(kg, state, render_buffer); break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: + integrator_shade_surface_mnee(kg, state, render_buffer); + break; case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: integrator_shade_light(kg, state, render_buffer); break; diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 896e81b80ff..ce1398859b7 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -137,7 +137,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, # ifdef __MNEE__ int mnee_vertex_count = 0; - IF_KERNEL_NODES_FEATURE(RAYTRACE) + IF_KERNEL_FEATURE(MNEE) { if (ls.lamp != LAMP_NONE) { /* Is this a caustic light? */ @@ -631,4 +631,12 @@ ccl_device_forceinline void integrator_shade_surface_raytrace( kg, state, render_buffer); } +ccl_device_forceinline void integrator_shade_surface_mnee( + KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + integrator_shade_surface<(KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE) | + KERNEL_FEATURE_MNEE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE>(kg, state, render_buffer); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index 2391cc2356d..b449f807290 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -174,9 +174,14 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, + shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 01df7948241..80eccd6d41f 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1572,6 +1572,7 @@ typedef enum DeviceKernel { DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL, @@ -1689,6 +1690,9 @@ enum KernelFeatureFlag : uint32_t { KERNEL_FEATURE_AO_PASS = (1U << 25U), KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U), KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE), + + /* MNEE. */ + KERNEL_FEATURE_MNEE = (1U << 27U), }; /* Shader node feature mask, to specialize shader evaluation for kernels. */ @@ -1714,9 +1718,12 @@ enum KernelFeatureFlag : uint32_t { * are different depending on the main, shadow or null path. For GPU we don't have * C++17 everywhere so can't use it. */ #ifdef __KERNEL_CPU__ +# define IF_KERNEL_FEATURE(feature) \ + if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) # define IF_KERNEL_NODES_FEATURE(feature) \ if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) #else +# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) # define IF_KERNEL_NODES_FEATURE(feature) \ if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) #endif diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index b35242139ea..8b5604eba72 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -550,7 +550,7 @@ void Scene::update_kernel_features() dscene.data.integrator.use_caustics = false; if (has_caustics_caster && has_caustics_receiver && has_caustics_light) { dscene.data.integrator.use_caustics = true; - kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE; + kernel_features |= KERNEL_FEATURE_MNEE; } if (bake_manager->get_baking()) { @@ -597,6 +597,7 @@ static void log_kernel_features(const uint features) << "\n"; VLOG(2) << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE) << "\n"; + VLOG(2) << "Use MNEE" << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n"; VLOG(2) << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT) << "\n"; VLOG(2) << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n"; VLOG(2) << "Use Path Tracing " << string_from_bool(features & KERNEL_FEATURE_PATH_TRACING) -- cgit v1.2.3