From f2cd7e08fed02fdf02060c17c943e15e85638cb5 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Mon, 30 May 2022 18:04:14 +0200 Subject: Fix Cycles MNEE not working for Metal Move MNEE to own kernel, separate from shader ray-tracing. This does introduce the limitation that a shader can't use both MNEE and AO/bevel, but that seems like the better trade-off for now. We can experiment with bigger kernel organization changes later. Differential Revision: https://developer.blender.org/D15070 --- intern/cycles/kernel/device/gpu/kernel.h | 15 +++++++++++ .../kernel/device/optix/kernel_shader_raytrace.cu | 8 ++++++ intern/cycles/kernel/integrator/init_from_bake.h | 7 +++-- .../cycles/kernel/integrator/intersect_closest.h | 31 ++++++++++++++++------ intern/cycles/kernel/integrator/megakernel.h | 3 +++ intern/cycles/kernel/integrator/shade_surface.h | 10 ++++++- intern/cycles/kernel/integrator/subsurface.h | 9 +++++-- intern/cycles/kernel/types.h | 7 +++++ 8 files changed, 77 insertions(+), 13 deletions(-) (limited to 'intern/cycles/kernel') diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 328c58e7905..6405e365847 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -269,6 +269,21 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) } ccl_gpu_kernel_postfix +ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) + ccl_gpu_kernel_signature(integrator_shade_surface_mnee, + ccl_global const int *path_index_array, + ccl_global float *render_buffer, + const int work_size) +{ + const int global_index = ccl_gpu_global_id_x(); + + if (global_index < work_size) { + const int state = (path_index_array) ? path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_shade_surface_mnee(NULL, state, render_buffer)); + } +} +ccl_gpu_kernel_postfix + ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_volume, ccl_global const int *path_index_array, diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu index e2c5d2ff024..3bd57bc0f1a 100644 --- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu +++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu @@ -15,3 +15,11 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytr global_index; integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer); } + +extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee() +{ + const int global_index = optixGetLaunchIndex().x; + const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : + global_index; + integrator_shade_surface_mnee(nullptr, path_index, __params.render_buffer); +} diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index 293c1d243f8..0db4241b6e3 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -243,9 +243,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Setup next kernel to execute. */ const bool use_caustics = kernel_data.integrator.use_caustics && (object_flag & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); } else { diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index b8ce625c11b..2dfac44b414 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -125,9 +125,12 @@ ccl_device_forceinline void integrator_split_shadow_catcher( const int flags = kernel_tex_fetch(__shaders, shader).flags; const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { @@ -150,9 +153,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche const int object_flags = intersection_get_object_flags(kg, &isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } @@ -222,8 +229,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel( const int object_flags = intersection_get_object_flags(kg, isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; - if (use_raytrace_kernel) { + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } @@ -272,9 +283,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( const int object_flags = intersection_get_object_flags(kg, isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } diff --git a/intern/cycles/kernel/integrator/megakernel.h b/intern/cycles/kernel/integrator/megakernel.h index a0c15794470..17ae13ad23f 100644 --- a/intern/cycles/kernel/integrator/megakernel.h +++ b/intern/cycles/kernel/integrator/megakernel.h @@ -77,6 +77,9 @@ ccl_device void integrator_megakernel(KernelGlobals kg, case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: integrator_shade_surface_raytrace(kg, state, render_buffer); break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: + integrator_shade_surface_mnee(kg, state, render_buffer); + break; case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: integrator_shade_light(kg, state, render_buffer); break; diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 896e81b80ff..ce1398859b7 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -137,7 +137,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, # ifdef __MNEE__ int mnee_vertex_count = 0; - IF_KERNEL_NODES_FEATURE(RAYTRACE) + IF_KERNEL_FEATURE(MNEE) { if (ls.lamp != LAMP_NONE) { /* Is this a caustic light? */ @@ -631,4 +631,12 @@ ccl_device_forceinline void integrator_shade_surface_raytrace( kg, state, render_buffer); } +ccl_device_forceinline void integrator_shade_surface_mnee( + KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + integrator_shade_surface<(KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE) | + KERNEL_FEATURE_MNEE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE>(kg, state, render_buffer); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index 2391cc2356d..b449f807290 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -174,9 +174,14 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, + shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 01df7948241..80eccd6d41f 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1572,6 +1572,7 @@ typedef enum DeviceKernel { DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL, @@ -1689,6 +1690,9 @@ enum KernelFeatureFlag : uint32_t { KERNEL_FEATURE_AO_PASS = (1U << 25U), KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U), KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE), + + /* MNEE. */ + KERNEL_FEATURE_MNEE = (1U << 27U), }; /* Shader node feature mask, to specialize shader evaluation for kernels. */ @@ -1714,9 +1718,12 @@ enum KernelFeatureFlag : uint32_t { * are different depending on the main, shadow or null path. For GPU we don't have * C++17 everywhere so can't use it. */ #ifdef __KERNEL_CPU__ +# define IF_KERNEL_FEATURE(feature) \ + if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) # define IF_KERNEL_NODES_FEATURE(feature) \ if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) #else +# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) # define IF_KERNEL_NODES_FEATURE(feature) \ if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) #endif -- cgit v1.2.3