diff options
Diffstat (limited to 'intern/cycles')
19 files changed, 167 insertions, 23 deletions
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index 75177566901..cb7e909a2d5 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -457,6 +457,8 @@ void CUDADevice::reserve_local_memory(const uint kernel_features) /* Use the biggest kernel for estimation. */ const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE : + (kernel_features & KERNEL_FEATURE_MNEE) ? + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE : DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE; /* Launch kernel, using just 1 block appears sufficient to reserve memory for all diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index f8fdb86ca29..ea68c821166 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -420,6 +420,8 @@ void HIPDevice::reserve_local_memory(const uint kernel_features) /* Use the biggest kernel for estimation. */ const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE : + (kernel_features & KERNEL_FEATURE_MNEE) ? + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE : DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE; /* Launch kernel, using just 1 block appears sufficient to reserve memory for all diff --git a/intern/cycles/device/kernel.cpp b/intern/cycles/device/kernel.cpp index 072731a2af5..96a99cd62cd 100644 --- a/intern/cycles/device/kernel.cpp +++ b/intern/cycles/device/kernel.cpp @@ -33,6 +33,8 @@ const char *device_kernel_as_string(DeviceKernel kernel) return "integrator_shade_surface"; case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: return "integrator_shade_surface_raytrace"; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: + return "integrator_shade_surface_mnee"; case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: return "integrator_shade_volume"; case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL: diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 304efc813ec..fec4cd80466 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -35,7 +35,8 @@ bool kernel_has_intersection(DeviceKernel device_kernel) device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK || - device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); + device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + device_kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE); } struct ShaderCache { diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index ec10e091b25..c1dab5b0d8f 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -270,6 +270,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: break; default: bvhMetalRT = nil; diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 9fc265bc327..ad2a361a056 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -452,9 +452,10 @@ bool OptiXDevice::load_kernels(const uint kernel_features) } { /* Load and compile PTX module with OptiX kernels. */ - string ptx_data, ptx_filename = path_get((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? - "lib/kernel_optix_shader_raytrace.ptx" : - "lib/kernel_optix.ptx"); + string ptx_data, ptx_filename = path_get( + (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ? + "lib/kernel_optix_shader_raytrace.ptx" : + "lib/kernel_optix.ptx"); if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { if (!getenv("OPTIX_ROOT_DIR")) { set_error( @@ -464,7 +465,9 @@ bool OptiXDevice::load_kernels(const uint kernel_features) } ptx_filename = compile_kernel( kernel_features, - (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ? "kernel_shader_raytrace" : "kernel", + (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ? + "kernel_shader_raytrace" : + "kernel", "optix", true); } @@ -620,6 +623,14 @@ bool OptiXDevice::load_kernels(const uint kernel_features) "__direct_callable__svm_node_bevel"; } + /* MNEE. */ + if (kernel_features & KERNEL_FEATURE_MNEE) { + group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; + group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module; + group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName = + "__raygen__kernel_optix_integrator_shade_surface_mnee"; + } + optix_assert(optixProgramGroupCreate( context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups)); @@ -701,6 +712,46 @@ bool OptiXDevice::load_kernels(const uint kernel_features) pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2)); } + if (kernel_features & KERNEL_FEATURE_MNEE) { + /* Create MNEE pipeline. */ + vector<OptixProgramGroup> pipeline_groups; + pipeline_groups.reserve(NUM_PROGRAM_GROUPS); + pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]); + pipeline_groups.push_back(groups[PG_MISS]); + pipeline_groups.push_back(groups[PG_HITD]); + pipeline_groups.push_back(groups[PG_HITS]); + pipeline_groups.push_back(groups[PG_HITL]); + pipeline_groups.push_back(groups[PG_HITV]); + if (motion_blur) { + pipeline_groups.push_back(groups[PG_HITD_MOTION]); + pipeline_groups.push_back(groups[PG_HITS_MOTION]); + } + if (kernel_features & KERNEL_FEATURE_POINTCLOUD) { + pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]); + pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]); + } + pipeline_groups.push_back(groups[PG_CALL_SVM_AO]); + pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]); + + optix_assert(optixPipelineCreate(context, + &pipeline_options, + &link_options, + pipeline_groups.data(), + pipeline_groups.size(), + nullptr, + 0, + &pipelines[PIP_SHADE_MNEE])); + + /* Combine ray generation and trace continuation stack size. */ + const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG + + link_options.maxTraceDepth * trace_css; + const unsigned int dss = 0; + + /* Set stack size depending on pipeline options. */ + optix_assert( + optixPipelineSetStackSize(pipelines[PIP_SHADE_MNEE], 0, dss, css, motion_blur ? 3 : 2)); + } + { /* Create intersection-only pipeline. */ vector<OptixProgramGroup> pipeline_groups; pipeline_groups.reserve(NUM_PROGRAM_GROUPS); diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h index 1f53c729c3f..817afdc8384 100644 --- a/intern/cycles/device/optix/device_impl.h +++ b/intern/cycles/device/optix/device_impl.h @@ -24,6 +24,7 @@ enum { PG_RGEN_INTERSECT_SUBSURFACE, PG_RGEN_INTERSECT_VOLUME_STACK, PG_RGEN_SHADE_SURFACE_RAYTRACE, + PG_RGEN_SHADE_SURFACE_MNEE, PG_MISS, PG_HITD, /* Default hit group. */ PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */ @@ -46,7 +47,7 @@ static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO; static const int NUM_CALLABLE_PROGRAM_GROUPS = 2; /* List of OptiX pipelines. */ -enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES }; +enum { PIP_SHADE_RAYTRACE, PIP_SHADE_MNEE, PIP_INTERSECT, NUM_PIPELINES }; /* A single shader binding table entry. */ struct SbtRecord { diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp index d635512c58a..366bf95269d 100644 --- a/intern/cycles/device/optix/queue.cpp +++ b/intern/cycles/device/optix/queue.cpp @@ -28,6 +28,7 @@ void OptiXDeviceQueue::init_execution() static bool is_optix_specific_kernel(DeviceKernel kernel) { return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE || kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW || kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE || @@ -63,7 +64,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, cuda_stream_)); if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) { + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE) { cuda_device_assert( cuda_device_, cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer), @@ -82,6 +84,10 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE]; sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord); break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: + pipeline = optix_device->pipelines[PIP_SHADE_MNEE]; + sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_MNEE * sizeof(SbtRecord); + break; case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: pipeline = optix_device->pipelines[PIP_INTERSECT]; sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord); diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index 8306460d607..ede81705ae8 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -65,6 +65,8 @@ PathTraceWorkGPU::PathTraceWorkGPU(Device *device, integrator_shader_sort_counter_(device, "integrator_shader_sort_counter", MEM_READ_WRITE), integrator_shader_raytrace_sort_counter_( device, "integrator_shader_raytrace_sort_counter", MEM_READ_WRITE), + integrator_shader_mnee_sort_counter_( + device, "integrator_shader_mnee_sort_counter", MEM_READ_WRITE), integrator_shader_sort_prefix_sum_( device, "integrator_shader_sort_prefix_sum", MEM_READ_WRITE), integrator_next_main_path_index_(device, "integrator_next_main_path_index", MEM_READ_WRITE), @@ -188,6 +190,9 @@ void PathTraceWorkGPU::alloc_integrator_sorting() integrator_shader_raytrace_sort_counter_.alloc(max_shaders); integrator_shader_raytrace_sort_counter_.zero_to_device(); + integrator_shader_mnee_sort_counter_.alloc(max_shaders); + integrator_shader_mnee_sort_counter_.zero_to_device(); + integrator_shader_sort_prefix_sum_.alloc(max_shaders); integrator_shader_sort_prefix_sum_.zero_to_device(); @@ -195,6 +200,8 @@ void PathTraceWorkGPU::alloc_integrator_sorting() (int *)integrator_shader_sort_counter_.device_pointer; integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] = (int *)integrator_shader_raytrace_sort_counter_.device_pointer; + integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] = + (int *)integrator_shader_mnee_sort_counter_.device_pointer; } } @@ -327,6 +334,7 @@ void PathTraceWorkGPU::enqueue_reset() queue_->zero_to_device(integrator_queue_counter_); queue_->zero_to_device(integrator_shader_sort_counter_); queue_->zero_to_device(integrator_shader_raytrace_sort_counter_); + queue_->zero_to_device(integrator_shader_mnee_sort_counter_); /* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the * counter on the host side because `zero_to_device()` is not doing it. */ @@ -450,6 +458,7 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW: case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME: { /* Shading kernels with integrator state and render buffer. */ DeviceKernelArguments args(&d_path_index, &buffers_->buffer.device_pointer, &work_size); @@ -1080,13 +1089,15 @@ int PathTraceWorkGPU::shadow_catcher_count_possible_splits() bool PathTraceWorkGPU::kernel_uses_sorting(DeviceKernel kernel) { return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE); } bool PathTraceWorkGPU::kernel_creates_shadow_paths(DeviceKernel kernel) { return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE || kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME); } @@ -1094,7 +1105,8 @@ bool PathTraceWorkGPU::kernel_creates_ao_paths(DeviceKernel kernel) { return (device_scene_->data.kernel_features & KERNEL_FEATURE_AO) && (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE || - kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE); + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE); } bool PathTraceWorkGPU::kernel_is_shadow_path(DeviceKernel kernel) diff --git a/intern/cycles/integrator/path_trace_work_gpu.h b/intern/cycles/integrator/path_trace_work_gpu.h index 90f8b8a4509..4c10a221a30 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.h +++ b/intern/cycles/integrator/path_trace_work_gpu.h @@ -133,6 +133,7 @@ class PathTraceWorkGPU : public PathTraceWork { /* Shader sorting. */ device_vector<int> integrator_shader_sort_counter_; device_vector<int> integrator_shader_raytrace_sort_counter_; + device_vector<int> integrator_shader_mnee_sort_counter_; device_vector<int> integrator_shader_sort_prefix_sum_; /* Path split. */ device_vector<int> integrator_next_main_path_index_; diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 328c58e7905..6405e365847 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -270,6 +270,21 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_postfix ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) + ccl_gpu_kernel_signature(integrator_shade_surface_mnee, + ccl_global const int *path_index_array, + ccl_global float *render_buffer, + const int work_size) +{ + const int global_index = ccl_gpu_global_id_x(); + + if (global_index < work_size) { + const int state = (path_index_array) ? path_index_array[global_index] : global_index; + ccl_gpu_kernel_call(integrator_shade_surface_mnee(NULL, state, render_buffer)); + } +} +ccl_gpu_kernel_postfix + +ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_shade_volume, ccl_global const int *path_index_array, ccl_global float *render_buffer, diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu index e2c5d2ff024..3bd57bc0f1a 100644 --- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu +++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu @@ -15,3 +15,11 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytr global_index; integrator_shade_surface_raytrace(nullptr, path_index, __params.render_buffer); } + +extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_mnee() +{ + const int global_index = optixGetLaunchIndex().x; + const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : + global_index; + integrator_shade_surface_mnee(nullptr, path_index, __params.render_buffer); +} diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h index 293c1d243f8..0db4241b6e3 100644 --- a/intern/cycles/kernel/integrator/init_from_bake.h +++ b/intern/cycles/kernel/integrator/init_from_bake.h @@ -243,9 +243,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg, /* Setup next kernel to execute. */ const bool use_caustics = kernel_data.integrator.use_caustics && (object_flag & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader_index); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader_index); } else { diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index b8ce625c11b..2dfac44b414 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -125,9 +125,12 @@ ccl_device_forceinline void integrator_split_shadow_catcher( const int flags = kernel_tex_fetch(__shaders, shader).flags; const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_INIT_SORTED(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } else { @@ -150,9 +153,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_shadow_catche const int object_flags = intersection_get_object_flags(kg, &isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } @@ -222,8 +229,12 @@ ccl_device_forceinline void integrator_intersect_next_kernel( const int object_flags = intersection_get_object_flags(kg, isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; - if (use_raytrace_kernel) { + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } @@ -272,9 +283,13 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( const int object_flags = intersection_get_object_flags(kg, isect); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED( + current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED( current_kernel, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); } diff --git a/intern/cycles/kernel/integrator/megakernel.h b/intern/cycles/kernel/integrator/megakernel.h index a0c15794470..17ae13ad23f 100644 --- a/intern/cycles/kernel/integrator/megakernel.h +++ b/intern/cycles/kernel/integrator/megakernel.h @@ -77,6 +77,9 @@ ccl_device void integrator_megakernel(KernelGlobals kg, case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: integrator_shade_surface_raytrace(kg, state, render_buffer); break; + case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE: + integrator_shade_surface_mnee(kg, state, render_buffer); + break; case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT: integrator_shade_light(kg, state, render_buffer); break; diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index 896e81b80ff..ce1398859b7 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -137,7 +137,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg, # ifdef __MNEE__ int mnee_vertex_count = 0; - IF_KERNEL_NODES_FEATURE(RAYTRACE) + IF_KERNEL_FEATURE(MNEE) { if (ls.lamp != LAMP_NONE) { /* Is this a caustic light? */ @@ -631,4 +631,12 @@ ccl_device_forceinline void integrator_shade_surface_raytrace( kg, state, render_buffer); } +ccl_device_forceinline void integrator_shade_surface_mnee( + KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + integrator_shade_surface<(KERNEL_FEATURE_NODE_MASK_SURFACE & ~KERNEL_FEATURE_NODE_RAYTRACE) | + KERNEL_FEATURE_MNEE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE>(kg, state, render_buffer); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h index 2391cc2356d..b449f807290 100644 --- a/intern/cycles/kernel/integrator/subsurface.h +++ b/intern/cycles/kernel/integrator/subsurface.h @@ -174,9 +174,14 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat const int object_flags = intersection_get_object_flags(kg, &ss_isect.hits[0]); const bool use_caustics = kernel_data.integrator.use_caustics && (object_flags & SD_OBJECT_CAUSTICS); - const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE) || use_caustics; + const bool use_raytrace_kernel = (shader_flags & SD_HAS_RAYTRACE); - if (use_raytrace_kernel) { + if (use_caustics) { + INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, + shader); + } + else if (use_raytrace_kernel) { INTEGRATOR_PATH_NEXT_SORTED(DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, shader); diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 01df7948241..80eccd6d41f 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1572,6 +1572,7 @@ typedef enum DeviceKernel { DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE, + DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW, DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL, @@ -1689,6 +1690,9 @@ enum KernelFeatureFlag : uint32_t { KERNEL_FEATURE_AO_PASS = (1U << 25U), KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U), KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE), + + /* MNEE. */ + KERNEL_FEATURE_MNEE = (1U << 27U), }; /* Shader node feature mask, to specialize shader evaluation for kernels. */ @@ -1714,9 +1718,12 @@ enum KernelFeatureFlag : uint32_t { * are different depending on the main, shadow or null path. For GPU we don't have * C++17 everywhere so can't use it. */ #ifdef __KERNEL_CPU__ +# define IF_KERNEL_FEATURE(feature) \ + if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) # define IF_KERNEL_NODES_FEATURE(feature) \ if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) #else +# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U) # define IF_KERNEL_NODES_FEATURE(feature) \ if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U) #endif diff --git a/intern/cycles/scene/scene.cpp b/intern/cycles/scene/scene.cpp index b35242139ea..8b5604eba72 100644 --- a/intern/cycles/scene/scene.cpp +++ b/intern/cycles/scene/scene.cpp @@ -550,7 +550,7 @@ void Scene::update_kernel_features() dscene.data.integrator.use_caustics = false; if (has_caustics_caster && has_caustics_receiver && has_caustics_light) { dscene.data.integrator.use_caustics = true; - kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE; + kernel_features |= KERNEL_FEATURE_MNEE; } if (bake_manager->get_baking()) { @@ -597,6 +597,7 @@ static void log_kernel_features(const uint features) << "\n"; VLOG(2) << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE) << "\n"; + VLOG(2) << "Use MNEE" << string_from_bool(features & KERNEL_FEATURE_MNEE) << "\n"; VLOG(2) << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT) << "\n"; VLOG(2) << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n"; VLOG(2) << "Use Path Tracing " << string_from_bool(features & KERNEL_FEATURE_PATH_TRACING) |