diff options
author | Sergey Sharybin <sergey@blender.org> | 2021-11-11 17:29:35 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey@blender.org> | 2021-11-11 17:29:35 +0300 |
commit | ce395c84a34225a820002ad551bee324b072f034 (patch) | |
tree | b981bdd0a69272abb6c0aaf1e7cb4f2530f7e254 /intern | |
parent | 06a74e78169ff60082716c0bd85c0b76de6bb885 (diff) | |
parent | d26d3cfe193793728cac77be9b44463a84a0f57e (diff) |
Merge branch 'blender-v3.0-release'
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/cpu/kernel.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/optix/queue.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/integrator/path_trace_work_gpu.cpp | 10 | ||||
-rw-r--r-- | intern/cycles/kernel/device/cpu/kernel_arch.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/device/cpu/kernel_arch_impl.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/device/optix/kernel.cu | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/film/passes.h | 34 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/intersect_closest.h | 23 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/megakernel.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_surface.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shade_volume.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/integrator/shadow_catcher.h | 25 |
13 files changed, 60 insertions, 54 deletions
diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h index 406bd07ab3d..2d1de975c2b 100644 --- a/intern/cycles/device/cpu/kernel.h +++ b/intern/cycles/device/cpu/kernel.h @@ -42,7 +42,7 @@ class CPUKernels { IntegratorInitFunction integrator_init_from_camera; IntegratorInitFunction integrator_init_from_bake; - IntegratorFunction integrator_intersect_closest; + IntegratorShadeFunction integrator_intersect_closest; IntegratorFunction integrator_intersect_shadow; IntegratorFunction integrator_intersect_subsurface; IntegratorFunction integrator_intersect_volume_stack; diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp index f5bfd916ccf..e3946d94f5d 100644 --- a/intern/cycles/device/optix/queue.cpp +++ b/intern/cycles/device/optix/queue.cpp @@ -73,7 +73,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a sizeof(device_ptr), cuda_stream_)); - if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) { + if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST || + kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) { cuda_device_assert( cuda_device_, cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer), diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp index 2263c9892f4..956aa6a8c90 100644 --- a/intern/cycles/integrator/path_trace_work_gpu.cpp +++ b/intern/cycles/integrator/path_trace_work_gpu.cpp @@ -439,7 +439,15 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num DCHECK_LE(work_size, max_num_paths_); switch (kernel) { - case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: { + /* Closest ray intersection kernels with integrator state and render buffer. */ + void *d_render_buffer = (void *)buffers_->buffer.device_pointer; + void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)}; + + queue_->enqueue(kernel, work_size, args); + break; + } + case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW: case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE: case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: { diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h index 2f9a3f7c59d..61f62f3136b 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch.h @@ -37,7 +37,7 @@ KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera); KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake); -KERNEL_INTEGRATOR_FUNCTION(intersect_closest); +KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest); KERNEL_INTEGRATOR_FUNCTION(intersect_shadow); KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface); KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack); diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 1ea5002e300..747c47c34c9 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -112,7 +112,7 @@ CCL_NAMESPACE_BEGIN DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera) DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake) -DEFINE_INTEGRATOR_KERNEL(intersect_closest) +DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest) DEFINE_INTEGRATOR_KERNEL(intersect_subsurface) DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack) DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background) diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index d63cd0e8262..dd0c6dd6893 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -131,13 +131,14 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) ccl_gpu_kernel_signature(integrator_intersect_closest, ccl_global const int *path_index_array, + ccl_global float *render_buffer, const int work_size) { const int global_index = ccl_gpu_global_id_x(); if (global_index < work_size) { const int state = (path_index_array) ? path_index_array[global_index] : global_index; - ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state)); + ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state, render_buffer)); } } diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index 6989219cd9f..b987aa7a817 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -57,7 +57,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest() const int global_index = optixGetLaunchIndex().x; const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] : global_index; - integrator_intersect_closest(nullptr, path_index); + integrator_intersect_closest(nullptr, path_index, __params.render_buffer); } extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow() diff --git a/intern/cycles/kernel/film/passes.h b/intern/cycles/kernel/film/passes.h index 22b4b779a17..77761709a78 100644 --- a/intern/cycles/kernel/film/passes.h +++ b/intern/cycles/kernel/film/passes.h @@ -160,40 +160,6 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals } #endif /* __DENOISING_FEATURES__ */ -#ifdef __SHADOW_CATCHER__ - -/* Write shadow catcher passes on a bounce from the shadow catcher object. */ -ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( - KernelGlobals kg, - IntegratorState state, - ccl_private const ShaderData *sd, - ccl_global float *ccl_restrict render_buffer) -{ - if (!kernel_data.integrator.has_shadow_catcher) { - return; - } - - kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); - kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); - - if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) { - return; - } - - ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer); - - /* Count sample for the shadow catcher object. */ - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); - - /* Since the split is done, the sample does not contribute to the matte, so accumulate it as - * transparency to the matte. */ - const float3 throughput = INTEGRATOR_STATE(state, path, throughput); - kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, - average(throughput)); -} - -#endif /* __SHADOW_CATCHER__ */ - ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer, size_t depth, float id, diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h index 5522b46205b..366bfba7aca 100644 --- a/intern/cycles/kernel/integrator/intersect_closest.h +++ b/intern/cycles/kernel/integrator/intersect_closest.h @@ -88,7 +88,10 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg, #ifdef __SHADOW_CATCHER__ /* Split path if a shadow catcher was hit. */ ccl_device_forceinline void integrator_split_shadow_catcher( - KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect) + KernelGlobals kg, + IntegratorState state, + ccl_private const Intersection *ccl_restrict isect, + ccl_global float *ccl_restrict render_buffer) { /* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two * paths from here. */ @@ -97,6 +100,8 @@ ccl_device_forceinline void integrator_split_shadow_catcher( return; } + kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer); + /* Mark state as having done a shadow catcher split so that it stops contributing to * the shadow catcher matte pass, but keeps contributing to the combined pass. */ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT; @@ -191,6 +196,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect, + ccl_global float *ccl_restrict render_buffer, const bool hit) { /* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */ @@ -233,7 +239,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel( #ifdef __SHADOW_CATCHER__ /* Handle shadow catcher. */ - integrator_split_shadow_catcher(kg, state, isect); + integrator_split_shadow_catcher(kg, state, isect, render_buffer); #endif } else { @@ -253,7 +259,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel( * volume shading and termination testing have already been done. */ template<uint32_t current_kernel> ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( - KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect) + KernelGlobals kg, + IntegratorState state, + ccl_private const Intersection *ccl_restrict isect, + ccl_global float *ccl_restrict render_buffer) { if (isect->prim != PRIM_NONE) { /* Hit a surface, continue with light or surface kernel. */ @@ -278,7 +287,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( #ifdef __SHADOW_CATCHER__ /* Handle shadow catcher. */ - integrator_split_shadow_catcher(kg, state, isect); + integrator_split_shadow_catcher(kg, state, isect, render_buffer); #endif return; } @@ -290,7 +299,9 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume( } } -ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state) +ccl_device void integrator_intersect_closest(KernelGlobals kg, + IntegratorState state, + ccl_global float *ccl_restrict render_buffer) { PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST); @@ -341,7 +352,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState s /* Setup up next kernel to be executed. */ integrator_intersect_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>( - kg, state, &isect, hit); + kg, state, &isect, render_buffer, hit); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/integrator/megakernel.h b/intern/cycles/kernel/integrator/megakernel.h index d8cc794dc7a..43313400a11 100644 --- a/intern/cycles/kernel/integrator/megakernel.h +++ b/intern/cycles/kernel/integrator/megakernel.h @@ -76,7 +76,7 @@ ccl_device void integrator_megakernel(KernelGlobals kg, if (queued_kernel) { switch (queued_kernel) { case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: - integrator_intersect_closest(kg, state); + integrator_intersect_closest(kg, state, render_buffer); break; case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND: integrator_shade_background(kg, state, render_buffer); diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h index e6fe5a87120..3c84dcc3728 100644 --- a/intern/cycles/kernel/integrator/shade_surface.h +++ b/intern/cycles/kernel/integrator/shade_surface.h @@ -492,10 +492,6 @@ ccl_device bool integrate_surface(KernelGlobals kg, kernel_write_denoising_features_surface(kg, state, &sd, render_buffer); #endif -#ifdef __SHADOW_CATCHER__ - kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer); -#endif - /* Direct light. */ PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT); integrate_surface_direct_light(kg, state, &sd, &rng_state); diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 86d712cdf32..b4d02b1b664 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -1024,7 +1024,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg, else { /* Continue to background, light or surface. */ integrator_intersect_next_kernel_after_volume<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>( - kg, state, &isect); + kg, state, &isect, render_buffer); return; } #endif /* __VOLUME__ */ diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h index ac55678c9cb..4d72aced3fc 100644 --- a/intern/cycles/kernel/integrator/shadow_catcher.h +++ b/intern/cycles/kernel/integrator/shadow_catcher.h @@ -16,6 +16,7 @@ #pragma once +#include "kernel/film/write_passes.h" #include "kernel/integrator/path_state.h" #include "kernel/integrator/state_util.h" @@ -47,7 +48,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals return false; } - if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) { + if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) { return false; } @@ -88,6 +89,28 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t return path_flag & PATH_RAY_SHADOW_CATCHER_PASS; } +/* Write shadow catcher passes on a bounce from the shadow catcher object. */ +ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data( + KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer) +{ + kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED); + kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED); + + const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index); + const uint64_t render_buffer_offset = (uint64_t)render_pixel_index * + kernel_data.film.pass_stride; + ccl_global float *buffer = render_buffer + render_buffer_offset; + + /* Count sample for the shadow catcher object. */ + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f); + + /* Since the split is done, the sample does not contribute to the matte, so accumulate it as + * transparency to the matte. */ + const float3 throughput = INTEGRATOR_STATE(state, path, throughput); + kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, + average(throughput)); +} + #endif /* __SHADOW_CATCHER__ */ CCL_NAMESPACE_END |