Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey@blender.org>2021-11-10 19:18:55 +0300
committerSergey Sharybin <sergey@blender.org>2021-11-11 17:21:35 +0300
commitd26d3cfe193793728cac77be9b44463a84a0f57e (patch)
treebd949dffbe4e8476f008dcd855a941bd9e030a28
parent9be6880d020eb2a6891c07e0b8794400f20f5464 (diff)
Fix T92868: Cycles catcher with transparency crashes
The issue was caused by splitting happening twice. Fixed by checking for split flag which is assigned to the both states during split. The tricky part was to write catcher data at the moment of split: the transparency and shadow catcher sample count is to be accumulated at that point. Now it is happening in the `intersect_closest` kernel. The downside is that render buffer is to be passed to the kernel, but the benefit is that extra split bounce check is not needed now. Had to move the passes write to shadow catcher header, since include of `film/passes.h` causes all the fun of requirement to have BSDF data structures available. Differential Revision: https://developer.blender.org/D13177
-rw-r--r--intern/cycles/device/cpu/kernel.h2
-rw-r--r--intern/cycles/device/optix/queue.cpp3
-rw-r--r--intern/cycles/integrator/path_trace_work_gpu.cpp10
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch.h2
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch_impl.h2
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h6
-rw-r--r--intern/cycles/kernel/device/optix/kernel.cu2
-rw-r--r--intern/cycles/kernel/film/passes.h34
-rw-r--r--intern/cycles/kernel/integrator/intersect_closest.h23
-rw-r--r--intern/cycles/kernel/integrator/megakernel.h2
-rw-r--r--intern/cycles/kernel/integrator/shade_surface.h4
-rw-r--r--intern/cycles/kernel/integrator/shade_volume.h2
-rw-r--r--intern/cycles/kernel/integrator/shadow_catcher.h25
13 files changed, 62 insertions, 55 deletions
diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h
index 406bd07ab3d..2d1de975c2b 100644
--- a/intern/cycles/device/cpu/kernel.h
+++ b/intern/cycles/device/cpu/kernel.h
@@ -42,7 +42,7 @@ class CPUKernels {
IntegratorInitFunction integrator_init_from_camera;
IntegratorInitFunction integrator_init_from_bake;
- IntegratorFunction integrator_intersect_closest;
+ IntegratorShadeFunction integrator_intersect_closest;
IntegratorFunction integrator_intersect_shadow;
IntegratorFunction integrator_intersect_subsurface;
IntegratorFunction integrator_intersect_volume_stack;
diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp
index f5bfd916ccf..e3946d94f5d 100644
--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@@ -73,7 +73,8 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
sizeof(device_ptr),
cuda_stream_));
- if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+ if (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+ kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
cuda_device_assert(
cuda_device_,
cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index dfc1362ab09..b9784f68f56 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -437,7 +437,15 @@ void PathTraceWorkGPU::enqueue_path_iteration(DeviceKernel kernel, const int num
DCHECK_LE(work_size, max_num_paths_);
switch (kernel) {
- case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
+ case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST: {
+ /* Closest ray intersection kernels with integrator state and render buffer. */
+ void *d_render_buffer = (void *)buffers_->buffer.device_pointer;
+ void *args[] = {&d_path_index, &d_render_buffer, const_cast<int *>(&work_size)};
+
+ queue_->enqueue(kernel, work_size, args);
+ break;
+ }
+
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK: {
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h
index 2f9a3f7c59d..61f62f3136b 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch.h
@@ -37,7 +37,7 @@
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
-KERNEL_INTEGRATOR_FUNCTION(intersect_closest);
+KERNEL_INTEGRATOR_SHADE_FUNCTION(intersect_closest);
KERNEL_INTEGRATOR_FUNCTION(intersect_shadow);
KERNEL_INTEGRATOR_FUNCTION(intersect_subsurface);
KERNEL_INTEGRATOR_FUNCTION(intersect_volume_stack);
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
index 1ea5002e300..747c47c34c9 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
@@ -112,7 +112,7 @@ CCL_NAMESPACE_BEGIN
DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
-DEFINE_INTEGRATOR_KERNEL(intersect_closest)
+DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 844bbf90f67..56fcc38b907 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -116,13 +116,15 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
- kernel_gpu_integrator_intersect_closest(const int *path_index_array, const int work_size)
+ kernel_gpu_integrator_intersect_closest(const int *path_index_array,
+ ccl_global float *render_buffer,
+ const int work_size)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
- integrator_intersect_closest(NULL, state);
+ integrator_intersect_closest(NULL, state, render_buffer);
}
}
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 6989219cd9f..b987aa7a817 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -57,7 +57,7 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
const int global_index = optixGetLaunchIndex().x;
const int path_index = (__params.path_index_array) ? __params.path_index_array[global_index] :
global_index;
- integrator_intersect_closest(nullptr, path_index);
+ integrator_intersect_closest(nullptr, path_index, __params.render_buffer);
}
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_shadow()
diff --git a/intern/cycles/kernel/film/passes.h b/intern/cycles/kernel/film/passes.h
index 22b4b779a17..77761709a78 100644
--- a/intern/cycles/kernel/film/passes.h
+++ b/intern/cycles/kernel/film/passes.h
@@ -160,40 +160,6 @@ ccl_device_forceinline void kernel_write_denoising_features_volume(KernelGlobals
}
#endif /* __DENOISING_FEATURES__ */
-#ifdef __SHADOW_CATCHER__
-
-/* Write shadow catcher passes on a bounce from the shadow catcher object. */
-ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
- KernelGlobals kg,
- IntegratorState state,
- ccl_private const ShaderData *sd,
- ccl_global float *ccl_restrict render_buffer)
-{
- if (!kernel_data.integrator.has_shadow_catcher) {
- return;
- }
-
- kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
- kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-
- if (!kernel_shadow_catcher_is_path_split_bounce(kg, state, sd->object_flag)) {
- return;
- }
-
- ccl_global float *buffer = kernel_pass_pixel_render_buffer(kg, state, render_buffer);
-
- /* Count sample for the shadow catcher object. */
- kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
-
- /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
- * transparency to the matte. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
- average(throughput));
-}
-
-#endif /* __SHADOW_CATCHER__ */
-
ccl_device_inline size_t kernel_write_id_pass(ccl_global float *ccl_restrict buffer,
size_t depth,
float id,
diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h
index 5522b46205b..366bfba7aca 100644
--- a/intern/cycles/kernel/integrator/intersect_closest.h
+++ b/intern/cycles/kernel/integrator/intersect_closest.h
@@ -88,7 +88,10 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
#ifdef __SHADOW_CATCHER__
/* Split path if a shadow catcher was hit. */
ccl_device_forceinline void integrator_split_shadow_catcher(
- KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+ KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const Intersection *ccl_restrict isect,
+ ccl_global float *ccl_restrict render_buffer)
{
/* Test if we hit a shadow catcher object, and potentially split the path to continue tracing two
* paths from here. */
@@ -97,6 +100,8 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
return;
}
+ kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer);
+
/* Mark state as having done a shadow catcher split so that it stops contributing to
* the shadow catcher matte pass, but keeps contributing to the combined pass. */
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SHADOW_CATCHER_HIT;
@@ -191,6 +196,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
KernelGlobals kg,
IntegratorState state,
ccl_private const Intersection *ccl_restrict isect,
+ ccl_global float *ccl_restrict render_buffer,
const bool hit)
{
/* Continue with volume kernel if we are inside a volume, regardless if we hit anything. */
@@ -233,7 +239,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
#ifdef __SHADOW_CATCHER__
/* Handle shadow catcher. */
- integrator_split_shadow_catcher(kg, state, isect);
+ integrator_split_shadow_catcher(kg, state, isect, render_buffer);
#endif
}
else {
@@ -253,7 +259,10 @@ ccl_device_forceinline void integrator_intersect_next_kernel(
* volume shading and termination testing have already been done. */
template<uint32_t current_kernel>
ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
- KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+ KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const Intersection *ccl_restrict isect,
+ ccl_global float *ccl_restrict render_buffer)
{
if (isect->prim != PRIM_NONE) {
/* Hit a surface, continue with light or surface kernel. */
@@ -278,7 +287,7 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
#ifdef __SHADOW_CATCHER__
/* Handle shadow catcher. */
- integrator_split_shadow_catcher(kg, state, isect);
+ integrator_split_shadow_catcher(kg, state, isect, render_buffer);
#endif
return;
}
@@ -290,7 +299,9 @@ ccl_device_forceinline void integrator_intersect_next_kernel_after_volume(
}
}
-ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState state)
+ccl_device void integrator_intersect_closest(KernelGlobals kg,
+ IntegratorState state,
+ ccl_global float *ccl_restrict render_buffer)
{
PROFILING_INIT(kg, PROFILING_INTERSECT_CLOSEST);
@@ -341,7 +352,7 @@ ccl_device void integrator_intersect_closest(KernelGlobals kg, IntegratorState s
/* Setup up next kernel to be executed. */
integrator_intersect_next_kernel<DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST>(
- kg, state, &isect, hit);
+ kg, state, &isect, render_buffer, hit);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/megakernel.h b/intern/cycles/kernel/integrator/megakernel.h
index d8cc794dc7a..43313400a11 100644
--- a/intern/cycles/kernel/integrator/megakernel.h
+++ b/intern/cycles/kernel/integrator/megakernel.h
@@ -76,7 +76,7 @@ ccl_device void integrator_megakernel(KernelGlobals kg,
if (queued_kernel) {
switch (queued_kernel) {
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
- integrator_intersect_closest(kg, state);
+ integrator_intersect_closest(kg, state, render_buffer);
break;
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
integrator_shade_background(kg, state, render_buffer);
diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h
index e6fe5a87120..3c84dcc3728 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -492,10 +492,6 @@ ccl_device bool integrate_surface(KernelGlobals kg,
kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
#endif
-#ifdef __SHADOW_CATCHER__
- kernel_write_shadow_catcher_bounce_data(kg, state, &sd, render_buffer);
-#endif
-
/* Direct light. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
integrate_surface_direct_light(kg, state, &sd, &rng_state);
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index 86d712cdf32..b4d02b1b664 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -1024,7 +1024,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
else {
/* Continue to background, light or surface. */
integrator_intersect_next_kernel_after_volume<DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME>(
- kg, state, &isect);
+ kg, state, &isect, render_buffer);
return;
}
#endif /* __VOLUME__ */
diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h
index ac55678c9cb..4d72aced3fc 100644
--- a/intern/cycles/kernel/integrator/shadow_catcher.h
+++ b/intern/cycles/kernel/integrator/shadow_catcher.h
@@ -16,6 +16,7 @@
#pragma once
+#include "kernel/film/write_passes.h"
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/state_util.h"
@@ -47,7 +48,7 @@ ccl_device_inline bool kernel_shadow_catcher_is_path_split_bounce(KernelGlobals
return false;
}
- if (path_flag & PATH_RAY_SHADOW_CATCHER_PASS) {
+ if (path_flag & PATH_RAY_SHADOW_CATCHER_HIT) {
return false;
}
@@ -88,6 +89,28 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t
return path_flag & PATH_RAY_SHADOW_CATCHER_PASS;
}
+/* Write shadow catcher passes on a bounce from the shadow catcher object. */
+ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
+ KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
+{
+ kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
+
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = render_buffer + render_buffer_offset;
+
+ /* Count sample for the shadow catcher object. */
+ kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
+
+ /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
+ * transparency to the matte. */
+ const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
+ average(throughput));
+}
+
#endif /* __SHADOW_CATCHER__ */
CCL_NAMESPACE_END