Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2021-09-20 17:16:11 +0300
committerBrecht Van Lommel <brecht@blender.org>2021-10-19 16:11:09 +0300
commitfd77a28031daff3122ded3a1cb37a7fb44feedf6 (patch)
treef54967b7f5f1175555aa21d613137fe436d7fc8c /intern/cycles/kernel
parentd06828f0b8ebb083de59fd2cb8c5f8fe6af1da22 (diff)
Cycles: bake transparent shadows for hair
These transparent shadows can be expansive to evaluate. Especially on the GPU they can lead to poor occupancy when only some pixels require many kernel launches to trace and evaluate many layers of transparency. Baked transparency allows tracing a single ray in many cases by accumulating the throughput directly in the intersection program without recording hits or evaluating shaders. Transparency is baked at curve vertices and interpolated, for most shaders this will look practically the same as actual shader evaluation. Fixes T91428, performance regression with spring demo file due to transparent hair, and makes it render significantly faster than Blender 2.93. Differential Revision: https://developer.blender.org/D12880
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/bvh/bvh.h29
-rw-r--r--intern/cycles/kernel/bvh/bvh_embree.h8
-rw-r--r--intern/cycles/kernel/bvh/bvh_shadow_all.h85
-rw-r--r--intern/cycles/kernel/bvh/bvh_util.h28
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch.h5
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch_impl.h13
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h16
-rw-r--r--intern/cycles/kernel/device/optix/kernel.cu38
-rw-r--r--intern/cycles/kernel/geom/geom_shader_data.h75
-rw-r--r--intern/cycles/kernel/integrator/integrator_intersect_shadow.h17
-rw-r--r--intern/cycles/kernel/integrator/integrator_shade_shadow.h10
-rw-r--r--intern/cycles/kernel/integrator/integrator_state.h13
-rw-r--r--intern/cycles/kernel/kernel_bake.h21
-rw-r--r--intern/cycles/kernel/kernel_types.h11
14 files changed, 294 insertions, 75 deletions
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 0d9ba7e6369..813ac15711e 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -367,12 +367,13 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
ccl_private const Ray *ray,
uint visibility,
uint max_hits,
- ccl_private uint *num_hits)
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
{
# ifdef __KERNEL_OPTIX__
uint p0 = state;
- uint p1 = 0; /* Unused */
- uint p2 = 0; /* Number of hits. */
+ uint p1 = __float_as_uint(1.0f); /* Throughput. */
+ uint p2 = 0; /* Number of hits. */
uint p3 = max_hits;
uint p4 = visibility;
uint p5 = false;
@@ -382,7 +383,6 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
ray_mask = 0xFF;
}
- *num_hits = 0; /* Initialize hit count to zero. */
optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
ray->P,
ray->D,
@@ -402,12 +402,14 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
p4,
p5);
- *num_hits = p2;
+ *num_recorded_hits = uint16_unpack_from_uint_0(p2);
+ *throughput = __uint_as_float(p1);
return p5;
# else /* __KERNEL_OPTIX__ */
if (!scene_intersect_valid(ray)) {
- *num_hits = 0;
+ *num_recorded_hits = 0;
+ *throughput = 1.0f;
return false;
}
@@ -422,7 +424,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
- *num_hits = ctx.num_hits;
+ *num_recorded_hits = ctx.num_recorded_hits;
+ *throughput = ctx.throughput;
return ctx.opaque_hit;
}
# endif /* __EMBREE__ */
@@ -431,21 +434,25 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
if (kernel_data.bvh.have_motion) {
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
- return bvh_intersect_shadow_all_hair_motion(kg, ray, state, visibility, max_hits, num_hits);
+ return bvh_intersect_shadow_all_hair_motion(
+ kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
- return bvh_intersect_shadow_all_motion(kg, ray, state, visibility, max_hits, num_hits);
+ return bvh_intersect_shadow_all_motion(
+ kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
- return bvh_intersect_shadow_all_hair(kg, ray, state, visibility, max_hits, num_hits);
+ return bvh_intersect_shadow_all_hair(
+ kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
# endif /* __HAIR__ */
- return bvh_intersect_shadow_all(kg, ray, state, visibility, max_hits, num_hits);
+ return bvh_intersect_shadow_all(
+ kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
# endif /* __KERNEL_OPTIX__ */
}
#endif /* __SHADOW_RECORD_ALL__ */
diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
index 4f85e8bee4b..321e0f28dae 100644
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -40,8 +40,10 @@ struct CCLIntersectContext {
/* for shadow rays */
Intersection *isect_s;
- int max_hits;
- int num_hits;
+ uint max_hits;
+ uint num_hits;
+ uint num_recorded_hits;
+ float throughput;
float max_t;
bool opaque_hit;
@@ -56,6 +58,8 @@ struct CCLIntersectContext {
type = type_;
max_hits = 1;
num_hits = 0;
+ num_recorded_hits = 0;
+ throughput = 1.0f;
max_t = FLT_MAX;
opaque_hit = false;
isect_s = NULL;
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index b997235b6e4..049c6a03fe0 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -41,7 +41,8 @@ ccl_device_inline
IntegratorShadowState state,
const uint visibility,
const uint max_hits,
- ccl_private uint *num_hits)
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
{
/* todo:
* - likely and unlikely for if() statements
@@ -61,6 +62,7 @@ ccl_device_inline
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
int object = OBJECT_NONE;
+ uint num_hits = 0;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@@ -77,7 +79,8 @@ ccl_device_inline
* otherwise. */
float t_world_to_instance = 1.0f;
- *num_hits = 0;
+ *num_recorded_hits = 0;
+ *throughput = 1.0f;
/* traversal loop */
do {
@@ -212,42 +215,62 @@ ccl_device_inline
* the primitive has a transparent shadow shader? */
const int flags = intersection_get_shader_flags(kg, isect.prim, isect.type);
- if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || max_hits == 0) {
+ if (!(flags & SD_HAS_TRANSPARENT_SHADOW) || num_hits >= max_hits) {
/* If no transparent shadows, all light is blocked and we can
* stop immediately. */
return true;
}
- /* Increase the number of hits, possibly beyond max_hits, we will
- * simply not record those and only keep the max_hits closest. */
- uint record_index = (*num_hits)++;
-
- if (record_index >= max_hits - 1) {
- /* If maximum number of hits reached, find the intersection with
- * the largest distance to potentially replace when another hit
- * is found. */
- const int num_recorded_hits = min(max_hits, record_index);
- float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
- int max_recorded_hit = 0;
-
- for (int i = 1; i < num_recorded_hits; i++) {
- const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
- if (isect_t > max_recorded_t) {
- max_recorded_t = isect_t;
- max_recorded_hit = i;
- }
+ num_hits++;
+
+ bool record_intersection = true;
+
+ /* Always use baked shadow transparency for curves. */
+ if (isect.type & PRIMITIVE_ALL_CURVE) {
+ *throughput *= intersection_curve_shadow_transparency(
+ kg, isect.object, isect.prim, isect.u);
+
+ if (*throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ return true;
+ }
+ else {
+ record_intersection = false;
}
+ }
- if (record_index >= max_hits) {
- record_index = max_recorded_hit;
+ if (record_intersection) {
+ /* Increase the number of hits, possibly beyond max_hits, we will
+ * simply not record those and only keep the max_hits closest. */
+ uint record_index = (*num_recorded_hits)++;
+
+ const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (record_index >= max_record_hits - 1) {
+ /* If maximum number of hits reached, find the intersection with
+ * the largest distance to potentially replace when another hit
+ * is found. */
+ const int num_recorded_hits = min(max_record_hits, record_index);
+ float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
+ int max_recorded_hit = 0;
+
+ for (int i = 1; i < num_recorded_hits; i++) {
+ const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
+ if (isect_t > max_recorded_t) {
+ max_recorded_t = isect_t;
+ max_recorded_hit = i;
+ }
+ }
+
+ if (record_index >= max_record_hits) {
+ record_index = max_recorded_hit;
+ }
+
+ /* Limit the ray distance and stop counting hits beyond this. */
+ t_max_world = max(max_recorded_t, isect.t);
+ t_max_current = t_max_world * t_world_to_instance;
}
- /* Limit the ray distance and stop counting hits beyond this. */
- t_max_world = max(max_recorded_t, isect.t);
- t_max_current = t_max_world * t_world_to_instance;
+ integrator_state_write_shadow_isect(state, &isect, record_index);
}
-
- integrator_state_write_shadow_isect(state, &isect, record_index);
}
prim_addr++;
@@ -304,9 +327,11 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals kg,
IntegratorShadowState state,
const uint visibility,
const uint max_hits,
- ccl_private uint *num_hits)
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
{
- return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, state, visibility, max_hits, num_hits);
+ return BVH_FUNCTION_FULL_NAME(BVH)(
+ kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
#undef BVH_FUNCTION_NAME
diff --git a/intern/cycles/kernel/bvh/bvh_util.h b/intern/cycles/kernel/bvh/bvh_util.h
index 869311b38e2..309f0eeb1e2 100644
--- a/intern/cycles/kernel/bvh/bvh_util.h
+++ b/intern/cycles/kernel/bvh/bvh_util.h
@@ -195,4 +195,32 @@ ccl_device_inline int intersection_find_attribute(KernelGlobals kg,
return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
}
+/* Transparent Shadows */
+
+/* Cut-off value to stop transparent shadow tracing when practically opaque. */
+#define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f
+
+ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg,
+ const int object,
+ const int prim,
+ const float u)
+{
+ /* Find attribute. */
+ const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY);
+ if (offset == ATTR_STD_NOT_FOUND) {
+ /* If no shadow transparency attribute, assume opaque. */
+ return 0.0f;
+ }
+
+ /* Interpolate transparency between curve keys. */
+ const KernelCurve kcurve = kernel_tex_fetch(__curves, prim);
+ const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type);
+ const int k1 = k0 + 1;
+
+ const float f0 = kernel_tex_fetch(__attributes_float, offset + k0);
+ const float f1 = kernel_tex_fetch(__attributes_float, offset + k1);
+
+ return (1.0f - u) * f0 + u * f1;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h
index ae7fab65100..7a438b58e73 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch.h
@@ -64,6 +64,11 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg,
const KernelShaderEvalInput *input,
float *output,
const int offset);
+void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
+ const KernelGlobalsCPU *kg,
+ const KernelShaderEvalInput *input,
+ float *output,
+ const int offset);
/* --------------------------------------------------------------------
* Adaptive sampling.
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
index 2b0eea4fb61..ac606c768db 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
@@ -150,6 +150,19 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *k
#endif
}
+void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
+ const KernelGlobalsCPU *kg,
+ const KernelShaderEvalInput *input,
+ float *output,
+ const int offset)
+{
+#ifdef KERNEL_STUB
+ STUB_ASSERT(KERNEL_ARCH, shader_eval_curve_shadow_transparency);
+#else
+ kernel_curve_shadow_transparency_evaluate(kg, input, output, offset);
+#endif
+}
+
/* --------------------------------------------------------------------
* Adaptive sampling.
*/
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 6b4d79ed5b7..b6df74e835a 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -621,7 +621,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
-/* Background Shader Evaluation */
+/* Background */
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
kernel_gpu_shader_eval_background(KernelShaderEvalInput *input,
@@ -635,6 +635,20 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
}
}
+/* Curve Shadow Transparency */
+
+ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
+ kernel_gpu_shader_eval_curve_shadow_transparency(KernelShaderEvalInput *input,
+ float *output,
+ const int offset,
+ const int work_size)
+{
+ int i = ccl_gpu_global_id_x();
+ if (i < work_size) {
+ kernel_curve_shadow_transparency_evaluate(NULL, input, output, offset + i);
+ }
+}
+
/* --------------------------------------------------------------------
* Denoising.
*/
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 574f66ab708..a3bafb9846c 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -210,29 +210,50 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
optixSetPayload_5(true);
return optixTerminateRay();
# else
- const int max_hits = optixGetPayload_3();
+ const uint max_hits = optixGetPayload_3();
+ const uint num_hits_packed = optixGetPayload_2();
+ const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
+ const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
/* If no transparent shadows, all light is blocked and we can stop immediately. */
- if (max_hits == 0 ||
+ if (num_hits >= max_hits ||
!(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
optixSetPayload_5(true);
return optixTerminateRay();
}
+ /* Always use baked shadow transparency for curves. */
+ if (type & PRIMITIVE_ALL_CURVE) {
+ float throughput = __uint_as_float(optixGetPayload_1());
+ throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u);
+ optixSetPayload_1(__float_as_uint(throughput));
+ optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
+
+ if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ optixSetPayload_4(true);
+ return optixTerminateRay();
+ }
+ else {
+ /* Continue tracing. */
+ optixIgnoreIntersection();
+ return;
+ }
+ }
+
/* Record transparent intersection. */
- const int num_hits = optixGetPayload_2();
- int record_index = num_hits;
+ optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
- optixSetPayload_2(num_hits + 1);
+ uint record_index = num_recorded_hits;
const IntegratorShadowState state = optixGetPayload_0();
- if (record_index >= max_hits) {
+ const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (record_index >= max_record_hits) {
/* If maximum number of hits reached, find a hit to replace. */
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
- int max_recorded_hit = 0;
+ uint max_recorded_hit = 0;
- for (int i = 1; i < max_hits; i++) {
+ for (int i = 1; i < max_record_hits; i++) {
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
if (isect_t > max_recorded_t) {
max_recorded_t = isect_t;
@@ -256,6 +277,7 @@ extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
+ /* Continue tracing. */
optixIgnoreIntersection();
# endif /* __TRANSPARENT_SHADOWS__ */
#endif /* __SHADOW_RECORD_ALL__ */
diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h
index e6a5b8f7923..46bda2b656c 100644
--- a/intern/cycles/kernel/geom/geom_shader_data.h
+++ b/intern/cycles/kernel/geom/geom_shader_data.h
@@ -279,6 +279,81 @@ ccl_device void shader_setup_from_displace(KernelGlobals kg,
LAMP_NONE);
}
+/* ShaderData setup for point on curve. */
+
+ccl_device void shader_setup_from_curve(KernelGlobals kg,
+ ccl_private ShaderData *ccl_restrict sd,
+ int object,
+ int prim,
+ int segment,
+ float u)
+{
+ /* Primitive */
+ sd->type = PRIMITIVE_PACK_SEGMENT(PRIMITIVE_CURVE_THICK, segment);
+ sd->lamp = LAMP_NONE;
+ sd->prim = prim;
+ sd->u = u;
+ sd->v = 0.0f;
+ sd->time = 0.5f;
+ sd->ray_length = 0.0f;
+
+ /* Shader */
+ sd->shader = kernel_tex_fetch(__curves, prim).shader_id;
+ sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags;
+
+ /* Object */
+ sd->object = object;
+ sd->object_flag = kernel_tex_fetch(__object_flag, sd->object);
+#ifdef __OBJECT_MOTION__
+ shader_setup_object_transforms(kg, sd, sd->time);
+#endif
+
+ /* Get control points. */
+ KernelCurve kcurve = kernel_tex_fetch(__curves, prim);
+
+ int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
+ int ka = max(k0 - 1, kcurve.first_key);
+ int kb = min(k1 + 1, kcurve.first_key + kcurve.num_keys - 1);
+
+ float4 P_curve[4];
+
+ P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+
+ /* Interpolate position and tangent. */
+ sd->P = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u));
+#ifdef __DPDU__
+ sd->dPdu = float4_to_float3(catmull_rom_basis_derivative(P_curve, sd->u));
+#endif
+
+ /* Transform into world space */
+ if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ object_position_transform_auto(kg, sd, &sd->P);
+#ifdef __DPDU__
+ object_dir_transform_auto(kg, sd, &sd->dPdu);
+#endif
+ }
+
+ /* No view direction, normals or bitangent. */
+ sd->I = zero_float3();
+ sd->N = zero_float3();
+ sd->Ng = zero_float3();
+#ifdef __DPDU__
+ sd->dPdv = zero_float3();
+#endif
+
+ /* No ray differentials currently. */
+#ifdef __RAY_DIFFERENTIALS__
+ sd->dP = differential3_zero();
+ sd->dI = differential3_zero();
+ sd->du = differential_zero();
+ sd->dv = differential_zero();
+#endif
+}
+
/* ShaderData setup from ray into background */
ccl_device_inline void shader_setup_from_background(KernelGlobals kg,
diff --git a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
index d5c6ec145f0..90422445fad 100644
--- a/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_intersect_shadow.h
@@ -115,18 +115,25 @@ ccl_device bool integrate_intersect_shadow_transparent(KernelGlobals kg,
{
/* Limit the number hits to the max transparent bounces allowed and the size that we
* have available in the integrator state. */
- const uint max_transparent_hits = integrate_shadow_max_transparent_hits(kg, state);
- const uint max_hits = min(max_transparent_hits, (uint)INTEGRATOR_SHADOW_ISECT_SIZE);
+ const uint max_hits = integrate_shadow_max_transparent_hits(kg, state);
uint num_hits = 0;
- bool opaque_hit = scene_intersect_shadow_all(kg, state, ray, visibility, max_hits, &num_hits);
+ float throughput = 1.0f;
+ bool opaque_hit = scene_intersect_shadow_all(
+ kg, state, ray, visibility, max_hits, &num_hits, &throughput);
+
+ /* Computed throughput from baked shadow transparency, where we can bypass recording
+ * intersections and shader evaluation. */
+ if (throughput != 1.0f) {
+ INTEGRATOR_STATE_WRITE(state, shadow_path, throughput) *= throughput;
+ }
/* If number of hits exceed the transparent bounces limit, make opaque. */
- if (num_hits > max_transparent_hits) {
+ if (num_hits > max_hits) {
opaque_hit = true;
}
if (!opaque_hit) {
- uint num_recorded_hits = min(num_hits, max_hits);
+ const uint num_recorded_hits = min(num_hits, min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE));
if (num_recorded_hits > 0) {
sort_shadow_intersections(state, num_recorded_hits);
diff --git a/intern/cycles/kernel/integrator/integrator_shade_shadow.h b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
index 94900754b76..2d056a0b76f 100644
--- a/intern/cycles/kernel/integrator/integrator_shade_shadow.h
+++ b/intern/cycles/kernel/integrator/integrator_shade_shadow.h
@@ -23,7 +23,7 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline bool shadow_intersections_has_remaining(const int num_hits)
+ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits)
{
return num_hits >= INTEGRATOR_SHADOW_ISECT_SIZE;
}
@@ -105,12 +105,12 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
IntegratorShadowState state,
- const int num_hits)
+ const uint num_hits)
{
/* Accumulate shadow for transparent surfaces. */
- const int num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ const uint num_recorded_hits = min(num_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
- for (int hit = 0; hit < num_recorded_hits + 1; hit++) {
+ for (uint hit = 0; hit < num_recorded_hits + 1; hit++) {
/* Volume shaders. */
if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) {
# ifdef __VOLUME__
@@ -162,7 +162,7 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg,
ccl_global float *ccl_restrict render_buffer)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SETUP);
- const int num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits);
+ const uint num_hits = INTEGRATOR_STATE(state, shadow_path, num_hits);
#ifdef __TRANSPARENT_SHADOWS__
/* Evaluate transparent shadows. */
diff --git a/intern/cycles/kernel/integrator/integrator_state.h b/intern/cycles/kernel/integrator/integrator_state.h
index 84f34c6b986..4f21ab35d1f 100644
--- a/intern/cycles/kernel/integrator/integrator_state.h
+++ b/intern/cycles/kernel/integrator/integrator_state.h
@@ -48,19 +48,6 @@
CCL_NAMESPACE_BEGIN
-/* Constants
- *
- * TODO: these could be made dynamic depending on the features used in the scene. */
-
-#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024
-#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4
-
-#ifdef __KERNEL_CPU__
-# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
-#else
-# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
-#endif
-
/* Data structures */
/* Integrator State
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 1473e9ba8bf..30a41b9d3e3 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -96,4 +96,25 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg,
output[offset * 3 + 2] += color.z;
}
+ccl_device void kernel_curve_shadow_transparency_evaluate(
+ KernelGlobals kg,
+ ccl_global const KernelShaderEvalInput *input,
+ ccl_global float *output,
+ const int offset)
+{
+ /* Setup shader data. */
+ const KernelShaderEvalInput in = input[offset];
+
+ ShaderData sd;
+ shader_setup_from_curve(kg, &sd, in.object, in.prim, __float_as_int(in.v), in.u);
+
+ /* Evaluate transparency. */
+ shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW &
+ ~(KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_NODE_LIGHT_PATH)>(
+ kg, INTEGRATOR_STATE_NULL, &sd, NULL, PATH_RAY_SHADOW);
+
+ /* Write output. */
+ output[offset] = clamp(average(shader_bsdf_transparency(kg, &sd)), 0.0f, 1.0f);
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index edae158f403..fa8453b99cb 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -61,6 +61,15 @@ CCL_NAMESPACE_BEGIN
#define ID_NONE (0.0f)
#define PASS_UNUSED (~0)
+#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U
+#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U
+
+#ifdef __KERNEL_CPU__
+# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
+#else
+# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
+#endif
+
/* Kernel features */
#define __SOBOL__
#define __DPDU__
@@ -582,6 +591,7 @@ typedef enum AttributeStandard {
ATTR_STD_VOLUME_VELOCITY,
ATTR_STD_POINTINESS,
ATTR_STD_RANDOM_PER_ISLAND,
+ ATTR_STD_SHADOW_TRANSPARENCY,
ATTR_STD_NUM,
ATTR_STD_NOT_FOUND = ~0
@@ -1452,6 +1462,7 @@ typedef enum DeviceKernel {
DEVICE_KERNEL_SHADER_EVAL_DISPLACE,
DEVICE_KERNEL_SHADER_EVAL_BACKGROUND,
+ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY,
#define DECLARE_FILM_CONVERT_KERNEL(variant) \
DEVICE_KERNEL_FILM_CONVERT_##variant, DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA