Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel/kernel_passes.h')
-rw-r--r-- intern/cycles/kernel/kernel_passes.h 231
1 files changed, 84 insertions, 147 deletions
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index fff7f4cfdb7..644cc173571 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -16,19 +16,23 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sample, float value)
+#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__)
+#define __ATOMIC_PASS_WRITE__
+#endif
+
+ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value)
{
ccl_global float *buf = buffer;
-#if defined(__SPLIT_KERNEL__)
+#ifdef __ATOMIC_PASS_WRITE__
atomic_add_and_fetch_float(buf, value);
#else
- *buf = (sample == 0)? value: *buf + value;
-#endif /* __SPLIT_KERNEL__ */
+ *buf += value;
+#endif
}
-ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sample, float3 value)
+ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value)
{
-#if defined(__SPLIT_KERNEL__)
+#ifdef __ATOMIC_PASS_WRITE__
ccl_global float *buf_x = buffer + 0;
ccl_global float *buf_y = buffer + 1;
ccl_global float *buf_z = buffer + 2;
@@ -38,13 +42,13 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sa
atomic_add_and_fetch_float(buf_z, value.z);
#else
ccl_global float3 *buf = (ccl_global float3*)buffer;
- *buf = (sample == 0)? value: *buf + value;
-#endif /* __SPLIT_KERNEL__ */
+ *buf += value;
+#endif
}
-ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sample, float4 value)
+ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value)
{
-#if defined(__SPLIT_KERNEL__)
+#ifdef __ATOMIC_PASS_WRITE__
ccl_global float *buf_x = buffer + 0;
ccl_global float *buf_y = buffer + 1;
ccl_global float *buf_z = buffer + 2;
@@ -56,58 +60,35 @@ ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sa
atomic_add_and_fetch_float(buf_w, value.w);
#else
ccl_global float4 *buf = (ccl_global float4*)buffer;
- *buf = (sample == 0)? value: *buf + value;
-#endif /* __SPLIT_KERNEL__ */
+ *buf += value;
+#endif
}
#ifdef __DENOISING_FEATURES__
-ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, int sample, float value)
+ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value)
{
- kernel_write_pass_float(buffer, sample, value);
+ kernel_write_pass_float(buffer, value);
/* The online one-pass variance update that's used for the megakernel can't easily be implemented
* with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */
-# ifdef __SPLIT_KERNEL__
- kernel_write_pass_float(buffer+1, sample, value*value);
-# else
- if(sample == 0) {
- kernel_write_pass_float(buffer+1, sample, 0.0f);
- }
- else {
- float new_mean = buffer[0] * (1.0f / (sample + 1));
- float old_mean = (buffer[0] - value) * (1.0f / sample);
- kernel_write_pass_float(buffer+1, sample, (value - new_mean) * (value - old_mean));
- }
-# endif
+ kernel_write_pass_float(buffer+1, value*value);
}
-# if defined(__SPLIT_KERNEL__)
+# ifdef __ATOMIC_PASS_WRITE__
# define kernel_write_pass_float3_unaligned kernel_write_pass_float3
# else
-ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, int sample, float3 value)
+ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value)
{
- buffer[0] = (sample == 0)? value.x: buffer[0] + value.x;
- buffer[1] = (sample == 0)? value.y: buffer[1] + value.y;
- buffer[2] = (sample == 0)? value.z: buffer[2] + value.z;
+ buffer[0] += value.x;
+ buffer[1] += value.y;
+ buffer[2] += value.z;
}
# endif
-ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, int sample, float3 value)
+ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value)
{
- kernel_write_pass_float3_unaligned(buffer, sample, value);
-# ifdef __SPLIT_KERNEL__
- kernel_write_pass_float3_unaligned(buffer+3, sample, value*value);
-# else
- if(sample == 0) {
- kernel_write_pass_float3_unaligned(buffer+3, sample, make_float3(0.0f, 0.0f, 0.0f));
- }
- else {
- float3 sum = make_float3(buffer[0], buffer[1], buffer[2]);
- float3 new_mean = sum * (1.0f / (sample + 1));
- float3 old_mean = (sum - value) * (1.0f / sample);
- kernel_write_pass_float3_unaligned(buffer+3, sample, (value - new_mean) * (value - old_mean));
- }
-# endif
+ kernel_write_pass_float3_unaligned(buffer, value);
+ kernel_write_pass_float3_unaligned(buffer+3, value*value);
}
ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer,
@@ -121,22 +102,11 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_glob
path_total = ensure_finite(path_total);
path_total_shaded = ensure_finite(path_total_shaded);
- kernel_write_pass_float(buffer, sample/2, path_total);
- kernel_write_pass_float(buffer+1, sample/2, path_total_shaded);
+ kernel_write_pass_float(buffer, path_total);
+ kernel_write_pass_float(buffer+1, path_total_shaded);
float value = path_total_shaded / max(path_total, 1e-7f);
-# ifdef __SPLIT_KERNEL__
- kernel_write_pass_float(buffer+2, sample/2, value*value);
-# else
- if(sample < 2) {
- kernel_write_pass_float(buffer+2, sample/2, 0.0f);
- }
- else {
- float old_value = (buffer[1] - path_total_shaded) / max(buffer[0] - path_total, 1e-7f);
- float new_value = buffer[1] / max(buffer[0], 1e-7f);
- kernel_write_pass_float(buffer+2, sample, (value - new_value) * (value - old_value));
- }
-# endif
+ kernel_write_pass_float(buffer+2, value*value);
}
#endif /* __DENOISING_FEATURES__ */
@@ -197,28 +167,23 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
#ifdef __KERNEL_DEBUG__
ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
ccl_global float *buffer,
- PathRadiance *L,
- int sample)
+ PathRadiance *L)
{
int flag = kernel_data.film.pass_flag;
if(flag & PASS_BVH_TRAVERSED_NODES) {
kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes,
- sample,
L->debug_data.num_bvh_traversed_nodes);
}
if(flag & PASS_BVH_TRAVERSED_INSTANCES) {
kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances,
- sample,
L->debug_data.num_bvh_traversed_instances);
}
if(flag & PASS_BVH_INTERSECTIONS) {
kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections,
- sample,
L->debug_data.num_bvh_intersections);
}
if(flag & PASS_RAY_BOUNCES) {
kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces,
- sample,
L->debug_data.num_ray_bounces);
}
}
@@ -243,35 +208,33 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
kernel_data.film.pass_alpha_threshold == 0.0f ||
average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
{
- int sample = state->sample;
-
- if(sample == 0) {
+ if(state->sample == 0) {
if(flag & PASS_DEPTH) {
float depth = camera_distance(kg, sd->P);
- kernel_write_pass_float(buffer + kernel_data.film.pass_depth, sample, depth);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
}
if(flag & PASS_OBJECT_ID) {
float id = object_pass_id(kg, sd->object);
- kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, sample, id);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
}
if(flag & PASS_MATERIAL_ID) {
float id = shader_pass_id(kg, sd);
- kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, sample, id);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
}
}
if(flag & PASS_NORMAL) {
float3 normal = shader_bsdf_average_normal(kg, sd);
- kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, sample, normal);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
}
if(flag & PASS_UV) {
float3 uv = primitive_uv(kg, sd);
- kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, sample, uv);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv);
}
if(flag & PASS_MOTION) {
float4 speed = primitive_motion_vector(kg, sd);
- kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, sample, speed);
- kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, sample, 1.0f);
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
}
state->flag |= PATH_RAY_SINGLE_PASS_DONE;
@@ -314,7 +277,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
#endif
}
-ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, int sample)
+ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L)
{
#ifdef __PASSES__
int flag = kernel_data.film.pass_flag;
@@ -323,116 +286,90 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global f
return;
if(flag & PASS_DIFFUSE_INDIRECT)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, sample, L->indirect_diffuse);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse);
if(flag & PASS_GLOSSY_INDIRECT)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, sample, L->indirect_glossy);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy);
if(flag & PASS_TRANSMISSION_INDIRECT)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, sample, L->indirect_transmission);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, L->indirect_transmission);
if(flag & PASS_SUBSURFACE_INDIRECT)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, sample, L->indirect_subsurface);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, L->indirect_subsurface);
if(flag & PASS_DIFFUSE_DIRECT)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, sample, L->direct_diffuse);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse);
if(flag & PASS_GLOSSY_DIRECT)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, sample, L->direct_glossy);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy);
if(flag & PASS_TRANSMISSION_DIRECT)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, sample, L->direct_transmission);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, L->direct_transmission);
if(flag & PASS_SUBSURFACE_DIRECT)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, sample, L->direct_subsurface);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, L->direct_subsurface);
if(flag & PASS_EMISSION)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, sample, L->emission);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission);
if(flag & PASS_BACKGROUND)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_background, sample, L->background);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background);
if(flag & PASS_AO)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, sample, L->ao);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao);
if(flag & PASS_DIFFUSE_COLOR)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, sample, L->color_diffuse);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse);
if(flag & PASS_GLOSSY_COLOR)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, sample, L->color_glossy);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy);
if(flag & PASS_TRANSMISSION_COLOR)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, sample, L->color_transmission);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, L->color_transmission);
if(flag & PASS_SUBSURFACE_COLOR)
- kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, sample, L->color_subsurface);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface);
if(flag & PASS_SHADOW) {
float4 shadow = L->shadow;
shadow.w = kernel_data.film.pass_shadow_scale;
- kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, sample, shadow);
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow);
}
if(flag & PASS_MIST)
- kernel_write_pass_float(buffer + kernel_data.film.pass_mist, sample, 1.0f - L->mist);
+ kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist);
#endif
}
ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *buffer,
int sample, PathRadiance *L)
{
- if(L) {
- float alpha;
- float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
+ float alpha;
+ float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha);
- kernel_write_pass_float4(buffer, sample, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
+ kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha));
- kernel_write_light_passes(kg, buffer, L, sample);
+ kernel_write_light_passes(kg, buffer, L);
#ifdef __DENOISING_FEATURES__
- if(kernel_data.film.pass_denoising_data) {
+ if(kernel_data.film.pass_denoising_data) {
# ifdef __SHADOW_TRICKS__
- kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, average(L->path_total), average(L->path_total_shaded));
+ kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, average(L->path_total), average(L->path_total_shaded));
# else
- kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f);
+ kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f);
# endif
- if(kernel_data.film.pass_denoising_clean) {
- float3 noisy, clean;
- path_radiance_split_denoising(kg, L, &noisy, &clean);
- kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
- sample, noisy);
- kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean,
- sample, clean);
- }
- else {
- kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
- sample, ensure_finite3(L_sum));
- }
-
- kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL,
- sample, L->denoising_normal);
- kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO,
- sample, L->denoising_albedo);
- kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH,
- sample, L->denoising_depth);
+ if(kernel_data.film.pass_denoising_clean) {
+ float3 noisy, clean;
+ path_radiance_split_denoising(kg, L, &noisy, &clean);
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
+ noisy);
+ kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean,
+ clean);
}
+ else {
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
+ ensure_finite3(L_sum));
+ }
+
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL,
+ L->denoising_normal);
+ kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO,
+ L->denoising_albedo);
+ kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH,
+ L->denoising_depth);
+ }
#endif /* __DENOISING_FEATURES__ */
#ifdef __KERNEL_DEBUG__
- kernel_write_debug_passes(kg, buffer, L, sample);
+ kernel_write_debug_passes(kg, buffer, L);
#endif
- }
- else {
- kernel_write_pass_float4(buffer, sample, make_float4(0.0f, 0.0f, 0.0f, 0.0f));
-
-#ifdef __DENOISING_FEATURES__
- if(kernel_data.film.pass_denoising_data) {
- kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f);
-
- kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR,
- sample, make_float3(0.0f, 0.0f, 0.0f));
-
- kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL,
- sample, make_float3(0.0f, 0.0f, 0.0f));
- kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO,
- sample, make_float3(0.0f, 0.0f, 0.0f));
- kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH,
- sample, 0.0f);
-
- if(kernel_data.film.pass_denoising_clean) {
- kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean,
- sample, make_float3(0.0f, 0.0f, 0.0f));
- }
- }
-#endif /* __DENOISING_FEATURES__ */
- }
}
CCL_NAMESPACE_END