diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_passes.h')
-rw-r--r-- | intern/cycles/kernel/kernel_passes.h | 231 |
1 files changed, 84 insertions, 147 deletions
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index fff7f4cfdb7..644cc173571 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -16,19 +16,23 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, int sample, float value) +#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__) +#define __ATOMIC_PASS_WRITE__ +#endif + +ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value) { ccl_global float *buf = buffer; -#if defined(__SPLIT_KERNEL__) +#ifdef __ATOMIC_PASS_WRITE__ atomic_add_and_fetch_float(buf, value); #else - *buf = (sample == 0)? value: *buf + value; -#endif /* __SPLIT_KERNEL__ */ + *buf += value; +#endif } -ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sample, float3 value) +ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value) { -#if defined(__SPLIT_KERNEL__) +#ifdef __ATOMIC_PASS_WRITE__ ccl_global float *buf_x = buffer + 0; ccl_global float *buf_y = buffer + 1; ccl_global float *buf_z = buffer + 2; @@ -38,13 +42,13 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, int sa atomic_add_and_fetch_float(buf_z, value.z); #else ccl_global float3 *buf = (ccl_global float3*)buffer; - *buf = (sample == 0)? value: *buf + value; -#endif /* __SPLIT_KERNEL__ */ + *buf += value; +#endif } -ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sample, float4 value) +ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value) { -#if defined(__SPLIT_KERNEL__) +#ifdef __ATOMIC_PASS_WRITE__ ccl_global float *buf_x = buffer + 0; ccl_global float *buf_y = buffer + 1; ccl_global float *buf_z = buffer + 2; @@ -56,58 +60,35 @@ ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, int sa atomic_add_and_fetch_float(buf_w, value.w); #else ccl_global float4 *buf = (ccl_global float4*)buffer; - *buf = (sample == 0)? value: *buf + value; -#endif /* __SPLIT_KERNEL__ */ + *buf += value; +#endif } #ifdef __DENOISING_FEATURES__ -ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, int sample, float value) +ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value) { - kernel_write_pass_float(buffer, sample, value); + kernel_write_pass_float(buffer, value); /* The online one-pass variance update that's used for the megakernel can't easily be implemented * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */ -# ifdef __SPLIT_KERNEL__ - kernel_write_pass_float(buffer+1, sample, value*value); -# else - if(sample == 0) { - kernel_write_pass_float(buffer+1, sample, 0.0f); - } - else { - float new_mean = buffer[0] * (1.0f / (sample + 1)); - float old_mean = (buffer[0] - value) * (1.0f / sample); - kernel_write_pass_float(buffer+1, sample, (value - new_mean) * (value - old_mean)); - } -# endif + kernel_write_pass_float(buffer+1, value*value); } -# if defined(__SPLIT_KERNEL__) +# ifdef __ATOMIC_PASS_WRITE__ # define kernel_write_pass_float3_unaligned kernel_write_pass_float3 # else -ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, int sample, float3 value) +ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value) { - buffer[0] = (sample == 0)? value.x: buffer[0] + value.x; - buffer[1] = (sample == 0)? value.y: buffer[1] + value.y; - buffer[2] = (sample == 0)? value.z: buffer[2] + value.z; + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; } # endif -ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, int sample, float3 value) +ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value) { - kernel_write_pass_float3_unaligned(buffer, sample, value); -# ifdef __SPLIT_KERNEL__ - kernel_write_pass_float3_unaligned(buffer+3, sample, value*value); -# else - if(sample == 0) { - kernel_write_pass_float3_unaligned(buffer+3, sample, make_float3(0.0f, 0.0f, 0.0f)); - } - else { - float3 sum = make_float3(buffer[0], buffer[1], buffer[2]); - float3 new_mean = sum * (1.0f / (sample + 1)); - float3 old_mean = (sum - value) * (1.0f / sample); - kernel_write_pass_float3_unaligned(buffer+3, sample, (value - new_mean) * (value - old_mean)); - } -# endif + kernel_write_pass_float3_unaligned(buffer, value); + kernel_write_pass_float3_unaligned(buffer+3, value*value); } ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer, @@ -121,22 +102,11 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_glob path_total = ensure_finite(path_total); path_total_shaded = ensure_finite(path_total_shaded); - kernel_write_pass_float(buffer, sample/2, path_total); - kernel_write_pass_float(buffer+1, sample/2, path_total_shaded); + kernel_write_pass_float(buffer, path_total); + kernel_write_pass_float(buffer+1, path_total_shaded); float value = path_total_shaded / max(path_total, 1e-7f); -# ifdef __SPLIT_KERNEL__ - kernel_write_pass_float(buffer+2, sample/2, value*value); -# else - if(sample < 2) { - kernel_write_pass_float(buffer+2, sample/2, 0.0f); - } - else { - float old_value = (buffer[1] - path_total_shaded) / max(buffer[0] - path_total, 1e-7f); - float new_value = buffer[1] / max(buffer[0], 1e-7f); - kernel_write_pass_float(buffer+2, sample, (value - new_value) * (value - old_value)); - } -# endif + kernel_write_pass_float(buffer+2, value*value); } #endif /* __DENOISING_FEATURES__ */ @@ -197,28 +167,23 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, #ifdef __KERNEL_DEBUG__ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, ccl_global float *buffer, - PathRadiance *L, - int sample) + PathRadiance *L) { int flag = kernel_data.film.pass_flag; if(flag & PASS_BVH_TRAVERSED_NODES) { kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes, - sample, L->debug_data.num_bvh_traversed_nodes); } if(flag & PASS_BVH_TRAVERSED_INSTANCES) { kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances, - sample, L->debug_data.num_bvh_traversed_instances); } if(flag & PASS_BVH_INTERSECTIONS) { kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections, - sample, L->debug_data.num_bvh_intersections); } if(flag & PASS_RAY_BOUNCES) { kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces, - sample, L->debug_data.num_ray_bounces); } } @@ -243,35 +208,33 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl kernel_data.film.pass_alpha_threshold == 0.0f || average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { - int sample = state->sample; - - if(sample == 0) { + if(state->sample == 0) { if(flag & PASS_DEPTH) { float depth = camera_distance(kg, sd->P); - kernel_write_pass_float(buffer + kernel_data.film.pass_depth, sample, depth); + kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); } if(flag & PASS_OBJECT_ID) { float id = object_pass_id(kg, sd->object); - kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, sample, id); + kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); } if(flag & PASS_MATERIAL_ID) { float id = shader_pass_id(kg, sd); - kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, sample, id); + kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); } } if(flag & PASS_NORMAL) { float3 normal = shader_bsdf_average_normal(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, sample, normal); + kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); } if(flag & PASS_UV) { float3 uv = primitive_uv(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, sample, uv); + kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); } if(flag & PASS_MOTION) { float4 speed = primitive_motion_vector(kg, sd); - kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, sample, speed); - kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, sample, 1.0f); + kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); + kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); } state->flag |= PATH_RAY_SINGLE_PASS_DONE; @@ -314,7 +277,7 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl #endif } -ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, int sample) +ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L) { #ifdef __PASSES__ int flag = kernel_data.film.pass_flag; @@ -323,116 +286,90 @@ ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global f return; if(flag & PASS_DIFFUSE_INDIRECT) - kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, sample, L->indirect_diffuse); + kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse); if(flag & PASS_GLOSSY_INDIRECT) - kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, sample, L->indirect_glossy); + kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy); if(flag & PASS_TRANSMISSION_INDIRECT) - kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, sample, L->indirect_transmission); + kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, L->indirect_transmission); if(flag & PASS_SUBSURFACE_INDIRECT) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, sample, L->indirect_subsurface); + kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, L->indirect_subsurface); if(flag & PASS_DIFFUSE_DIRECT) - kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, sample, L->direct_diffuse); + kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse); if(flag & PASS_GLOSSY_DIRECT) - kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, sample, L->direct_glossy); + kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy); if(flag & PASS_TRANSMISSION_DIRECT) - kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, sample, L->direct_transmission); + kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, L->direct_transmission); if(flag & PASS_SUBSURFACE_DIRECT) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, sample, L->direct_subsurface); + kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, L->direct_subsurface); if(flag & PASS_EMISSION) - kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, sample, L->emission); + kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission); if(flag & PASS_BACKGROUND) - kernel_write_pass_float3(buffer + kernel_data.film.pass_background, sample, L->background); + kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background); if(flag & PASS_AO) - kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, sample, L->ao); + kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao); if(flag & PASS_DIFFUSE_COLOR) - kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, sample, L->color_diffuse); + kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse); if(flag & PASS_GLOSSY_COLOR) - kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, sample, L->color_glossy); + kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy); if(flag & PASS_TRANSMISSION_COLOR) - kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, sample, L->color_transmission); + kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, L->color_transmission); if(flag & PASS_SUBSURFACE_COLOR) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, sample, L->color_subsurface); + kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface); if(flag & PASS_SHADOW) { float4 shadow = L->shadow; shadow.w = kernel_data.film.pass_shadow_scale; - kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, sample, shadow); + kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow); } if(flag & PASS_MIST) - kernel_write_pass_float(buffer + kernel_data.film.pass_mist, sample, 1.0f - L->mist); + kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist); #endif } ccl_device_inline void kernel_write_result(KernelGlobals *kg, ccl_global float *buffer, int sample, PathRadiance *L) { - if(L) { - float alpha; - float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha); + float alpha; + float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha); - kernel_write_pass_float4(buffer, sample, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha)); + kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha)); - kernel_write_light_passes(kg, buffer, L, sample); + kernel_write_light_passes(kg, buffer, L); #ifdef __DENOISING_FEATURES__ - if(kernel_data.film.pass_denoising_data) { + if(kernel_data.film.pass_denoising_data) { # ifdef __SHADOW_TRICKS__ - kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, average(L->path_total), average(L->path_total_shaded)); + kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, average(L->path_total), average(L->path_total_shaded)); # else - kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f); + kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f); # endif - if(kernel_data.film.pass_denoising_clean) { - float3 noisy, clean; - path_radiance_split_denoising(kg, L, &noisy, &clean); - kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, - sample, noisy); - kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, - sample, clean); - } - else { - kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, - sample, ensure_finite3(L_sum)); - } - - kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, - sample, L->denoising_normal); - kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, - sample, L->denoising_albedo); - kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, - sample, L->denoising_depth); + if(kernel_data.film.pass_denoising_clean) { + float3 noisy, clean; + path_radiance_split_denoising(kg, L, &noisy, &clean); + kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, + noisy); + kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, + clean); } + else { + kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, + ensure_finite3(L_sum)); + } + + kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, + L->denoising_normal); + kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, + L->denoising_albedo); + kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, + L->denoising_depth); + } #endif /* __DENOISING_FEATURES__ */ #ifdef __KERNEL_DEBUG__ - kernel_write_debug_passes(kg, buffer, L, sample); + kernel_write_debug_passes(kg, buffer, L); #endif - } - else { - kernel_write_pass_float4(buffer, sample, make_float4(0.0f, 0.0f, 0.0f, 0.0f)); - -#ifdef __DENOISING_FEATURES__ - if(kernel_data.film.pass_denoising_data) { - kernel_write_denoising_shadow(kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f); - - kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, - sample, make_float3(0.0f, 0.0f, 0.0f)); - - kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, - sample, make_float3(0.0f, 0.0f, 0.0f)); - kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, - sample, make_float3(0.0f, 0.0f, 0.0f)); - kernel_write_pass_float_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, - sample, 0.0f); - - if(kernel_data.film.pass_denoising_clean) { - kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_clean, - sample, make_float3(0.0f, 0.0f, 0.0f)); - } - } -#endif /* __DENOISING_FEATURES__ */ - } } CCL_NAMESPACE_END |