diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_write_passes.h')
-rw-r--r-- | intern/cycles/kernel/kernel_write_passes.h | 53 |
1 files changed, 23 insertions, 30 deletions
diff --git a/intern/cycles/kernel/kernel_write_passes.h b/intern/cycles/kernel/kernel_write_passes.h index 410218d91d4..9d379495629 100644 --- a/intern/cycles/kernel/kernel_write_passes.h +++ b/intern/cycles/kernel/kernel_write_passes.h @@ -14,23 +14,25 @@ * limitations under the License. */ -#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__) +#pragma once + +#ifdef __KERNEL_GPU__ # define __ATOMIC_PASS_WRITE__ #endif CCL_NAMESPACE_BEGIN -ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value) +ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value) { - ccl_global float *buf = buffer; #ifdef __ATOMIC_PASS_WRITE__ - atomic_add_and_fetch_float(buf, value); + atomic_add_and_fetch_float(buffer, value); #else - *buf += value; + *buffer += value; #endif } -ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value) +ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict buffer, + float3 value) { #ifdef __ATOMIC_PASS_WRITE__ ccl_global float *buf_x = buffer + 0; @@ -41,12 +43,14 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 atomic_add_and_fetch_float(buf_y, value.y); atomic_add_and_fetch_float(buf_z, value.z); #else - ccl_global float3 *buf = (ccl_global float3 *)buffer; - *buf += value; + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; #endif } -ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value) +ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict buffer, + float4 value) { #ifdef __ATOMIC_PASS_WRITE__ ccl_global float *buf_x = buffer + 0; @@ -59,37 +63,26 @@ ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 atomic_add_and_fetch_float(buf_z, value.z); atomic_add_and_fetch_float(buf_w, value.w); #else - ccl_global float4 *buf = (ccl_global float4 *)buffer; - *buf += value; + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; + buffer[3] += value.w; #endif } -#ifdef __DENOISING_FEATURES__ -ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value) +ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer) { - kernel_write_pass_float(buffer, value); - - /* The online one-pass variance update that's used for the megakernel can't easily be implemented - * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */ - kernel_write_pass_float(buffer + 1, value * value); + return *buffer; } -# ifdef __ATOMIC_PASS_WRITE__ -# define kernel_write_pass_float3_unaligned kernel_write_pass_float3 -# else -ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value) +ccl_device_inline float3 kernel_read_pass_float3(ccl_global float *ccl_restrict buffer) { - buffer[0] += value.x; - buffer[1] += value.y; - buffer[2] += value.z; + return make_float3(buffer[0], buffer[1], buffer[2]); } -# endif -ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value) +ccl_device_inline float4 kernel_read_pass_float4(ccl_global float *ccl_restrict buffer) { - kernel_write_pass_float3_unaligned(buffer, value); - kernel_write_pass_float3_unaligned(buffer + 3, value * value); + return make_float4(buffer[0], buffer[1], buffer[2], buffer[3]); } -#endif /* __DENOISING_FEATURES__ */ CCL_NAMESPACE_END |