diff options
Diffstat (limited to 'intern/cycles/kernel/kernel_write_passes.h')
-rw-r--r-- | intern/cycles/kernel/kernel_write_passes.h | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/intern/cycles/kernel/kernel_write_passes.h b/intern/cycles/kernel/kernel_write_passes.h new file mode 100644 index 00000000000..410218d91d4 --- /dev/null +++ b/intern/cycles/kernel/kernel_write_passes.h @@ -0,0 +1,95 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__) +# define __ATOMIC_PASS_WRITE__ +#endif + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value) +{ + ccl_global float *buf = buffer; +#ifdef __ATOMIC_PASS_WRITE__ + atomic_add_and_fetch_float(buf, value); +#else + *buf += value; +#endif +} + +ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value) +{ +#ifdef __ATOMIC_PASS_WRITE__ + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); +#else + ccl_global float3 *buf = (ccl_global float3 *)buffer; + *buf += value; +#endif +} + +ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value) +{ +#ifdef __ATOMIC_PASS_WRITE__ + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + ccl_global float *buf_w = buffer + 3; + + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); + atomic_add_and_fetch_float(buf_w, value.w); +#else + ccl_global float4 *buf = (ccl_global float4 *)buffer; + *buf += value; +#endif +} + +#ifdef __DENOISING_FEATURES__ +ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value) +{ + kernel_write_pass_float(buffer, value); + + /* The online one-pass variance update that's used for the megakernel can't easily be implemented + * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */ + kernel_write_pass_float(buffer + 1, value * value); +} + +# ifdef __ATOMIC_PASS_WRITE__ +# define kernel_write_pass_float3_unaligned kernel_write_pass_float3 +# else +ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value) +{ + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; +} +# endif + +ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value) +{ + kernel_write_pass_float3_unaligned(buffer, value); + kernel_write_pass_float3_unaligned(buffer + 3, value * value); +} +#endif /* __DENOISING_FEATURES__ */ + +CCL_NAMESPACE_END |