1 files changed, 95 insertions, 0 deletions
diff --git a/intern/cycles/kernel/kernel_write_passes.h b/intern/cycles/kernel/kernel_write_passes.h
new file mode 100644
index 00000000000..410218d91d4
--- /dev/null
+++ b/intern/cycles/kernel/kernel_write_passes.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined(__KERNEL_CUDA__) || defined(__SPLIT_KERNEL__)
+#  define __ATOMIC_PASS_WRITE__ /* Selects the atomic_add_and_fetch_float() paths below. */
+#endif /* __KERNEL_CUDA__ || __SPLIT_KERNEL__ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Add `value` to `*buffer`; uses atomic_add_and_fetch_float() under __ATOMIC_PASS_WRITE__. */
+ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value)
+{
+#ifndef __ATOMIC_PASS_WRITE__
+  buffer[0] += value;
+#else
+  atomic_add_and_fetch_float(buffer, value);
+#endif
+}
+
+/* Add a float3 `value` into the pass data that starts at `buffer`. */
+ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value)
+{
+#ifndef __ATOMIC_PASS_WRITE__
+  /* One vector `+=` through a float3 view of the buffer.
+   * NOTE(review): presumably needs `buffer` aligned/padded for float3 - confirm with callers. */
+  ccl_global float3 *pass = (ccl_global float3 *)buffer;
+  *pass += value;
+#else
+  /* One atomic_add_and_fetch_float() call per component, in x, y, z order. */
+  atomic_add_and_fetch_float(buffer + 0, value.x);
+  atomic_add_and_fetch_float(buffer + 1, value.y);
+  atomic_add_and_fetch_float(buffer + 2, value.z);
+#endif
+}
+
+/* Add a float4 `value` into the pass data that starts at `buffer`. */
+ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value)
+{
+#ifndef __ATOMIC_PASS_WRITE__
+  /* One vector `+=` through a float4 view of the buffer.
+   * NOTE(review): presumably needs `buffer` aligned for float4 - confirm with callers. */
+  ccl_global float4 *pass = (ccl_global float4 *)buffer;
+  *pass += value;
+#else
+  /* One atomic_add_and_fetch_float() call per component, in x, y, z, w order. */
+  atomic_add_and_fetch_float(buffer + 0, value.x);
+  atomic_add_and_fetch_float(buffer + 1, value.y);
+  atomic_add_and_fetch_float(buffer + 2, value.z);
+  atomic_add_and_fetch_float(buffer + 3, value.w);
+#endif
+}
+
+#ifdef __DENOISING_FEATURES__
+ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value)
+{
+  /* buffer[0] accumulates `value` and buffer[1] accumulates `value * value`. Sums are kept
+   * (the E[x^2] - 1/N * (E[x])^2 form) because the online one-pass variance update used for the
+   * megakernel can't easily be implemented with atomics. */
+  kernel_write_pass_float(&buffer[0], value);
+  kernel_write_pass_float(&buffer[1], value * value);
+}
+
+#  if !defined(__ATOMIC_PASS_WRITE__) /* Non-atomic build: scalar adds, no float3 cast. */
+ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value)
+{
+  *(buffer + 0) += value.x;
+  *(buffer + 1) += value.y;
+  *(buffer + 2) += value.z;
+}
+#  else /* The atomic float3 writer already adds per component, so it is reused as-is. */
+#    define kernel_write_pass_float3_unaligned kernel_write_pass_float3
+#  endif /* !__ATOMIC_PASS_WRITE__ */
+
+ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value)
+{
+  kernel_write_pass_float3_unaligned(&buffer[0], value);         /* floats 0..2: value */
+  kernel_write_pass_float3_unaligned(&buffer[3], value * value); /* floats 3..5: value * value */
+}
+#endif /* __DENOISING_FEATURES__ */
+
+CCL_NAMESPACE_END