Cycles: Remove sum_all_radiance kernel

This was only needed for the previous implementation of parallel samples. As we don't have that anymore, it can be removed. The real reason for the removal, though, is this: `per_sample_output_buffers` was being calculated too small, and artifacts resulted. The tile buffer is already the correct size, and calculating the size for `per_sample_output_buffers` is a bit difficult with the current layout of the code. As `per_sample_output_buffers` was only needed for `sum_all_radiance`, removing that kernel and writing output to the tile buffer directly fixes the artifacts.
author: Mai Lavelle <mai.lavelle@gmail.com> 2017-03-01 09:47:08 +0300
committer: Mai Lavelle <mai.lavelle@gmail.com> 2017-03-08 09:31:07 +0300
commit: cd7d5669d17070799e2d2a2b28f58a06c3417d7b (patch)
tree: c5420e4cd9432c44575c1e489b9890ddd8439ba9 /intern/cycles/kernel/split/kernel_data_init.h
parent: 4cf501b83557ed5d64dbd2ddb13e1e8c5add88f5 (diff)
1 files changed, 30 insertions, 0 deletions
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 982c7be2008..c22703e5abd 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -24,6 +24,21 @@ CCL_NAMESPACE_BEGIN
  * The number of elements in the queues is initialized to 0;
  */
 
+/* Loop header that splits work_size iterations over all threads of the 2D global range: thread t (flattened global id) runs the contiguous indices [t*n, (t+1)*n) with n = work_size / global_size, then each thread with t < work_size % global_size also takes one leftover index n*global_size + t. iter_name is declared as a size_t loop variable, work_size is evaluated once, and kg is not used by the expansion.
+ * note: work done inside the loop may not be visible to all threads until after the current kernel has completed
+ */
+#define parallel_for(kg, iter_name, work_size) \
+	for(size_t _size = (work_size), \
+	    _global_size = ccl_global_size(0) * ccl_global_size(1), \
+	    _n = _size / _global_size, \
+		_thread = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0), \
+	    iter_name = (_n > 0) ? (_thread * _n) : (_thread) \
+		; \
+		(iter_name < (_thread+1) * _n) || (iter_name == _n * _global_size + _thread && _thread < _size % _global_size) \
+		; \
+		iter_name = (iter_name != (_thread+1) * _n - 1) ? (iter_name + 1) : (_n * _global_size + _thread) \
+	)
+
 #ifndef __KERNEL_CPU__
 ccl_device void kernel_data_init(
 #else
@@ -110,6 +125,21 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 		 */
 		*use_queues_flag = 0;
 	}
+
+	/* zero the tile's pixels if this is the first sample */
+	if(start_sample == 0) {
+		parallel_for(kg, i, sw * sh * kernel_data.film.pass_stride) {
+			int pixel = i / kernel_data.film.pass_stride;
+			int pass = i % kernel_data.film.pass_stride;
+
+			int x = sx + pixel % sw;
+			int y = sy + pixel / sw;
+
+			int index = (offset + x + y*stride) * kernel_data.film.pass_stride + pass;
+
+			*(buffer + index) = 0.0f;
+		}
+	}
 }
 
 CCL_NAMESPACE_END
author	Mai Lavelle <mai.lavelle@gmail.com>	2017-03-01 09:47:08 +0300
committer	Mai Lavelle <mai.lavelle@gmail.com>	2017-03-08 09:31:07 +0300
commit	cd7d5669d17070799e2d2a2b28f58a06c3417d7b (patch)
tree	c5420e4cd9432c44575c1e489b9890ddd8439ba9 /intern/cycles/kernel/split/kernel_data_init.h
parent	4cf501b83557ed5d64dbd2ddb13e1e8c5add88f5 (diff)