Cycles: Remove sum_all_radiance kernel

This was only needed for the previous implementation of parallel samples. As we don't have that anymore, it can be removed. The real reason for the removal, though, is this: `per_sample_output_buffers` was being calculated too small, and artifacts resulted. The tile buffer is already the correct size, and calculating the size for `per_sample_output_buffers` is a bit difficult with the current layout of the code. As `per_sample_output_buffers` was only needed for `sum_all_radiance`, removing that kernel and writing output to the tile buffer directly fixes the artifacts.
author: Mai Lavelle <mai.lavelle@gmail.com> 2017-03-01 09:47:08 +0300
committer: Mai Lavelle <mai.lavelle@gmail.com> 2017-03-08 09:31:07 +0300
commit: cd7d5669d17070799e2d2a2b28f58a06c3417d7b (patch)
tree: c5420e4cd9432c44575c1e489b9890ddd8439ba9 /intern
parent: 4cf501b83557ed5d64dbd2ddb13e1e8c5add88f5 (diff)
13 files changed, 46 insertions, 137 deletions
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 85da7024a2c..13fee6c02e4 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -51,7 +51,6 @@ DeviceSplitKernel::~DeviceSplitKernel()
 	delete kernel_direct_lighting;
 	delete kernel_shadow_blocked;
 	delete kernel_next_iteration_setup;
-	delete kernel_sum_all_radiance;
 }
 
 bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
@@ -72,7 +71,6 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
 	LOAD_KERNEL(direct_lighting);
 	LOAD_KERNEL(shadow_blocked);
 	LOAD_KERNEL(next_iteration_setup);
-	LOAD_KERNEL(sum_all_radiance);
 
 #undef LOAD_KERNEL
 
@@ -258,15 +256,6 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 			avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
 		}
 
-		size_t sum_all_radiance_local_size[2] = {16, 16};
-		size_t sum_all_radiance_global_size[2];
-		sum_all_radiance_global_size[0] = round_up(tile.w, sum_all_radiance_local_size[0]);
-		sum_all_radiance_global_size[1] = round_up(tile.h, sum_all_radiance_local_size[1]);
-
-		ENQUEUE_SPLIT_KERNEL(sum_all_radiance,
-		                     sum_all_radiance_global_size,
-		                     sum_all_radiance_local_size);
-
 #undef ENQUEUE_SPLIT_KERNEL
 
 		tile.sample += subtile.num_samples;
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 1903574f0b5..1c6a2709cf2 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -65,7 +65,6 @@ private:
 	SplitKernelFunction *kernel_direct_lighting;
 	SplitKernelFunction *kernel_shadow_blocked;
 	SplitKernelFunction *kernel_next_iteration_setup;
-	SplitKernelFunction *kernel_sum_all_radiance;
 
 	/* Global memory variables [porting]; These memory is used for
 	 * co-operation between different kernels; Data written by one
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index d467e40b3e9..df40c3a0e8e 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -26,7 +26,6 @@ set(SRC
 	kernels/opencl/kernel_direct_lighting.cl
 	kernels/opencl/kernel_shadow_blocked.cl
 	kernels/opencl/kernel_next_iteration_setup.cl
-	kernels/opencl/kernel_sum_all_radiance.cl
 	kernels/cuda/kernel.cu
 	kernels/cuda/kernel_split.cu
 )
@@ -209,7 +208,6 @@ set(SRC_SPLIT_HEADERS
 	split/kernel_shadow_blocked.h
 	split/kernel_split_common.h
 	split/kernel_split_data.h
-	split/kernel_sum_all_radiance.h
 )
 
 # CUDA module
@@ -412,7 +410,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emiss
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 8c1675665cb..deb872444d0 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -81,7 +81,6 @@ DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
 DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
 DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
 DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
-DECLARE_SPLIT_KERNEL_FUNCTION(sum_all_radiance)
 
 void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func));
 
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index f6e0591ef24..d6d0db4e034 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -48,7 +48,6 @@
 #  include "split/kernel_direct_lighting.h"
 #  include "split/kernel_shadow_blocked.h"
 #  include "split/kernel_next_iteration_setup.h"
-#  include "split/kernel_sum_all_radiance.h"
 #endif
 
 CCL_NAMESPACE_BEGIN
@@ -174,7 +173,6 @@ DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
 DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
 DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
 DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
-DEFINE_SPLIT_KERNEL_FUNCTION(sum_all_radiance)
 
 void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func))
 {
@@ -198,7 +196,6 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
 	REGISTER(direct_lighting);
 	REGISTER(shadow_blocked);
 	REGISTER(next_iteration_setup);
-	REGISTER(sum_all_radiance);
 
 #undef REGISTER
 #undef REGISTER_EVAL_NAME
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 3a883265157..53a36b15e40 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -35,7 +35,6 @@
 #include "../../split/kernel_direct_lighting.h"
 #include "../../split/kernel_shadow_blocked.h"
 #include "../../split/kernel_next_iteration_setup.h"
-#include "../../split/kernel_sum_all_radiance.h"
 
 #include "../../kernel_film.h"
 
@@ -92,7 +91,6 @@ DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
 DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
 DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
 DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
-DEFINE_SPLIT_KERNEL_FUNCTION(sum_all_radiance)
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl
deleted file mode 100644
index e945050a110..00000000000
--- a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel_compat_opencl.h"
-#include "split/kernel_split_common.h"
-#include "split/kernel_sum_all_radiance.h"
-
-__kernel void kernel_ocl_path_trace_sum_all_radiance(
-        KernelGlobals *kg,
-        ccl_constant KernelData *data)
-{
-	kernel_sum_all_radiance(kg);
-}
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index 07e5522c830..04aaf1bbaad 100644
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@@ -119,7 +119,7 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
 	ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
 	ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
 	ccl_global uint *rng = &kernel_split_state.rng[ray_index];
-	ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
+	ccl_global float *buffer = kernel_split_params.buffer;
 
 	unsigned int work_index;
 	ccl_global uint *initial_rng;
@@ -129,7 +129,6 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
 	unsigned int tile_y;
 	unsigned int pixel_x;
 	unsigned int pixel_y;
-	unsigned int my_sample_tile;
 
 	work_index = kernel_split_state.work_array[ray_index];
 	sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
@@ -137,11 +136,10 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
 	                        &tile_x, &tile_y,
 	                        work_index,
 	                        ray_index);
-	my_sample_tile = 0;
 	initial_rng = rng_state;
 
-	rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride;
-	per_sample_output_buffers += ((tile_x + (tile_y * stride)) + my_sample_tile) * kernel_data.film.pass_stride;
+	rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
+	buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
 
 	if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
 		/* eval background shader if nothing hit */
@@ -165,14 +163,14 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
 
 	if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
 		float3 L_sum = path_radiance_clamp_and_sum(kg, L);
-		kernel_write_light_passes(kg, per_sample_output_buffers, L, sample);
+		kernel_write_light_passes(kg, buffer, L, sample);
 #ifdef __KERNEL_DEBUG__
-		kernel_write_debug_passes(kg, per_sample_output_buffers, state, debug_data, sample);
+		kernel_write_debug_passes(kg, buffer, state, debug_data, sample);
 #endif
 		float4 L_rad = make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - (*L_transparent));
 
 		/* accumulate result in output buffer */
-		kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
+		kernel_write_pass_float4(buffer, sample, L_rad);
 		path_rng_end(kg, rng_state, *rng);
 
 		ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
@@ -192,13 +190,11 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
 			sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
 			/* Get pixel and tile position associated with current work */
 			get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, &tile_x, &tile_y, work_index, ray_index);
-			my_sample_tile = 0;
 
 			/* Remap rng_state according to the current work */
-			rng_state = initial_rng + kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride;
-			/* Remap per_sample_output_buffers according to the current work */
-			per_sample_output_buffers = kernel_split_state.per_sample_output_buffers
-				+ ((tile_x + (tile_y * stride)) + my_sample_tile) * kernel_data.film.pass_stride;
+			rng_state = initial_rng + kernel_split_params.offset + pixel_x + pixel_y*stride;
+			/* Remap buffer according to the current work (re-based from the tile buffer start: buffer already carries the previous work's pixel offset) */
+			buffer = kernel_split_params.buffer + (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
 
 			/* Initialize random numbers and ray. */
 			kernel_path_trace_setup(kg, rng_state, sample, pixel_x, pixel_y, rng, ray);
@@ -221,7 +217,7 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
 				/* These rays do not participate in path-iteration. */
 				float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 				/* Accumulate result in output buffer. */
-				kernel_write_pass_float4(per_sample_output_buffers, sample, L_rad);
+				kernel_write_pass_float4(buffer, sample, L_rad);
 				path_rng_end(kg, rng_state, *rng);
 
 				ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 982c7be2008..c22703e5abd 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -24,6 +24,21 @@ CCL_NAMESPACE_BEGIN
  * The number of elements in the queues is initialized to 0;
  */
 
+/* Distributes `work_size` iterations across all threads of the 2D launch: each thread walks a contiguous chunk of
+ * work_size / global_size indices in `iter_name` (declared as size_t), then at most one leftover index past the chunks.
+ * Note: work done inside the loop may not show up to all threads till after the current kernel has completed. */
+#define parallel_for(kg, iter_name, work_size) \
+	for(size_t _size = (work_size), \
+	    _global_size = ccl_global_size(0) * ccl_global_size(1), \
+	    _n = _size / _global_size, \
+		_thread = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0), \
+	    iter_name = (_n > 0) ? (_thread * _n) : (_thread) \
+		; \
+		(iter_name < (_thread+1) * _n) || (iter_name == _n * _global_size + _thread && _thread < _size % _global_size) \
+		; \
+		iter_name = (iter_name != (_thread+1) * _n - 1) ? (iter_name + 1) : (_n * _global_size + _thread) \
+	)
+
 #ifndef __KERNEL_CPU__
 ccl_device void kernel_data_init(
 #else
@@ -110,6 +125,21 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 		 */
 		*use_queues_flag = 0;
 	}
+
+	/* Zero the tile's pixels if this is the first sample. */
+	if(start_sample == 0) {
+		parallel_for(kg, i, sw * sh * kernel_data.film.pass_stride) {
+			int pixel = i / kernel_data.film.pass_stride;
+			int pass = i % kernel_data.film.pass_stride;
+
+			int x = sx + pixel % sw;
+			int y = sy + pixel / sw;
+
+			int index = (offset + x + y*stride) * kernel_data.film.pass_stride + pass;
+
+			*(buffer + index) = 0.0f;
+		}
+	}
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index ee9c4280b22..7168efa59ae 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -114,7 +114,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
 
 	unsigned int tile_x;
 	unsigned int tile_y;
-	int my_sample_tile;
 	unsigned int sample;
 
 	ccl_global RNG *rng = 0x0;
@@ -123,7 +122,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
 
 	ccl_global char *ray_state = kernel_split_state.ray_state;
 	ShaderData *sd = &kernel_split_state.sd[ray_index];
-	ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
+	ccl_global float *buffer = kernel_split_params.buffer;
 
 	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
 
@@ -137,11 +136,8 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
 		                        &tile_x, &tile_y,
 		                        work_index,
 		                        ray_index);
-		my_sample_tile = 0;
 
-		per_sample_output_buffers +=
-		    ((tile_x + (tile_y * stride)) + my_sample_tile) *
-		    kernel_data.film.pass_stride;
+		buffer += (kernel_split_params.offset + pixel_x + pixel_y * stride) * kernel_data.film.pass_stride;
 
 		/* holdout */
 #ifdef __HOLDOUT__
@@ -172,7 +168,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
 		PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
 		/* Holdout mask objects do not write data passes. */
 		kernel_write_data_passes(kg,
-		                         per_sample_output_buffers,
+		                         buffer,
 		                         L,
 		                         sd,
 		                         sample,
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index e613db214ed..d2e2ffaca91 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -35,7 +35,6 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
 	unsigned int pixel_y;
 	unsigned int tile_x;
 	unsigned int tile_y;
-	unsigned int my_sample_tile;
 
 	unsigned int work_index = 0;
 	/* Get work. */
@@ -49,8 +48,6 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
 	/* Get the sample associated with the work. */
 	my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
 
-	my_sample_tile = 0;
-
 	/* Get pixel and tile position associated with the work. */
 	get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
 	                        &tile_x, &tile_y,
@@ -61,9 +58,8 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
 	ccl_global uint *rng_state = kernel_split_params.rng_state;
 	rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride;
 
-	ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
-	per_sample_output_buffers += (tile_x + tile_y * kernel_split_params.stride + my_sample_tile)
-	                             * kernel_data.film.pass_stride;
+	ccl_global float *buffer = kernel_split_params.buffer;
+	buffer += (kernel_split_params.offset + pixel_x + pixel_y * kernel_split_params.stride) * kernel_data.film.pass_stride;
 
 	/* Initialize random numbers and ray. */
 	kernel_path_trace_setup(kg,
@@ -94,7 +90,7 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
 		/* These rays do not participate in path-iteration. */
 		float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 		/* Accumulate result in output buffer. */
-		kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
+		kernel_write_pass_float4(buffer, my_sample, L_rad);
 		path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
 		ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
 	}
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 7e88b6f5168..5dd53f42478 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -89,7 +89,6 @@ typedef struct SplitData {
 	/* size calculation for these is non trivial, so they are left out of SPLIT_DATA_ENTRIES and handled separately */
 	ShaderData *sd;
 	ShaderData *sd_DL_shadow;
-	ccl_global float *per_sample_output_buffers;
 
 	/* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
 	 * the host easily) but is still used the same as the other data so we have it here in this struct as well
@@ -113,7 +112,6 @@ ccl_device_inline size_t split_data_buffer_size(size_t num_elements,
 	 */
 	size += align_up(num_elements * SIZEOF_SD(max_closure), 16); /* sd */
 	size += align_up(2 * num_elements * SIZEOF_SD(max_closure), 16); /* sd_DL_shadow */
-	size += align_up(num_elements * per_thread_output_buffer_size, 16); /* per_sample_output_buffers */
 
 	return size;
 }
@@ -136,9 +134,6 @@ ccl_device_inline void split_data_init(ccl_global SplitData *split_data,
 	split_data->sd_DL_shadow = (ShaderData*)p;
 	p += align_up(2 * num_elements * SIZEOF_SD(MAX_CLOSURE), 16);
 
-	split_data->per_sample_output_buffers = (ccl_global float*)p;
-	//p += align_up(num_elements * per_thread_output_buffer_size, 16);
-
 	split_data->ray_state = ray_state;
 }
 
diff --git a/intern/cycles/kernel/split/kernel_sum_all_radiance.h b/intern/cycles/kernel/split/kernel_sum_all_radiance.h
deleted file mode 100644
index fdceae2dafb..00000000000
--- a/intern/cycles/kernel/split/kernel_sum_all_radiance.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright 2011-2015 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* Since we process various samples in parallel; The output radiance of different samples
- * are stored in different locations; This kernel combines the output radiance contributed
- * by all different samples and stores them in the RenderTile's output buffer.
- */
-
-ccl_device void kernel_sum_all_radiance(KernelGlobals *kg)
-{
-	int x = ccl_global_id(0);
-	int y = ccl_global_id(1);
-
-	ccl_global float *buffer = kernel_split_params.buffer;
-	int sw = kernel_split_params.w;
-	int sh = kernel_split_params.h;
-	int stride = kernel_split_params.stride;
-	int start_sample = kernel_split_params.start_sample;
-
-	if(x < sw && y < sh) {
-		ccl_global float *per_sample_output_buffer = kernel_split_state.per_sample_output_buffers;
-		per_sample_output_buffer += (x + y * stride) * (kernel_data.film.pass_stride);
-
-		x += kernel_split_params.x;
-		y += kernel_split_params.y;
-
-		buffer += (kernel_split_params.offset + x + y*stride) * (kernel_data.film.pass_stride);
-
-		int pass_stride_iterator = 0;
-		int num_floats = kernel_data.film.pass_stride;
-
-		for(pass_stride_iterator = 0; pass_stride_iterator < num_floats; pass_stride_iterator++) {
-			*(buffer + pass_stride_iterator) =
-			        (start_sample == 0)
-			                ? *(per_sample_output_buffer + pass_stride_iterator)
-			                : *(buffer + pass_stride_iterator) + *(per_sample_output_buffer + pass_stride_iterator);
-		}
-	}
-}
-
-CCL_NAMESPACE_END
-
author	Mai Lavelle <mai.lavelle@gmail.com>	2017-03-01 09:47:08 +0300
committer	Mai Lavelle <mai.lavelle@gmail.com>	2017-03-08 09:31:07 +0300
commit	cd7d5669d17070799e2d2a2b28f58a06c3417d7b (patch)
tree	c5420e4cd9432c44575c1e489b9890ddd8439ba9 /intern
parent	4cf501b83557ed5d64dbd2ddb13e1e8c5add88f5 (diff)