From 5b7d6ea54b2fc35b8b12c667f5bf9a1c9c46d5c2 Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel
Date: Tue, 26 Sep 2017 23:42:36 +0200
Subject: Code refactor: add WorkTile struct for passing work to kernel.

This makes sharing some code between mega/split in following commits a bit
easier, and also paves the way for rendering multiple tiles later.
---
 intern/cycles/device/device_cuda.cpp               | 58 ++++++++++++----------
 intern/cycles/device/device_memory.h               |  6 ++-
 intern/cycles/kernel/kernel_types.h                | 15 ++++++
 intern/cycles/kernel/kernel_work_stealing.h        | 34 ++++++-------
 intern/cycles/kernel/kernels/cuda/kernel.cu        | 26 ++++++----
 .../kernel/kernels/opencl/kernel_split_function.h  |  4 +-
 intern/cycles/kernel/split/kernel_buffer_update.h  | 20 ++++----
 intern/cycles/kernel/split/kernel_data_init.h      | 24 ++++-----
 ..._holdout_emission_blurring_pathtermination_ao.h |  2 +-
 intern/cycles/kernel/split/kernel_path_init.h      | 16 +++---
 .../cycles/kernel/split/kernel_split_data_types.h  | 17 +------
 11 files changed, 122 insertions(+), 100 deletions(-)

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 29b5bd70789..7ee74e9a512 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1293,8 +1293,6 @@ public:
 		CUDAContextScope scope(this);

 		CUfunction cuPathTrace;
-		CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer);
-		CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);

 		/* get kernel function */
 		if(branched) {
@@ -1308,40 +1306,48 @@ public:
 			return;
 		}

-		/* pass in parameters */
-		void *args[] = {&d_buffer,
-		                &d_rng_state,
-		                &sample,
-		                &rtile.x,
-		                &rtile.y,
-		                &rtile.w,
-		                &rtile.h,
-		                &rtile.offset,
-		                &rtile.stride};
+		cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));

-		/* launch kernel */
-		int threads_per_block;
-		cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace));
+		/* allocate work tile */
+		device_vector<WorkTile> work_tiles;
+		work_tiles.resize(1);

-		/*int num_registers;
-		cuda_assert(cuFuncGetAttribute(&num_registers, CU_FUNC_ATTRIBUTE_NUM_REGS, cuPathTrace));
+		WorkTile *wtile = work_tiles.get_data();
+		wtile->x = rtile.x;
+		wtile->y = rtile.y;
+		wtile->w = rtile.w;
+		wtile->h = rtile.h;
+		wtile->offset = rtile.offset;
+		wtile->stride = rtile.stride;
+		wtile->start_sample = sample;
+		wtile->num_samples = 1;
+		wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
+		wtile->rng_state = (uint*)cuda_device_ptr(rtile.rng_state);

-		printf("threads_per_block %d\n", threads_per_block);
-		printf("num_registers %d\n", num_registers);*/
+		mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
+		mem_copy_to(work_tiles);

-		int xthreads = (int)sqrt(threads_per_block);
-		int ythreads = (int)sqrt(threads_per_block);
-		int xblocks = (rtile.w + xthreads - 1)/xthreads;
-		int yblocks = (rtile.h + ythreads - 1)/ythreads;
+		CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);

-		cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+		uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
+
+		/* pass in parameters */
+		void *args[] = {&d_work_tiles,
+		                &total_work_size};
+
+		/* launch kernel */
+		int num_threads_per_block;
+		cuda_assert(cuFuncGetAttribute(&num_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace));
+		int num_blocks = divide_up(total_work_size, num_threads_per_block);

 		cuda_assert(cuLaunchKernel(cuPathTrace,
-		                           xblocks , yblocks, 1, /* blocks */
-		                           xthreads, ythreads, 1, /* threads */
+		                           num_blocks, 1, 1,
+		                           num_threads_per_block, 1, 1,
 		                           0, 0, args, 0));

 		cuda_assert(cuCtxSynchronize());
+
+		mem_free(work_tiles);
 	}

 	void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index b63dd00068b..20707ad04c9 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -46,6 +46,7 @@ enum MemoryType {
 /* Supported Data Types */

 enum DataType {
+	TYPE_UNKNOWN,
 	TYPE_UCHAR,
 	TYPE_UINT,
 	TYPE_INT,
@@ -57,6 +58,7 @@ static inline size_t datatype_size(DataType datatype)
 {
 	switch(datatype) {
+		case TYPE_UNKNOWN: return 1;
 		case TYPE_UCHAR: return sizeof(uchar);
 		case TYPE_FLOAT: return sizeof(float);
 		case TYPE_UINT: return sizeof(uint);
@@ -70,8 +72,8 @@ static inline size_t datatype_size(DataType datatype)
 /* Traits for data types */

 template<typename T> struct device_type_traits {
-	static const DataType data_type = TYPE_UCHAR;
-	static const int num_elements = 0;
+	static const DataType data_type = TYPE_UNKNOWN;
+	static const int num_elements = sizeof(T);
 };

 template<> struct device_type_traits<uchar> {
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 6c5b6ca3b2d..bf3a2881666 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1448,6 +1448,21 @@ enum RayState {
 #define PATCH_MAP_NODE_IS_LEAF (1u << 31)
 #define PATCH_MAP_NODE_INDEX_MASK (~(PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF))

+/* Work Tiles */
+
+typedef struct WorkTile {
+	uint x, y, w, h;
+
+	uint start_sample;
+	uint num_samples;
+
+	uint offset;
+	uint stride;
+
+	ccl_global float *buffer;
+	ccl_global uint *rng_state;
+} WorkTile;
+
 CCL_NAMESPACE_END

 #endif /* __KERNEL_TYPES_H__ */
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 0c11158e8da..0c2d9379b63 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -27,29 +27,28 @@ CCL_NAMESPACE_BEGIN
 # pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
 #endif

+#ifdef __SPLIT_KERNEL__
 /* Returns true if there is work */
 ccl_device bool get_next_work(KernelGlobals *kg,
-                              uint thread_index,
+                              ccl_global uint *work_pools,
+                              uint total_work_size,
+                              uint ray_index,
                               ccl_private uint *global_work_index)
 {
-	uint total_work_size = kernel_split_params.w
-	                     * kernel_split_params.h
-	                     * kernel_split_params.num_samples;
-
 	/* With a small amount of work there may be more threads than work due to
 	 * rounding up of global size, stop such threads immediately. */
-	if(thread_index >= total_work_size) {
+	if(ray_index >= total_work_size) {
 		return false;
 	}

 	/* Increase atomic work index counter in pool. */
-	uint pool = thread_index / WORK_POOL_SIZE;
-	uint work_index = atomic_fetch_and_inc_uint32(&kernel_split_params.work_pools[pool]);
+	uint pool = ray_index / WORK_POOL_SIZE;
+	uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]);

 	/* Map per-pool work index to a global work index. */
 	uint global_size = ccl_global_size(0) * ccl_global_size(1);
 	kernel_assert(global_size % WORK_POOL_SIZE == 0);
-	kernel_assert(thread_index < global_size);
+	kernel_assert(ray_index < global_size);

 	*global_work_index = (work_index / WORK_POOL_SIZE) * global_size
 	                   + (pool * WORK_POOL_SIZE)
@@ -58,23 +57,24 @@ ccl_device bool get_next_work(KernelGlobals *kg,
 	/* Test if all work for this pool is done. */
 	return (*global_work_index < total_work_size);
 }
+#endif

-/* Map global work index to pixel X/Y and sample. */
-ccl_device_inline void get_work_pixel(KernelGlobals *kg,
+/* Map global work index to tile, pixel X/Y and sample. */
+ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
                                       uint global_work_index,
                                       ccl_private uint *x,
                                       ccl_private uint *y,
                                       ccl_private uint *sample)
 {
-	uint tile_pixels = kernel_split_params.w * kernel_split_params.h;
+	uint tile_pixels = tile->w * tile->h;
 	uint sample_offset = global_work_index / tile_pixels;
 	uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-	uint y_offset = pixel_offset / kernel_split_params.w;
-	uint x_offset = pixel_offset - y_offset * kernel_split_params.w;
+	uint y_offset = pixel_offset / tile->w;
+	uint x_offset = pixel_offset - y_offset * tile->w;

-	*x = kernel_split_params.x + x_offset;
-	*y = kernel_split_params.y + y_offset;
-	*sample = kernel_split_params.start_sample + sample_offset;
+	*x = tile->x + x_offset;
+	*y = tile->y + y_offset;
+	*sample = tile->start_sample + sample_offset;
 }

 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index dc343cb387a..4d100634421 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -20,6 +20,7 @@
 #include "kernel/kernel_compat_cuda.h"
 #include "kernel_config.h"
+
 #include "kernel/kernel_math.h"
 #include "kernel/kernel_types.h"
 #include "kernel/kernel_globals.h"
@@ -27,32 +28,37 @@
 #include "kernel/kernel_path.h"
 #include "kernel/kernel_path_branched.h"
 #include "kernel/kernel_bake.h"
+#include "kernel/kernel_work_stealing.h"

 /* kernels */

 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+kernel_cuda_path_trace(WorkTile *tile, uint total_work_size)
 {
-	int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
-	int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
+	int work_index = ccl_global_id(0);
+
+	if(work_index < total_work_size) {
+		uint x, y, sample;
+		get_work_pixel(tile, work_index, &x, &y, &sample);

-	if(x < sx + sw && y < sy + sh) {
 		KernelGlobals kg;
-		kernel_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride);
+		kernel_path_trace(&kg, tile->buffer, tile->rng_state, sample, x, y, tile->offset, tile->stride);
 	}
 }

 #ifdef __BRANCHED_PATH__
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
-kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+kernel_cuda_branched_path_trace(WorkTile *tile, uint total_work_size)
 {
-	int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
-	int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
+	int work_index = ccl_global_id(0);
+
+	if(work_index < total_work_size) {
+		uint x, y, sample;
+		get_work_pixel(tile, work_index, &x, &y, &sample);

-	if(x < sx + sw && y < sy + sh) {
 		KernelGlobals kg;
-		kernel_branched_path_trace(&kg, buffer, rng_state, sample, x, y, offset, stride);
+		kernel_branched_path_trace(&kg, tile->buffer, tile->rng_state, sample, x, y, tile->offset, tile->stride);
 	}
 }
 #endif
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
index 591c3846ef2..499138b5581 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h
@@ -42,11 +42,11 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(

 	if(ccl_local_id(0) + ccl_local_id(1) == 0) {
 		kg->data = data;
-		kernel_split_params.rng_state = rng_state;
+		kernel_split_params.tile.rng_state = rng_state;
 		kernel_split_params.queue_index = queue_index;
 		kernel_split_params.use_queues_flag = use_queues_flag;
 		kernel_split_params.work_pools = work_pools;
-		kernel_split_params.buffer = buffer;
+		kernel_split_params.tile.buffer = buffer;

 		split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state);
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h
index c9e7deddafa..e8547767480 100644
--- a/intern/cycles/kernel/split/kernel_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_buffer_update.h
@@ -75,8 +75,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
 	if(ray_index != QUEUE_EMPTY_SLOT) {
 #endif

-	int stride = kernel_split_params.stride;
-
 	ccl_global char *ray_state = kernel_split_state.ray_state;
 	ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
 	PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
@@ -86,7 +84,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
 	if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
 		uint sample = state->sample;
 		uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
-		ccl_global float *buffer = kernel_split_params.buffer + buffer_offset;
+		ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;

 		/* accumulate result in output buffer */
 		kernel_write_result(kg, buffer, sample, L);
@@ -96,22 +94,27 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,

 	if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
 		/* We have completed current work; So get next work */
+		ccl_global uint *work_pools = kernel_split_params.work_pools;
+		uint total_work_size = kernel_split_params.total_work_size;
 		uint work_index;
-		if(!get_next_work(kg, ray_index, &work_index)) {
+
+		if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
 			/* If work is invalid, this means no more work is available and the thread may exit */
 			ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE);
 		}

 		if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) {
+			ccl_global WorkTile *tile = &kernel_split_params.tile;
 			uint x, y, sample;
-			get_work_pixel(kg, work_index, &x, &y, &sample);
+			get_work_pixel(tile, work_index, &x, &y, &sample);

 			/* Remap rng_state to current pixel. */
-			ccl_global uint *rng_state = kernel_split_params.rng_state;
-			rng_state += kernel_split_params.offset + x + y*stride;
+			ccl_global uint *rng_state = kernel_split_params.tile.rng_state;
+			rng_state += tile->offset + x + y*tile->stride;

 			/* Store buffer offset for writing to passes. */
-			uint buffer_offset = (kernel_split_params.offset + x + y*stride) * kernel_data.film.pass_stride;
+			uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
+			ccl_global float *buffer = tile->buffer + buffer_offset;
 			kernel_split_state.buffer_offset[ray_index] = buffer_offset;

 			/* Initialize random numbers and ray. */
@@ -135,7 +138,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg,
 				/* These rays do not participate in path-iteration. */
 				float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 				/* Accumulate result in output buffer. */
-				ccl_global float *buffer = kernel_split_params.buffer + buffer_offset;
 				kernel_write_pass_float4(buffer, sample, L_rad);

 				ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE);
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 2c042dfde6f..2da3ca47466 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -73,28 +73,28 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 	kg->data = data;
 #endif

-	kernel_split_params.x = sx;
-	kernel_split_params.y = sy;
-	kernel_split_params.w = sw;
-	kernel_split_params.h = sh;
+	kernel_split_params.tile.x = sx;
+	kernel_split_params.tile.y = sy;
+	kernel_split_params.tile.w = sw;
+	kernel_split_params.tile.h = sh;

-	kernel_split_params.offset = offset;
-	kernel_split_params.stride = stride;
+	kernel_split_params.tile.start_sample = start_sample;
+	kernel_split_params.tile.num_samples = num_samples;

-	kernel_split_params.rng_state = rng_state;
+	kernel_split_params.tile.offset = offset;
+	kernel_split_params.tile.stride = stride;

-	kernel_split_params.start_sample = start_sample;
-	kernel_split_params.end_sample = end_sample;
+	kernel_split_params.tile.rng_state = rng_state;
+	kernel_split_params.tile.buffer = buffer;
+
+	kernel_split_params.total_work_size = sw * sh * num_samples;

 	kernel_split_params.work_pools = work_pools;
-	kernel_split_params.num_samples = num_samples;

 	kernel_split_params.queue_index = Queue_index;
 	kernel_split_params.queue_size = queuesize;
 	kernel_split_params.use_queues_flag = use_queues_flag;

-	kernel_split_params.buffer = buffer;
-
 	split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);

 #ifdef __KERNEL_OPENCL__
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index dffd291012d..906bad8ceb6 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -98,7 +98,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(

 	if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
 		uint buffer_offset = kernel_split_state.buffer_offset[ray_index];
-		ccl_global float *buffer = kernel_split_params.buffer + buffer_offset;
+		ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;

 		ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
 		ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
index 0ab2289348b..701d39403ad 100644
--- a/intern/cycles/kernel/split/kernel_path_init.h
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -30,23 +30,28 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
 	kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;

 	/* Get work. */
+	ccl_global uint *work_pools = kernel_split_params.work_pools;
+	uint total_work_size = kernel_split_params.total_work_size;
 	uint work_index;
-	if(!get_next_work(kg, ray_index, &work_index)) {
+
+	if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) {
 		/* No more work, mark ray as inactive */
 		kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;

 		return;
 	}

+	ccl_global WorkTile *tile = &kernel_split_params.tile;
 	uint x, y, sample;
-	get_work_pixel(kg, work_index, &x, &y, &sample);
+	get_work_pixel(tile, work_index, &x, &y, &sample);

 	/* Remap rng_state and buffer to current pixel. */
-	ccl_global uint *rng_state = kernel_split_params.rng_state;
-	rng_state += kernel_split_params.offset + x + y*kernel_split_params.stride;
+	ccl_global uint *rng_state = kernel_split_params.tile.rng_state;
+	rng_state += tile->offset + x + y*tile->stride;

 	/* Store buffer offset for writing to passes. */
-	uint buffer_offset = (kernel_split_params.offset + x + y*kernel_split_params.stride) * kernel_data.film.pass_stride;
+	uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride;
+	ccl_global float *buffer = tile->buffer + buffer_offset;
 	kernel_split_state.buffer_offset[ray_index] = buffer_offset;

 	/* Initialize random numbers and ray. */
@@ -78,7 +83,6 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
 		/* These rays do not participate in path-iteration. */
 		float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 		/* Accumulate result in output buffer. */
-		ccl_global float *buffer = kernel_split_params.buffer + buffer_offset;
 		kernel_write_pass_float4(buffer, sample, L_rad);
 		ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
 	}
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index c58c8463f5c..b0e6e5f5250 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -22,28 +22,15 @@ CCL_NAMESPACE_BEGIN

 /* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */

 typedef struct SplitParams {
-	int x;
-	int y;
-	int w;
-	int h;
-
-	int offset;
-	int stride;
-
-	ccl_global uint *rng_state;
-
-	int start_sample;
-	int end_sample;
+	WorkTile tile;
+	uint total_work_size;

 	ccl_global unsigned int *work_pools;
-	unsigned int num_samples;

 	ccl_global int *queue_index;
 	int queue_size;
 	ccl_global char *use_queues_flag;

-	ccl_global float *buffer;
-
 	/* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */
 	int dummy_sd_flag;
 } SplitParams;
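
Note (not part of the patch): the refactor replaces the 2D (x, y) CUDA launch
grid with one flat work index per thread, which get_work_pixel() decodes into
a pixel and a sample. The standalone C++ sketch below mirrors that arithmetic
and the 1D launch sizing so the mapping can be sanity-checked on the host; the
WorkTile here is simplified to its geometry fields, the test values and the
block size of 256 are arbitrary placeholders for what device_cuda.cpp queries
via cuFuncGetAttribute(), and divide_up() is inlined as a round-up division.

    #include <cassert>
    #include <cstdio>

    /* Simplified stand-in for the WorkTile in kernel_types.h
     * (device buffer/rng_state pointers omitted). */
    struct WorkTile {
    	unsigned x, y, w, h;
    	unsigned start_sample, num_samples;
    };

    /* Same arithmetic as get_work_pixel(): split the flat index into a
     * sample offset and a pixel offset inside the tile. */
    static void work_pixel(const WorkTile &tile, unsigned index,
                           unsigned *x, unsigned *y, unsigned *sample)
    {
    	unsigned tile_pixels = tile.w * tile.h;
    	unsigned sample_offset = index / tile_pixels;
    	unsigned pixel_offset = index - sample_offset * tile_pixels;
    	unsigned y_offset = pixel_offset / tile.w;
    	unsigned x_offset = pixel_offset - y_offset * tile.w;

    	*x = tile.x + x_offset;
    	*y = tile.y + y_offset;
    	*sample = tile.start_sample + sample_offset;
    }

    int main()
    {
    	WorkTile tile = {16, 32, 128, 64, 10, 4};  /* hypothetical tile */
    	unsigned total_work_size = tile.w * tile.h * tile.num_samples;

    	/* Host-side sizing as in device_cuda.cpp:
    	 * num_blocks = divide_up(total_work_size, num_threads_per_block). */
    	unsigned num_threads_per_block = 256;  /* placeholder block size */
    	unsigned num_blocks = (total_work_size + num_threads_per_block - 1)
    	                      / num_threads_per_block;
    	printf("%u blocks x %u threads for %u work items\n",
    	       num_blocks, num_threads_per_block, total_work_size);

    	/* Every index must land inside the tile and the sample range. */
    	for(unsigned i = 0; i < total_work_size; i++) {
    		unsigned x, y, sample;
    		work_pixel(tile, i, &x, &y, &sample);
    		assert(x >= tile.x && x < tile.x + tile.w);
    		assert(y >= tile.y && y < tile.y + tile.h);
    		assert(sample >= tile.start_sample &&
    		       sample < tile.start_sample + tile.num_samples);
    	}
    	return 0;
    }

Consecutive indices sweep all pixels of one sample before advancing to the
next, so a tile's buffer writes for a given sample stay contiguous in the
flat ordering.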