diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-09-27 00:42:36 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2017-10-04 22:11:14 +0300 |
commit | 5b7d6ea54b2fc35b8b12c667f5bf9a1c9c46d5c2 (patch) | |
tree | 99a9ca07d5366b164dfdf267ad1ed3691d2d7d57 /intern/cycles/kernel/split | |
parent | 660e8e59e7b4265324a8fba7ae716f84a73c6c64 (diff) |
Code refactor: add WorkTile struct for passing work to kernel.
This makes it a bit easier to share some code between the mega and split
kernels in following commits, and also paves the way for rendering multiple tiles later.
Diffstat (limited to 'intern/cycles/kernel/split')
5 files changed, 36 insertions, 43 deletions
diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index c9e7deddafa..e8547767480 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -75,8 +75,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, if(ray_index != QUEUE_EMPTY_SLOT) { #endif - int stride = kernel_split_params.stride; - ccl_global char *ray_state = kernel_split_state.ray_state; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; @@ -86,7 +84,7 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { uint sample = state->sample; uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; - ccl_global float *buffer = kernel_split_params.buffer + buffer_offset; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; /* accumulate result in output buffer */ kernel_write_result(kg, buffer, sample, L); @@ -96,22 +94,27 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { /* We have completed current work; So get next work */ + ccl_global uint *work_pools = kernel_split_params.work_pools; + uint total_work_size = kernel_split_params.total_work_size; uint work_index; - if(!get_next_work(kg, ray_index, &work_index)) { + + if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { /* If work is invalid, this means no more work is available and the thread may exit */ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); } if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { + ccl_global WorkTile *tile = &kernel_split_params.tile; uint x, y, sample; - get_work_pixel(kg, work_index, &x, &y, &sample); + get_work_pixel(tile, work_index, &x, &y, &sample); /* Remap rng_state to current pixel. 
*/ - ccl_global uint *rng_state = kernel_split_params.rng_state; - rng_state += kernel_split_params.offset + x + y*stride; + ccl_global uint *rng_state = kernel_split_params.tile.rng_state; + rng_state += tile->offset + x + y*tile->stride; /* Store buffer offset for writing to passes. */ - uint buffer_offset = (kernel_split_params.offset + x + y*stride) * kernel_data.film.pass_stride; + uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride; + ccl_global float *buffer = tile->buffer + buffer_offset; kernel_split_state.buffer_offset[ray_index] = buffer_offset; /* Initialize random numbers and ray. */ @@ -135,7 +138,6 @@ ccl_device void kernel_buffer_update(KernelGlobals *kg, /* These rays do not participate in path-iteration. */ float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* Accumulate result in output buffer. */ - ccl_global float *buffer = kernel_split_params.buffer + buffer_offset; kernel_write_pass_float4(buffer, sample, L_rad); ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 2c042dfde6f..2da3ca47466 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -73,28 +73,28 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( kg->data = data; #endif - kernel_split_params.x = sx; - kernel_split_params.y = sy; - kernel_split_params.w = sw; - kernel_split_params.h = sh; + kernel_split_params.tile.x = sx; + kernel_split_params.tile.y = sy; + kernel_split_params.tile.w = sw; + kernel_split_params.tile.h = sh; - kernel_split_params.offset = offset; - kernel_split_params.stride = stride; + kernel_split_params.tile.start_sample = start_sample; + kernel_split_params.tile.num_samples = num_samples; - kernel_split_params.rng_state = rng_state; + kernel_split_params.tile.offset = offset; + kernel_split_params.tile.stride = stride; - kernel_split_params.start_sample = 
start_sample; - kernel_split_params.end_sample = end_sample; + kernel_split_params.tile.rng_state = rng_state; + kernel_split_params.tile.buffer = buffer; + + kernel_split_params.total_work_size = sw * sh * num_samples; kernel_split_params.work_pools = work_pools; - kernel_split_params.num_samples = num_samples; kernel_split_params.queue_index = Queue_index; kernel_split_params.queue_size = queuesize; kernel_split_params.use_queues_flag = use_queues_flag; - kernel_split_params.buffer = buffer; - split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state); #ifdef __KERNEL_OPENCL__ diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index dffd291012d..906bad8ceb6 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -98,7 +98,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; - ccl_global float *buffer = kernel_split_params.buffer + buffer_offset; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index 0ab2289348b..701d39403ad 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -30,23 +30,28 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { kernel_split_state.ray_state[ray_index] = RAY_ACTIVE; /* Get work. 
*/ + ccl_global uint *work_pools = kernel_split_params.work_pools; + uint total_work_size = kernel_split_params.total_work_size; uint work_index; - if(!get_next_work(kg, ray_index, &work_index)) { + + if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { /* No more work, mark ray as inactive */ kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; return; } + ccl_global WorkTile *tile = &kernel_split_params.tile; uint x, y, sample; - get_work_pixel(kg, work_index, &x, &y, &sample); + get_work_pixel(tile, work_index, &x, &y, &sample); /* Remap rng_state and buffer to current pixel. */ - ccl_global uint *rng_state = kernel_split_params.rng_state; - rng_state += kernel_split_params.offset + x + y*kernel_split_params.stride; + ccl_global uint *rng_state = kernel_split_params.tile.rng_state; + rng_state += tile->offset + x + y*tile->stride; /* Store buffer offset for writing to passes. */ - uint buffer_offset = (kernel_split_params.offset + x + y*kernel_split_params.stride) * kernel_data.film.pass_stride; + uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride; + ccl_global float *buffer = tile->buffer + buffer_offset; kernel_split_state.buffer_offset[ray_index] = buffer_offset; /* Initialize random numbers and ray. */ @@ -78,7 +83,6 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { /* These rays do not participate in path-iteration. */ float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* Accumulate result in output buffer. 
*/ - ccl_global float *buffer = kernel_split_params.buffer + buffer_offset; kernel_write_pass_float4(buffer, sample, L_rad); ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); } diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index c58c8463f5c..b0e6e5f5250 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -22,28 +22,15 @@ CCL_NAMESPACE_BEGIN /* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */ typedef struct SplitParams { - int x; - int y; - int w; - int h; - - int offset; - int stride; - - ccl_global uint *rng_state; - - int start_sample; - int end_sample; + WorkTile tile; + uint total_work_size; ccl_global unsigned int *work_pools; - unsigned int num_samples; ccl_global int *queue_index; int queue_size; ccl_global char *use_queues_flag; - ccl_global float *buffer; - /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */ int dummy_sd_flag; } SplitParams; |