From 4923af4c773f64c54f1b46695ecb55525c03a437 Mon Sep 17 00:00:00 2001 From: Mai Lavelle Date: Sat, 14 Jan 2017 02:01:00 -0500 Subject: Cycles: Deduplicate sample range setting and getting --- intern/cycles/kernel/CMakeLists.txt | 1 + intern/cycles/kernel/kernel_sample_range.h | 99 +++++++++++++++++++++++++++ intern/cycles/kernel/kernels/cuda/kernel.cu | 98 ++++++-------------------- intern/cycles/kernel/kernels/opencl/kernel.cl | 54 ++------------- 4 files changed, 129 insertions(+), 123 deletions(-) create mode 100644 intern/cycles/kernel/kernel_sample_range.h diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 56bcafbce38..e0fbc57edd2 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -72,6 +72,7 @@ set(SRC_HEADERS kernel_projection.h kernel_queues.h kernel_random.h + kernel_sample_range.h kernel_shader.h kernel_shadow.h kernel_subsurface.h diff --git a/intern/cycles/kernel/kernel_sample_range.h b/intern/cycles/kernel/kernel_sample_range.h new file mode 100644 index 00000000000..6694a7e959d --- /dev/null +++ b/intern/cycles/kernel/kernel_sample_range.h @@ -0,0 +1,99 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device_inline void kernel_set_sample_range( + ccl_global SampleRange *sample_ranges, + int range, + ccl_global float *buffer, + ccl_global uint *rng_state, + int sample, + int sx, + int sy, + int sw, + int sh, + int offset, + int stride) +{ + ccl_global SampleRange* sample_range = &sample_ranges[range]; + + sample_range->buffer = buffer; + sample_range->rng_state = rng_state; + sample_range->sample = sample; + sample_range->x = sx; + sample_range->y = sy; + sample_range->w = sw; + sample_range->h = sh; + sample_range->offset = offset; + sample_range->stride = stride; + + if(range == 0) { + sample_range->work_offset = 0; + } + else { + ccl_global SampleRange* prev_range = &sample_ranges[range-1]; + sample_range->work_offset = prev_range->work_offset + prev_range->w * prev_range->h; + } +} + +ccl_device_inline bool kernel_pixel_sample_for_thread( + KernelGlobals *kg, + ccl_global SampleRange *sample_ranges, + int num_sample_ranges, + int *thread_x, + int *thread_y, + int *thread_sample, + ccl_global SampleRange **thread_sample_range) +{ + /* order threads to maintain inner block coherency */ + const int group_id = ccl_group_id(0) + ccl_num_groups(0) * ccl_group_id(1); + const int local_thread_id = ccl_local_id(0) + ccl_local_id(1) * ccl_local_size(0); + + const int thread_id = group_id * (ccl_local_size(0) * ccl_local_size(1)) + local_thread_id; + + /* find which sample range belongs to this thread */ + ccl_global SampleRange* sample_range = NULL; + + for(int i = 0; i < num_sample_ranges; i++) { + if(thread_id >= sample_ranges[i].work_offset && + thread_id < sample_ranges[i].work_offset + sample_ranges[i].w * sample_ranges[i].h) + { + sample_range = &sample_ranges[i]; + } + } + + /* check if theres work for this thread */ + if(!sample_range) { + return false; + } + + int work_offset = thread_id - sample_range->work_offset; + + if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) { + return false; + } + + if(thread_sample_range) *thread_sample_range = sample_range; + if(thread_x) *thread_x = (work_offset % sample_range->w) + sample_range->x; + if(thread_y) *thread_y = (work_offset / sample_range->w) + sample_range->y; + if(thread_sample) *thread_sample = sample_range->sample; + + return true; +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 2c73ba48881..e90c2c902cc 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -24,6 +24,7 @@ #include "../../kernel_path.h" #include "../../kernel_path_branched.h" #include "../../kernel_bake.h" +#include "../../kernel_sample_range.h" /* device data taken from CUDA occupancy calculator */ @@ -123,68 +124,37 @@ /* kernels */ extern "C" __global__ void -kernel_cuda_set_sample_range(SampleRange *sample_ranges, int range, float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) +kernel_cuda_set_sample_range( + SampleRange *sample_ranges, + int range, + float *buffer, + uint *rng_state, + int sample, + int sx, + int sy, + int sw, + int sh, + int offset, + int stride) { - SampleRange* sample_range = &sample_ranges[range]; - - sample_range->buffer = buffer; - sample_range->rng_state = rng_state; - sample_range->sample = sample; - sample_range->x = sx; - sample_range->y = sy; - sample_range->w = sw; - sample_range->h = sh; - sample_range->offset = offset; - sample_range->stride = stride; - - if(range == 0) { - sample_range->work_offset = 0; - } - else { - SampleRange* prev_range = &sample_ranges[range-1]; - sample_range->work_offset = prev_range->work_offset + prev_range->w * prev_range->h; - } + kernel_set_sample_range(sample_ranges, range, buffer, rng_state, sample, sx, sy, sw, sh, offset, stride); } extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) kernel_cuda_path_trace(SampleRange *sample_ranges, int num_sample_ranges) { - /* order threads to maintain inner block coherency */ - const int group_id = blockIdx.x + gridDim.x * blockIdx.y; - const int local_thread_id = threadIdx.x + threadIdx.y * blockDim.x; - - const int thread_id = group_id * (blockDim.x * blockDim.x) + local_thread_id; - - /* find which sample range belongs to this thread */ - SampleRange* sample_range = NULL; - - for(int i = 0; i < num_sample_ranges; i++) { - if(thread_id >= sample_ranges[i].work_offset && - thread_id < sample_ranges[i].work_offset + sample_ranges[i].w * sample_ranges[i].h) - { - sample_range = &sample_ranges[i]; - } - } - - /* check if theres work for this thread */ - if(!sample_range) { - return; - } - - int work_offset = thread_id - sample_range->work_offset; + ccl_global SampleRange* sample_range; + int x, y, sample; - if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) { + if(!kernel_pixel_sample_for_thread(kg, sample_ranges, num_sample_ranges, &x, &y, &sample, &sample_range)) { return; } - int x = (work_offset % sample_range->w) + sample_range->x; - int y = (work_offset / sample_range->w) + sample_range->y; - kernel_path_trace(NULL, sample_range->buffer, sample_range->rng_state, - sample_range->sample, + sample, x, y, sample_range->offset, sample_range->stride); @@ -195,41 +165,17 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) kernel_cuda_branched_path_trace(SampleRange *sample_ranges, int num_sample_ranges) { - /* order threads to maintain inner block coherency */ - const int group_id = blockIdx.x + gridDim.x * blockIdx.y; - const int local_thread_id = threadIdx.x + threadIdx.y * blockDim.x; - - const int thread_id = group_id * (blockDim.x * blockDim.x) + local_thread_id; - - /* find which sample range belongs to this thread */ - SampleRange* sample_range = NULL; - - for(int i = 0; i < num_sample_ranges; i++) { - if(thread_id >= sample_ranges[i].work_offset && - thread_id < sample_ranges[i].work_offset + sample_ranges[i].w * sample_ranges[i].h) - { - sample_range = &sample_ranges[i]; - } - } + ccl_global SampleRange* sample_range; + int x, y, sample; - /* check if theres work for this thread */ - if(!sample_range) { + if(!kernel_pixel_sample_for_thread(kg, sample_ranges, num_sample_ranges, &x, &y, &sample, &sample_range)) { return; } - int work_offset = thread_id - sample_range->work_offset; - - if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) { - return; - } - - int x = (work_offset % sample_range->w) + sample_range->x; - int y = (work_offset / sample_range->w) + sample_range->y; - kernel_branched_path_trace(NULL, sample_range->buffer, sample_range->rng_state, - sample_range->sample, + sample, x, y, sample_range->offset, sample_range->stride); diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index 1816d01d3b8..77bf645ef49 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -45,6 +45,8 @@ #include "../../kernel_bake.h" +#include "../../kernel_sample_range.h" + #ifdef __COMPILE_ONLY_MEGAKERNEL__ __kernel void kernel_ocl_path_trace( @@ -65,41 +67,17 @@ __kernel void kernel_ocl_path_trace( kg->name = name; #include "../../kernel_textures.h" - /* order threads to maintain inner block coherency */ - const int group_id = get_group_id(0) + get_num_groups(0) * get_group_id(1); - const int local_thread_id = get_local_id(0) + get_local_id(1) * get_local_size(0); - - const int thread_id = group_id * (get_local_size(0) * get_local_size(1)) + local_thread_id; - - /* find which sample range belongs to this thread */ - ccl_global SampleRange* sample_range = NULL; - - for(int i = 0; i < num_sample_ranges; i++) { - if(thread_id >= sample_ranges[i].work_offset && - thread_id < sample_ranges[i].work_offset + sample_ranges[i].w * sample_ranges[i].h) - { - sample_range = &sample_ranges[i]; - } - } - - /* check if theres work for this thread */ - if(!sample_range) { - return; - } - - int work_offset = thread_id - sample_range->work_offset; + ccl_global SampleRange* sample_range; + int x, y, sample; - if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) { + if(!kernel_pixel_sample_for_thread(kg, sample_ranges, num_sample_ranges, &x, &y, &sample, &sample_range)) { return; } - int x = (work_offset % sample_range->w) + sample_range->x; - int y = (work_offset / sample_range->w) + sample_range->y; - kernel_path_trace(kg, sample_range->buffer, sample_range->rng_state, - sample_range->sample, + sample, x, y, sample_range->offset, sample_range->stride); @@ -121,25 +99,7 @@ __kernel void kernel_ocl_set_sample_range( int offset, int stride) { - ccl_global SampleRange* sample_range = &sample_ranges[range]; - - sample_range->buffer = buffer; - sample_range->rng_state = rng_state; - sample_range->sample = sample; - sample_range->x = sx; - sample_range->y = sy; - sample_range->w = sw; - sample_range->h = sh; - sample_range->offset = offset; - sample_range->stride = stride; - - if(range == 0) { - sample_range->work_offset = 0; - } - else { - ccl_global SampleRange* prev_range = &sample_ranges[range-1]; - sample_range->work_offset = prev_range->work_offset + prev_range->w * prev_range->h; - } + kernel_set_sample_range(sample_ranges, range, buffer, rng_state, sample, sx, sy, sw, sh, offset, stride); } __kernel void kernel_ocl_shader( -- cgit v1.2.3