Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMai Lavelle <mai.lavelle@gmail.com>2017-01-14 10:01:00 +0300
committerMai Lavelle <mai.lavelle@gmail.com>2017-01-14 10:02:32 +0300
commit4923af4c773f64c54f1b46695ecb55525c03a437 (patch)
treeac79b96e189fd78cbba14bd348a73153a8166f10
parent2922c57c49983e7254c1f0fda65e4a232a091944 (diff)
Cycles: Deduplicate sample range setting and getting
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/kernel_sample_range.h99
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel.cu98
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel.cl54
4 files changed, 129 insertions, 123 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 56bcafbce38..e0fbc57edd2 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -72,6 +72,7 @@ set(SRC_HEADERS
kernel_projection.h
kernel_queues.h
kernel_random.h
+ kernel_sample_range.h
kernel_shader.h
kernel_shadow.h
kernel_subsurface.h
diff --git a/intern/cycles/kernel/kernel_sample_range.h b/intern/cycles/kernel/kernel_sample_range.h
new file mode 100644
index 00000000000..6694a7e959d
--- /dev/null
+++ b/intern/cycles/kernel/kernel_sample_range.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_set_sample_range(
+ ccl_global SampleRange *sample_ranges,
+ int range,
+ ccl_global float *buffer,
+ ccl_global uint *rng_state,
+ int sample,
+ int sx,
+ int sy,
+ int sw,
+ int sh,
+ int offset,
+ int stride)
+{
+ ccl_global SampleRange* sample_range = &sample_ranges[range];
+
+ sample_range->buffer = buffer;
+ sample_range->rng_state = rng_state;
+ sample_range->sample = sample;
+ sample_range->x = sx;
+ sample_range->y = sy;
+ sample_range->w = sw;
+ sample_range->h = sh;
+ sample_range->offset = offset;
+ sample_range->stride = stride;
+
+ if(range == 0) {
+ sample_range->work_offset = 0;
+ }
+ else {
+ ccl_global SampleRange* prev_range = &sample_ranges[range-1];
+ sample_range->work_offset = prev_range->work_offset + prev_range->w * prev_range->h;
+ }
+}
+
+ccl_device_inline bool kernel_pixel_sample_for_thread(
+ KernelGlobals *kg,
+ ccl_global SampleRange *sample_ranges,
+ int num_sample_ranges,
+ int *thread_x,
+ int *thread_y,
+ int *thread_sample,
+ ccl_global SampleRange **thread_sample_range)
+{
+ /* order threads to maintain inner block coherency */
+ const int group_id = ccl_group_id(0) + ccl_num_groups(0) * ccl_group_id(1);
+ const int local_thread_id = ccl_local_id(0) + ccl_local_id(1) * ccl_local_size(0);
+
+ const int thread_id = group_id * (ccl_local_size(0) * ccl_local_size(1)) + local_thread_id;
+
+ /* find which sample range belongs to this thread */
+ ccl_global SampleRange* sample_range = NULL;
+
+ for(int i = 0; i < num_sample_ranges; i++) {
+ if(thread_id >= sample_ranges[i].work_offset &&
+ thread_id < sample_ranges[i].work_offset + sample_ranges[i].w * sample_ranges[i].h)
+ {
+ sample_range = &sample_ranges[i];
+ }
+ }
+
+ /* check if theres work for this thread */
+ if(!sample_range) {
+ return false;
+ }
+
+ int work_offset = thread_id - sample_range->work_offset;
+
+ if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) {
+ return false;
+ }
+
+ if(thread_sample_range) *thread_sample_range = sample_range;
+ if(thread_x) *thread_x = (work_offset % sample_range->w) + sample_range->x;
+ if(thread_y) *thread_y = (work_offset / sample_range->w) + sample_range->y;
+ if(thread_sample) *thread_sample = sample_range->sample;
+
+ return true;
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 2c73ba48881..e90c2c902cc 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -24,6 +24,7 @@
#include "../../kernel_path.h"
#include "../../kernel_path_branched.h"
#include "../../kernel_bake.h"
+#include "../../kernel_sample_range.h"
/* device data taken from CUDA occupancy calculator */
@@ -123,68 +124,37 @@
/* kernels */
extern "C" __global__ void
-kernel_cuda_set_sample_range(SampleRange *sample_ranges, int range, float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+kernel_cuda_set_sample_range(
+ SampleRange *sample_ranges,
+ int range,
+ float *buffer,
+ uint *rng_state,
+ int sample,
+ int sx,
+ int sy,
+ int sw,
+ int sh,
+ int offset,
+ int stride)
{
- SampleRange* sample_range = &sample_ranges[range];
-
- sample_range->buffer = buffer;
- sample_range->rng_state = rng_state;
- sample_range->sample = sample;
- sample_range->x = sx;
- sample_range->y = sy;
- sample_range->w = sw;
- sample_range->h = sh;
- sample_range->offset = offset;
- sample_range->stride = stride;
-
- if(range == 0) {
- sample_range->work_offset = 0;
- }
- else {
- SampleRange* prev_range = &sample_ranges[range-1];
- sample_range->work_offset = prev_range->work_offset + prev_range->w * prev_range->h;
- }
+ kernel_set_sample_range(sample_ranges, range, buffer, rng_state, sample, sx, sy, sw, sh, offset, stride);
}
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_path_trace(SampleRange *sample_ranges, int num_sample_ranges)
{
- /* order threads to maintain inner block coherency */
- const int group_id = blockIdx.x + gridDim.x * blockIdx.y;
- const int local_thread_id = threadIdx.x + threadIdx.y * blockDim.x;
-
- const int thread_id = group_id * (blockDim.x * blockDim.x) + local_thread_id;
-
- /* find which sample range belongs to this thread */
- SampleRange* sample_range = NULL;
-
- for(int i = 0; i < num_sample_ranges; i++) {
- if(thread_id >= sample_ranges[i].work_offset &&
- thread_id < sample_ranges[i].work_offset + sample_ranges[i].w * sample_ranges[i].h)
- {
- sample_range = &sample_ranges[i];
- }
- }
-
- /* check if theres work for this thread */
- if(!sample_range) {
- return;
- }
-
- int work_offset = thread_id - sample_range->work_offset;
+ ccl_global SampleRange* sample_range;
+ int x, y, sample;
- if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) {
+ if(!kernel_pixel_sample_for_thread(kg, sample_ranges, num_sample_ranges, &x, &y, &sample, &sample_range)) {
return;
}
- int x = (work_offset % sample_range->w) + sample_range->x;
- int y = (work_offset / sample_range->w) + sample_range->y;
-
kernel_path_trace(NULL,
sample_range->buffer,
sample_range->rng_state,
- sample_range->sample,
+ sample,
x, y,
sample_range->offset,
sample_range->stride);
@@ -195,41 +165,17 @@ extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
kernel_cuda_branched_path_trace(SampleRange *sample_ranges, int num_sample_ranges)
{
- /* order threads to maintain inner block coherency */
- const int group_id = blockIdx.x + gridDim.x * blockIdx.y;
- const int local_thread_id = threadIdx.x + threadIdx.y * blockDim.x;
-
- const int thread_id = group_id * (blockDim.x * blockDim.x) + local_thread_id;
-
- /* find which sample range belongs to this thread */
- SampleRange* sample_range = NULL;
-
- for(int i = 0; i < num_sample_ranges; i++) {
- if(thread_id >= sample_ranges[i].work_offset &&
- thread_id < sample_ranges[i].work_offset + sample_ranges[i].w * sample_ranges[i].h)
- {
- sample_range = &sample_ranges[i];
- }
- }
+ ccl_global SampleRange* sample_range;
+ int x, y, sample;
- /* check if theres work for this thread */
- if(!sample_range) {
+ if(!kernel_pixel_sample_for_thread(kg, sample_ranges, num_sample_ranges, &x, &y, &sample, &sample_range)) {
return;
}
- int work_offset = thread_id - sample_range->work_offset;
-
- if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) {
- return;
- }
-
- int x = (work_offset % sample_range->w) + sample_range->x;
- int y = (work_offset / sample_range->w) + sample_range->y;
-
kernel_branched_path_trace(NULL,
sample_range->buffer,
sample_range->rng_state,
- sample_range->sample,
+ sample,
x, y,
sample_range->offset,
sample_range->stride);
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 1816d01d3b8..77bf645ef49 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -45,6 +45,8 @@
#include "../../kernel_bake.h"
+#include "../../kernel_sample_range.h"
+
#ifdef __COMPILE_ONLY_MEGAKERNEL__
__kernel void kernel_ocl_path_trace(
@@ -65,41 +67,17 @@ __kernel void kernel_ocl_path_trace(
kg->name = name;
#include "../../kernel_textures.h"
- /* order threads to maintain inner block coherency */
- const int group_id = get_group_id(0) + get_num_groups(0) * get_group_id(1);
- const int local_thread_id = get_local_id(0) + get_local_id(1) * get_local_size(0);
-
- const int thread_id = group_id * (get_local_size(0) * get_local_size(1)) + local_thread_id;
-
- /* find which sample range belongs to this thread */
- ccl_global SampleRange* sample_range = NULL;
-
- for(int i = 0; i < num_sample_ranges; i++) {
- if(thread_id >= sample_ranges[i].work_offset &&
- thread_id < sample_ranges[i].work_offset + sample_ranges[i].w * sample_ranges[i].h)
- {
- sample_range = &sample_ranges[i];
- }
- }
-
- /* check if theres work for this thread */
- if(!sample_range) {
- return;
- }
-
- int work_offset = thread_id - sample_range->work_offset;
+ ccl_global SampleRange* sample_range;
+ int x, y, sample;
- if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) {
+ if(!kernel_pixel_sample_for_thread(kg, sample_ranges, num_sample_ranges, &x, &y, &sample, &sample_range)) {
return;
}
- int x = (work_offset % sample_range->w) + sample_range->x;
- int y = (work_offset / sample_range->w) + sample_range->y;
-
kernel_path_trace(kg,
sample_range->buffer,
sample_range->rng_state,
- sample_range->sample,
+ sample,
x, y,
sample_range->offset,
sample_range->stride);
@@ -121,25 +99,7 @@ __kernel void kernel_ocl_set_sample_range(
int offset,
int stride)
{
- ccl_global SampleRange* sample_range = &sample_ranges[range];
-
- sample_range->buffer = buffer;
- sample_range->rng_state = rng_state;
- sample_range->sample = sample;
- sample_range->x = sx;
- sample_range->y = sy;
- sample_range->w = sw;
- sample_range->h = sh;
- sample_range->offset = offset;
- sample_range->stride = stride;
-
- if(range == 0) {
- sample_range->work_offset = 0;
- }
- else {
- ccl_global SampleRange* prev_range = &sample_ranges[range-1];
- sample_range->work_offset = prev_range->work_offset + prev_range->w * prev_range->h;
- }
+ kernel_set_sample_range(sample_ranges, range, buffer, rng_state, sample, sx, sy, sw, sh, offset, stride);
}
__kernel void kernel_ocl_shader(