Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMai Lavelle <mai.lavelle@gmail.com>2017-03-04 14:29:01 +0300
committerMai Lavelle <mai.lavelle@gmail.com>2017-03-08 09:31:30 +0300
commit306034790fb75ca2c182d55132287cc0ed4b9c2f (patch)
tree272a582174d84ebcb25c0bf3a4922d29b6c0b96f /intern/cycles/kernel
parent997e345bd25bf28a8a5d67d5ffcb5b70ff52ecdd (diff)
Cycles: Calculate size of split state buffer kernel side
By calculating the size of the state buffer in the kernel rather than the host less code is needed and the size actually reflects the requested features. Will also be a little faster in some cases because of larger global work size.
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt2
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu7
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl29
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h2
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h28
5 files changed, 45 insertions, 23 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index df40c3a0e8e..6867ab02318 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SRC
kernels/cpu/kernel.cpp
kernels/cpu/kernel_split.cpp
kernels/opencl/kernel.cl
+ kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_data_init.cl
kernels/opencl/kernel_path_init.cl
kernels/opencl/kernel_queue_enqueue.cl
@@ -399,6 +400,7 @@ endif()
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 53a36b15e40..759475b175f 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -41,6 +41,13 @@
/* kernels */
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_state_buffer_size(uint num_threads, uint *size)
+{
+ *size = split_data_buffer_size(NULL, num_threads);
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_path_trace_data_init(
ccl_global void *split_data_buffer,
int num_elements,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
new file mode 100644
index 00000000000..0a1843ff8bd
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+
+__kernel void kernel_ocl_path_trace_state_buffer_size(
+ KernelGlobals *kg,
+ ccl_constant KernelData *data,
+ uint num_threads,
+ ccl_global uint *size)
+{
+ kg->data = data;
+ *size = split_data_buffer_size(kg, num_threads);
+}
+
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 785103a79ac..9b62d65ffd9 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -93,7 +93,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
kernel_split_params.buffer = buffer;
- split_data_init(&kernel_split_state, num_elements, split_data_buffer, ray_state);
+ split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
#ifdef __KERNEL_OPENCL__
#define KERNEL_TEX(type, ttype, name) \
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 5dd53f42478..0a2ba8d1e1a 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -78,6 +78,8 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 2) \
SPLIT_DATA_DEBUG_ENTRIES \
/* struct that holds pointers to data in the shared state buffer */
@@ -86,37 +88,25 @@ typedef struct SplitData {
SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
- /* size calculation for these is non trivial, so they are left out of SPLIT_DATA_ENTRIES and handled separately */
- ShaderData *sd;
- ShaderData *sd_DL_shadow;
-
/* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
* the host easily) but is still used the same as the other data so we have it here in this struct as well
*/
ccl_global char *ray_state;
} SplitData;
-#define SIZEOF_SD(max_closure) (sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - (max_closure))))
-
-ccl_device_inline size_t split_data_buffer_size(size_t num_elements,
- size_t max_closure,
- size_t per_thread_output_buffer_size)
+/* TODO: find a way to get access to kg here */
+ccl_device_inline size_t split_data_buffer_size(ccl_global void *kg, size_t num_elements)
{
size_t size = 0;
#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
- /* TODO(sergey): This will actually over-allocate if
- * particular kernel does not support multiclosure.
- */
- size += align_up(num_elements * SIZEOF_SD(max_closure), 16); /* sd */
- size += align_up(2 * num_elements * SIZEOF_SD(max_closure), 16); /* sd_DL_shadow */
-
return size;
}
-ccl_device_inline void split_data_init(ccl_global SplitData *split_data,
+ccl_device_inline void split_data_init(ccl_global void *kg,
+ ccl_global SplitData *split_data,
size_t num_elements,
ccl_global void *data,
ccl_global char *ray_state)
@@ -128,12 +118,6 @@ ccl_device_inline void split_data_init(ccl_global SplitData *split_data,
SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
- split_data->sd = (ShaderData*)p;
- p += align_up(num_elements * SIZEOF_SD(MAX_CLOSURE), 16);
-
- split_data->sd_DL_shadow = (ShaderData*)p;
- p += align_up(2 * num_elements * SIZEOF_SD(MAX_CLOSURE), 16);
-
split_data->ray_state = ray_state;
}