Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt2
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu7
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl29
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h2
-rw-r--r--intern/cycles/kernel/split/kernel_split_data.h28
5 files changed, 45 insertions, 23 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index df40c3a0e8e..6867ab02318 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SRC
kernels/cpu/kernel.cpp
kernels/cpu/kernel_split.cpp
kernels/opencl/kernel.cl
+ kernels/opencl/kernel_state_buffer_size.cl
kernels/opencl/kernel_data_init.cl
kernels/opencl/kernel_path_init.cl
kernels/opencl/kernel_queue_enqueue.cl
@@ -399,6 +400,7 @@ endif()
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_state_buffer_size.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 53a36b15e40..759475b175f 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -41,6 +41,13 @@
/* kernels */
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_state_buffer_size(uint num_threads, uint *size)
+{
+ *size = split_data_buffer_size(NULL, num_threads);
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_path_trace_data_init(
ccl_global void *split_data_buffer,
int num_elements,
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
new file mode 100644
index 00000000000..0a1843ff8bd
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+
+__kernel void kernel_ocl_path_trace_state_buffer_size(
+ KernelGlobals *kg,
+ ccl_constant KernelData *data,
+ uint num_threads,
+ ccl_global uint *size)
+{
+ kg->data = data;
+ *size = split_data_buffer_size(kg, num_threads);
+}
+
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 785103a79ac..9b62d65ffd9 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -93,7 +93,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
kernel_split_params.buffer = buffer;
- split_data_init(&kernel_split_state, num_elements, split_data_buffer, ray_state);
+ split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
#ifdef __KERNEL_OPENCL__
#define KERNEL_TEX(type, ttype, name) \
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 5dd53f42478..0a2ba8d1e1a 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -78,6 +78,8 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \
SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
+ SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 2) \
SPLIT_DATA_DEBUG_ENTRIES \
/* struct that holds pointers to data in the shared state buffer */
@@ -86,37 +88,25 @@ typedef struct SplitData {
SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
- /* size calculation for these is non trivial, so they are left out of SPLIT_DATA_ENTRIES and handled separately */
- ShaderData *sd;
- ShaderData *sd_DL_shadow;
-
/* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
* the host easily) but is still used the same as the other data so we have it here in this struct as well
*/
ccl_global char *ray_state;
} SplitData;
-#define SIZEOF_SD(max_closure) (sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - (max_closure))))
-
-ccl_device_inline size_t split_data_buffer_size(size_t num_elements,
- size_t max_closure,
- size_t per_thread_output_buffer_size)
+/* TODO: find a way to get access to kg here */
+ccl_device_inline size_t split_data_buffer_size(ccl_global void *kg, size_t num_elements)
{
size_t size = 0;
#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
- /* TODO(sergey): This will actually over-allocate if
- * particular kernel does not support multiclosure.
- */
- size += align_up(num_elements * SIZEOF_SD(max_closure), 16); /* sd */
- size += align_up(2 * num_elements * SIZEOF_SD(max_closure), 16); /* sd_DL_shadow */
-
return size;
}
-ccl_device_inline void split_data_init(ccl_global SplitData *split_data,
+ccl_device_inline void split_data_init(ccl_global void *kg,
+ ccl_global SplitData *split_data,
size_t num_elements,
ccl_global void *data,
ccl_global char *ray_state)
@@ -128,12 +118,6 @@ ccl_device_inline void split_data_init(ccl_global SplitData *split_data,
SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
- split_data->sd = (ShaderData*)p;
- p += align_up(num_elements * SIZEOF_SD(MAX_CLOSURE), 16);
-
- split_data->sd_DL_shadow = (ShaderData*)p;
- p += align_up(2 * num_elements * SIZEOF_SD(MAX_CLOSURE), 16);
-
split_data->ray_state = ray_state;
}