Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMai Lavelle <mai.lavelle@gmail.com>2017-03-01 09:05:55 +0300
committerMai Lavelle <mai.lavelle@gmail.com>2017-03-08 09:30:43 +0300
commit4cf501b83557ed5d64dbd2ddb13e1e8c5add88f5 (patch)
tree9f142597016bae22e73137aeeaf4d8107af3d8c8 /intern/cycles
parent5b8f1c8d342274e08d1a489b655ca4138eb1c5fc (diff)
Cycles: Split path initialization into own kernel
This makes it easier to initialize things correctly in the data_init kernel before they are needed by path tracing.
Diffstat (limited to 'intern/cycles')
-rw-r--r--intern/cycles/device/device_split_kernel.cpp4
-rw-r--r--intern/cycles/device/device_split_kernel.h1
-rw-r--r--intern/cycles/kernel/CMakeLists.txt3
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu.h1
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h3
-rw-r--r--intern/cycles/kernel/kernels/cuda/kernel_split.cu2
-rw-r--r--intern/cycles/kernel/kernels/opencl/kernel_path_init.cl26
-rw-r--r--intern/cycles/kernel/split/kernel_data_init.h101
-rw-r--r--intern/cycles/kernel/split/kernel_path_init.h104
9 files changed, 144 insertions, 101 deletions
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index c50afe85da5..85da7024a2c 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -41,6 +41,7 @@ DeviceSplitKernel::~DeviceSplitKernel()
device->mem_free(queue_index);
device->mem_free(work_pool_wgs);
+ delete kernel_path_init;
delete kernel_scene_intersect;
delete kernel_lamp_emission;
delete kernel_queue_enqueue;
@@ -61,6 +62,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
return false; \
}
+ LOAD_KERNEL(path_init);
LOAD_KERNEL(scene_intersect);
LOAD_KERNEL(lamp_emission);
LOAD_KERNEL(queue_enqueue);
@@ -200,6 +202,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
return false;
}
+ ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
+
bool activeRaysAvailable = true;
while(activeRaysAvailable) {
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index b3106fd5632..1903574f0b5 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -55,6 +55,7 @@ class DeviceSplitKernel {
private:
Device *device;
+ SplitKernelFunction *kernel_path_init;
SplitKernelFunction *kernel_scene_intersect;
SplitKernelFunction *kernel_lamp_emission;
SplitKernelFunction *kernel_queue_enqueue;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 685955170b5..d467e40b3e9 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRC
kernels/cpu/kernel_split.cpp
kernels/opencl/kernel.cl
kernels/opencl/kernel_data_init.cl
+ kernels/opencl/kernel_path_init.cl
kernels/opencl/kernel_queue_enqueue.cl
kernels/opencl/kernel_scene_intersect.cl
kernels/opencl/kernel_lamp_emission.cl
@@ -201,6 +202,7 @@ set(SRC_SPLIT_HEADERS
split/kernel_holdout_emission_blurring_pathtermination_ao.h
split/kernel_lamp_emission.h
split/kernel_next_iteration_setup.h
+ split/kernel_path_init.h
split/kernel_queue_enqueue.h
split/kernel_scene_intersect.h
split/kernel_shader_eval.h
@@ -400,6 +402,7 @@ endif()
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 1d710157817..8c1675665cb 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -71,6 +71,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
+DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index c59f4892546..f6e0591ef24 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -38,6 +38,7 @@
# include "split/kernel_split_common.h"
# include "split/kernel_data_init.h"
+# include "split/kernel_path_init.h"
# include "split/kernel_scene_intersect.h"
# include "split/kernel_lamp_emission.h"
# include "split/kernel_queue_enqueue.h"
@@ -163,6 +164,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
kernel_##name(kg); \
}
+DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
@@ -186,6 +188,7 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
REGISTER(shader);
REGISTER(data_init);
+ REGISTER(path_init);
REGISTER(scene_intersect);
REGISTER(lamp_emission);
REGISTER(queue_enqueue);
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 441cd96fafa..3a883265157 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -25,6 +25,7 @@
#include "../../split/kernel_split_common.h"
#include "../../split/kernel_data_init.h"
+#include "../../split/kernel_path_init.h"
#include "../../split/kernel_scene_intersect.h"
#include "../../split/kernel_lamp_emission.h"
#include "../../split/kernel_queue_enqueue.h"
@@ -81,6 +82,7 @@ kernel_cuda_path_trace_data_init(
kernel_##name(NULL); \
}
+DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
new file mode 100644
index 00000000000..7e9e4a02529
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+#include "split/kernel_path_init.h"
+
+__kernel void kernel_ocl_path_trace_path_init(
+ KernelGlobals *kg,
+ ccl_constant KernelData *data)
+{
+ kernel_path_init(kg);
+}
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 5604363dcd9..982c7be2008 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -18,33 +18,6 @@ CCL_NAMESPACE_BEGIN
/* Note on kernel_data_initialization kernel
* This kernel Initializes structures needed in path-iteration kernels.
- * This is the first kernel in ray-tracing logic.
- *
- * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
- *
- * Its input and output are as follows,
- *
- * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng
- * Un-initialized throughput -------| |--- Initialized throughput
- * Un-initialized L_transparent ----| |--- Initialized L_transparent
- * Un-initialized PathRadiance -----| |--- Initialized PathRadiance
- * Un-initialized Ray --------------| |--- Initialized Ray
- * Un-initialized PathState --------| |--- Initialized PathState
- * Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT)
- * Un-initialized QueueIndex -------| |--- Initialized QueueIndex (to 0)
- * Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false)
- * Un-initialized ray_state --------| |--- Initialized ray_state
- * parallel_samples --------------- | |--- Initialized per_sample_output_buffers
- * rng_state -----------------------| |--- Initialized work_array
- * data ----------------------------| |--- Initialized work_pool_wgs
- * start_sample --------------------| |
- * sx ------------------------------| |
- * sy ------------------------------| |
- * sw ------------------------------| |
- * sh ------------------------------| |
- * stride --------------------------| |
- * queuesize -----------------------| |
- * num_samples ---------------------| |
*
* Note on Queues :
* All slots in queues are initialized to queue empty slot;
@@ -137,80 +110,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
*/
*use_queues_flag = 0;
}
-
- int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
-
- /* This is the first assignment to ray_state;
- * So we dont use ASSIGN_RAY_STATE macro.
- */
- kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
-
- unsigned int my_sample;
- unsigned int pixel_x;
- unsigned int pixel_y;
- unsigned int tile_x;
- unsigned int tile_y;
- unsigned int my_sample_tile;
-
- unsigned int work_index = 0;
- /* Get work. */
- if(!get_next_work(kg, &work_index, ray_index)) {
- /* No more work, mark ray as inactive */
- kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
-
- return;
- }
-
- /* Get the sample associated with the work. */
- my_sample = get_work_sample(kg, work_index, ray_index) + start_sample;
-
- my_sample_tile = 0;
-
- /* Get pixel and tile position associated with the work. */
- get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
- &tile_x, &tile_y,
- work_index,
- ray_index);
- kernel_split_state.work_array[ray_index] = work_index;
-
- rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
-
- ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
- per_sample_output_buffers += ((tile_x + (tile_y * stride)) + (my_sample_tile)) * kernel_data.film.pass_stride;
-
- /* Initialize random numbers and ray. */
- kernel_path_trace_setup(kg,
- rng_state,
- my_sample,
- pixel_x, pixel_y,
- &kernel_split_state.rng[ray_index],
- &kernel_split_state.ray[ray_index]);
-
- if(kernel_split_state.ray[ray_index].t != 0.0f) {
- /* Initialize throughput, L_transparent, Ray, PathState;
- * These rays proceed with path-iteration.
- */
- kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
- kernel_split_state.L_transparent[ray_index] = 0.0f;
- path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
- path_state_init(kg,
- &kernel_split_state.sd_DL_shadow[ray_index],
- &kernel_split_state.path_state[ray_index],
- &kernel_split_state.rng[ray_index],
- my_sample,
- &kernel_split_state.ray[ray_index]);
-#ifdef __KERNEL_DEBUG__
- debug_data_init(&kernel_split_state.debug_data[ray_index]);
-#endif
- }
- else {
- /* These rays do not participate in path-iteration. */
- float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- /* Accumulate result in output buffer. */
- kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
- path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
- }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
new file mode 100644
index 00000000000..e613db214ed
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_path_init.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* This kernel initializes structures needed in path-iteration kernels.
+ * This is the first kernel in ray-tracing logic.
+ *
+ * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
+ */
+
+ccl_device void kernel_path_init(KernelGlobals *kg) {
+ int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
+
+ /* This is the first assignment to ray_state;
+ * So we dont use ASSIGN_RAY_STATE macro.
+ */
+ kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
+
+ unsigned int my_sample;
+ unsigned int pixel_x;
+ unsigned int pixel_y;
+ unsigned int tile_x;
+ unsigned int tile_y;
+ unsigned int my_sample_tile;
+
+ unsigned int work_index = 0;
+ /* Get work. */
+ if(!get_next_work(kg, &work_index, ray_index)) {
+ /* No more work, mark ray as inactive */
+ kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
+
+ return;
+ }
+
+ /* Get the sample associated with the work. */
+ my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
+
+ my_sample_tile = 0;
+
+ /* Get pixel and tile position associated with the work. */
+ get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
+ &tile_x, &tile_y,
+ work_index,
+ ray_index);
+ kernel_split_state.work_array[ray_index] = work_index;
+
+ ccl_global uint *rng_state = kernel_split_params.rng_state;
+ rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride;
+
+ ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
+ per_sample_output_buffers += (tile_x + tile_y * kernel_split_params.stride + my_sample_tile)
+ * kernel_data.film.pass_stride;
+
+ /* Initialize random numbers and ray. */
+ kernel_path_trace_setup(kg,
+ rng_state,
+ my_sample,
+ pixel_x, pixel_y,
+ &kernel_split_state.rng[ray_index],
+ &kernel_split_state.ray[ray_index]);
+
+ if(kernel_split_state.ray[ray_index].t != 0.0f) {
+ /* Initialize throughput, L_transparent, Ray, PathState;
+ * These rays proceed with path-iteration.
+ */
+ kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
+ kernel_split_state.L_transparent[ray_index] = 0.0f;
+ path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
+ path_state_init(kg,
+ &kernel_split_state.sd_DL_shadow[ray_index],
+ &kernel_split_state.path_state[ray_index],
+ &kernel_split_state.rng[ray_index],
+ my_sample,
+ &kernel_split_state.ray[ray_index]);
+#ifdef __KERNEL_DEBUG__
+ debug_data_init(&kernel_split_state.debug_data[ray_index]);
+#endif
+ }
+ else {
+ /* These rays do not participate in path-iteration. */
+ float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ /* Accumulate result in output buffer. */
+ kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
+ path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
+ ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
+ }
+}
+
+CCL_NAMESPACE_END
+