diff options
author | Mai Lavelle <mai.lavelle@gmail.com> | 2017-03-01 09:05:55 +0300 |
---|---|---|
committer | Mai Lavelle <mai.lavelle@gmail.com> | 2017-03-08 09:30:43 +0300 |
commit | 4cf501b83557ed5d64dbd2ddb13e1e8c5add88f5 (patch) | |
tree | 9f142597016bae22e73137aeeaf4d8107af3d8c8 /intern | |
parent | 5b8f1c8d342274e08d1a489b655ca4138eb1c5fc (diff) |
Cycles: Split path initialization into own kernel
This makes it easier to initialize things correctly in the data_init kernel
before they are needed by path tracing.
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/device/device_split_kernel.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/device_split_kernel.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cuda/kernel_split.cu | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/opencl/kernel_path_init.cl | 26 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_data_init.h | 101 | ||||
-rw-r--r-- | intern/cycles/kernel/split/kernel_path_init.h | 104 |
9 files changed, 144 insertions, 101 deletions
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index c50afe85da5..85da7024a2c 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -41,6 +41,7 @@ DeviceSplitKernel::~DeviceSplitKernel() device->mem_free(queue_index); device->mem_free(work_pool_wgs); + delete kernel_path_init; delete kernel_scene_intersect; delete kernel_lamp_emission; delete kernel_queue_enqueue; @@ -61,6 +62,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe return false; \ } + LOAD_KERNEL(path_init); LOAD_KERNEL(scene_intersect); LOAD_KERNEL(lamp_emission); LOAD_KERNEL(queue_enqueue); @@ -200,6 +202,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, return false; } + ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size); + bool activeRaysAvailable = true; while(activeRaysAvailable) { diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index b3106fd5632..1903574f0b5 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -55,6 +55,7 @@ class DeviceSplitKernel { private: Device *device; + SplitKernelFunction *kernel_path_init; SplitKernelFunction *kernel_scene_intersect; SplitKernelFunction *kernel_lamp_emission; SplitKernelFunction *kernel_queue_enqueue; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 685955170b5..d467e40b3e9 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -16,6 +16,7 @@ set(SRC kernels/cpu/kernel_split.cpp kernels/opencl/kernel.cl kernels/opencl/kernel_data_init.cl + kernels/opencl/kernel_path_init.cl kernels/opencl/kernel_queue_enqueue.cl kernels/opencl/kernel_scene_intersect.cl kernels/opencl/kernel_lamp_emission.cl @@ -201,6 +202,7 @@ set(SRC_SPLIT_HEADERS split/kernel_holdout_emission_blurring_pathtermination_ao.h split/kernel_lamp_emission.h split/kernel_next_iteration_setup.h + split/kernel_path_init.h split/kernel_queue_enqueue.h split/kernel_scene_intersect.h split/kernel_shader_eval.h @@ -400,6 +402,7 @@ endif() delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 1d710157817..8c1675665cb 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -71,6 +71,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( #define DECLARE_SPLIT_KERNEL_FUNCTION(name) \ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data); +DECLARE_SPLIT_KERNEL_FUNCTION(path_init) DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect) DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission) DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue) diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index c59f4892546..f6e0591ef24 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -38,6 +38,7 @@ # include "split/kernel_split_common.h" # include "split/kernel_data_init.h" +# include "split/kernel_path_init.h" # include "split/kernel_scene_intersect.h" # include "split/kernel_lamp_emission.h" # include "split/kernel_queue_enqueue.h" @@ -163,6 +164,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, kernel_##name(kg); \ } +DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) @@ -186,6 +188,7 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, REGISTER(shader); REGISTER(data_init); + REGISTER(path_init); REGISTER(scene_intersect); REGISTER(lamp_emission); REGISTER(queue_enqueue); diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu index 441cd96fafa..3a883265157 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu @@ -25,6 +25,7 @@ #include "../../split/kernel_split_common.h" #include "../../split/kernel_data_init.h" +#include "../../split/kernel_path_init.h" #include "../../split/kernel_scene_intersect.h" #include "../../split/kernel_lamp_emission.h" #include "../../split/kernel_queue_enqueue.h" @@ -81,6 +82,7 @@ kernel_cuda_path_trace_data_init( kernel_##name(NULL); \ } +DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl new file mode 100644 index 00000000000..7e9e4a02529 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" +#include "split/kernel_path_init.h" + +__kernel void kernel_ocl_path_trace_path_init( + KernelGlobals *kg, + ccl_constant KernelData *data) +{ + kernel_path_init(kg); +} diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 5604363dcd9..982c7be2008 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -18,33 +18,6 @@ CCL_NAMESPACE_BEGIN /* Note on kernel_data_initialization kernel * This kernel Initializes structures needed in path-iteration kernels. - * This is the first kernel in ray-tracing logic. - * - * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE - * - * Its input and output are as follows, - * - * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng - * Un-initialized throughput -------| |--- Initialized throughput - * Un-initialized L_transparent ----| |--- Initialized L_transparent - * Un-initialized PathRadiance -----| |--- Initialized PathRadiance - * Un-initialized Ray --------------| |--- Initialized Ray - * Un-initialized PathState --------| |--- Initialized PathState - * Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT) - * Un-initialized QueueIndex -------| |--- Initialized QueueIndex (to 0) - * Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false) - * Un-initialized ray_state --------| |--- Initialized ray_state - * parallel_samples --------------- | |--- Initialized per_sample_output_buffers - * rng_state -----------------------| |--- Initialized work_array - * data ----------------------------| |--- Initialized work_pool_wgs - * start_sample --------------------| | - * sx ------------------------------| | - * sy ------------------------------| | - * sw ------------------------------| | - * sh ------------------------------| | - * stride --------------------------| | - * queuesize -----------------------| | - * num_samples ---------------------| | * * Note on Queues : * All slots in queues are initialized to queue empty slot; @@ -137,80 +110,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( */ *use_queues_flag = 0; } - - int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); - - /* This is the first assignment to ray_state; - * So we dont use ASSIGN_RAY_STATE macro. - */ - kernel_split_state.ray_state[ray_index] = RAY_ACTIVE; - - unsigned int my_sample; - unsigned int pixel_x; - unsigned int pixel_y; - unsigned int tile_x; - unsigned int tile_y; - unsigned int my_sample_tile; - - unsigned int work_index = 0; - /* Get work. */ - if(!get_next_work(kg, &work_index, ray_index)) { - /* No more work, mark ray as inactive */ - kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; - - return; - } - - /* Get the sample associated with the work. */ - my_sample = get_work_sample(kg, work_index, ray_index) + start_sample; - - my_sample_tile = 0; - - /* Get pixel and tile position associated with the work. */ - get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, - &tile_x, &tile_y, - work_index, - ray_index); - kernel_split_state.work_array[ray_index] = work_index; - - rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride; - - ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers; - per_sample_output_buffers += ((tile_x + (tile_y * stride)) + (my_sample_tile)) * kernel_data.film.pass_stride; - - /* Initialize random numbers and ray. */ - kernel_path_trace_setup(kg, - rng_state, - my_sample, - pixel_x, pixel_y, - &kernel_split_state.rng[ray_index], - &kernel_split_state.ray[ray_index]); - - if(kernel_split_state.ray[ray_index].t != 0.0f) { - /* Initialize throughput, L_transparent, Ray, PathState; - * These rays proceed with path-iteration. - */ - kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); - kernel_split_state.L_transparent[ray_index] = 0.0f; - path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); - path_state_init(kg, - &kernel_split_state.sd_DL_shadow[ray_index], - &kernel_split_state.path_state[ray_index], - &kernel_split_state.rng[ray_index], - my_sample, - &kernel_split_state.ray[ray_index]); -#ifdef __KERNEL_DEBUG__ - debug_data_init(&kernel_split_state.debug_data[ray_index]); -#endif - } - else { - /* These rays do not participate in path-iteration. */ - float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - /* Accumulate result in output buffer. */ - kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad); - path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]); - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); - } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h new file mode 100644 index 00000000000..e613db214ed --- /dev/null +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -0,0 +1,104 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +/* This kernel initializes structures needed in path-iteration kernels. + * This is the first kernel in ray-tracing logic. + * + * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE + */ + +ccl_device void kernel_path_init(KernelGlobals *kg) { + int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); + + /* This is the first assignment to ray_state; + * So we dont use ASSIGN_RAY_STATE macro. + */ + kernel_split_state.ray_state[ray_index] = RAY_ACTIVE; + + unsigned int my_sample; + unsigned int pixel_x; + unsigned int pixel_y; + unsigned int tile_x; + unsigned int tile_y; + unsigned int my_sample_tile; + + unsigned int work_index = 0; + /* Get work. */ + if(!get_next_work(kg, &work_index, ray_index)) { + /* No more work, mark ray as inactive */ + kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; + + return; + } + + /* Get the sample associated with the work. */ + my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample; + + my_sample_tile = 0; + + /* Get pixel and tile position associated with the work. */ + get_work_pixel_tile_position(kg, &pixel_x, &pixel_y, + &tile_x, &tile_y, + work_index, + ray_index); + kernel_split_state.work_array[ray_index] = work_index; + + ccl_global uint *rng_state = kernel_split_params.rng_state; + rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride; + + ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers; + per_sample_output_buffers += (tile_x + tile_y * kernel_split_params.stride + my_sample_tile) + * kernel_data.film.pass_stride; + + /* Initialize random numbers and ray. */ + kernel_path_trace_setup(kg, + rng_state, + my_sample, + pixel_x, pixel_y, + &kernel_split_state.rng[ray_index], + &kernel_split_state.ray[ray_index]); + + if(kernel_split_state.ray[ray_index].t != 0.0f) { + /* Initialize throughput, L_transparent, Ray, PathState; + * These rays proceed with path-iteration. + */ + kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); + kernel_split_state.L_transparent[ray_index] = 0.0f; + path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); + path_state_init(kg, + &kernel_split_state.sd_DL_shadow[ray_index], + &kernel_split_state.path_state[ray_index], + &kernel_split_state.rng[ray_index], + my_sample, + &kernel_split_state.ray[ray_index]); +#ifdef __KERNEL_DEBUG__ + debug_data_init(&kernel_split_state.debug_data[ray_index]); +#endif + } + else { + /* These rays do not participate in path-iteration. */ + float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + /* Accumulate result in output buffer. */ + kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad); + path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]); + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); + } +} + +CCL_NAMESPACE_END + |