From 6bf4115c13962c99d1cdc97f2be92c4922f3fd33 Mon Sep 17 00:00:00 2001 From: Hristo Gueorguiev Date: Wed, 3 May 2017 15:30:45 +0200 Subject: Cycles: Split kernel - sort shaders Reduce thread divergence in kernel_shader_eval. Rays are sorted in blocks of 2048 according to shader->id. On R9 290 Classroom is ~30% faster, and Pabellon Barcelone is ~8% faster. No sorting for CUDA split kernel. Reviewers: sergey, maiself Reviewed By: maiself Differential Revision: https://developer.blender.org/D2598 --- intern/cycles/kernel/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'intern/cycles/kernel/CMakeLists.txt') diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index a92e8bc4aee..9bb0455b9d5 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -21,6 +21,8 @@ set(SRC kernels/opencl/kernel_lamp_emission.cl kernels/opencl/kernel_do_volume.cl kernels/opencl/kernel_indirect_background.cl + kernels/opencl/kernel_shader_setup.cl + kernels/opencl/kernel_shader_sort.cl kernels/opencl/kernel_shader_eval.cl kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl kernels/opencl/kernel_subsurface_scatter.cl @@ -248,6 +250,8 @@ set(SRC_SPLIT_HEADERS split/kernel_path_init.h split/kernel_queue_enqueue.h split/kernel_scene_intersect.h + split/kernel_shader_setup.h + split/kernel_shader_sort.h split/kernel_shader_eval.h split/kernel_shadow_blocked_ao.h split/kernel_shadow_blocked_dl.h @@ -457,6 +461,8 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_interse delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_sort.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) -- cgit v1.2.3