diff options
38 files changed, 1090 insertions, 313 deletions
diff --git a/SConstruct b/SConstruct index e1ac00bdac4..d05e6899a61 100644 --- a/SConstruct +++ b/SConstruct @@ -1028,7 +1028,6 @@ if env['OURPLATFORM']!='darwin': dir=os.path.join(env['BF_INSTALLDIR'], VERSION, 'scripts', 'addons','cycles', 'kernel') source=os.listdir('intern/cycles/kernel') if '__pycache__' in source: source.remove('__pycache__') - source.remove('kernel.cpp') source.remove('CMakeLists.txt') source.remove('SConscript') source.remove('svm') @@ -1036,6 +1035,7 @@ if env['OURPLATFORM']!='darwin': source.remove('geom') source.remove('shaders') source.remove('osl') + source.remove('split') source=['intern/cycles/kernel/'+s for s in source] source.append('intern/cycles/util/util_atomic.h') source.append('intern/cycles/util/util_color.h') @@ -1063,6 +1063,12 @@ if env['OURPLATFORM']!='darwin': if '__pycache__' in source: source.remove('__pycache__') source=['intern/cycles/kernel/geom/'+s for s in source] scriptinstall.append(env.Install(dir=dir,source=source)) + # split + dir=os.path.join(env['BF_INSTALLDIR'], VERSION, 'scripts', 'addons','cycles', 'kernel', 'split') + source=os.listdir('intern/cycles/kernel/split') + if '__pycache__' in source: source.remove('__pycache__') + source=['intern/cycles/kernel/split/'+s for s in source] + scriptinstall.append(env.Install(dir=dir,source=source)) # licenses dir=os.path.join(env['BF_INSTALLDIR'], VERSION, 'scripts', 'addons','cycles', 'license') diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript index 75bb5bb0a97..99df8c299fc 100644 --- a/intern/cycles/SConscript +++ b/intern/cycles/SConscript @@ -34,12 +34,8 @@ cycles.Depends('../../source/blender/makesrna/intern/RNA_blender_cpp.h', 'makesr sources = cycles.Glob('bvh/*.cpp') + cycles.Glob('device/*.cpp') + cycles.Glob('kernel/*.cpp') + cycles.Glob('render/*.cpp') + cycles.Glob('subd/*.cpp') + cycles.Glob('util/*.cpp') + cycles.Glob('blender/*.cpp') +sources.append(path.join('kernel', 'kernels', 'cpu', 'kernel.cpp')) sources.remove(path.join('util', 'util_view.cpp')) -sources.remove(path.join('kernel', 'kernel_sse2.cpp')) -sources.remove(path.join('kernel', 'kernel_sse3.cpp')) -sources.remove(path.join('kernel', 'kernel_sse41.cpp')) -sources.remove(path.join('kernel', 'kernel_avx.cpp')) -sources.remove(path.join('kernel', 'kernel_avx2.cpp')) incs = [] defs = [] @@ -146,7 +142,7 @@ for kernel_type in kernel_flags.keys(): defs.append('WITH_KERNEL_' + kernel_type.upper()) for kernel_type in kernel_flags.keys(): - kernel_source = path.join('kernel', 'kernel_' + kernel_type + '.cpp') + kernel_source = path.join('kernel', 'kernels', 'cpu', 'kernel_' + kernel_type + '.cpp') kernel_cxxflags = Split(env['CXXFLAGS']) kernel_cxxflags.append(kernel_flags[kernel_type].split()) kernel_defs = defs[:] diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index f050153d914..8d690466c07 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -266,7 +266,7 @@ public: printf("CUDA version %d.%d detected, build may succeed but only CUDA 6.5 is officially supported.\n", cuda_version/10, cuda_version%10); /* compile */ - string kernel = path_join(kernel_path, "kernel.cu"); + string kernel = path_join(kernel_path, path_join("kernels", path_join("cuda", "kernel.cu"))); string include = kernel_path; const int machine = system_cpu_bits(); diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 88c2a2f58c3..d209340d0c4 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -748,7 +748,7 @@ public: } else { - string init_kernel_source = "#include \"kernel.cl\" // " + kernel_md5 + "\n"; + string init_kernel_source = "#include \"kernels/opencl/kernel.cl\" // " + kernel_md5 + "\n"; /* if does not exist or loading binary failed, compile kernel */ if(!compile_kernel(kernel_path, init_kernel_source, "", &cpProgram, debug_src)) @@ -1322,7 +1322,7 @@ public: /* Kernel loaded from binary, nothing to do. */ } else { - string init_kernel_source = "#include \"kernel.cl\" // " + + string init_kernel_source = "#include \"kernels/opencl/kernel.cl\" // " + kernel_md5 + "\n"; /* If does not exist or loading binary failed, compile kernel. */ if(!compile_kernel(kernel_path, @@ -1996,7 +1996,7 @@ public: #define GLUE(a, b) a ## b #define LOAD_KERNEL(name) \ do { \ - kernel_init_source = "#include \"kernel_" #name ".cl\" // " + \ + kernel_init_source = "#include \"kernels/opencl/kernel_" #name ".cl\" // " + \ kernel_md5 + "\n"; \ device_md5 = device_md5_hash(build_options); \ clbin = string_printf("cycles_kernel_%s_%s_" #name ".clbin", \ diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 3e16c4b5fd9..89dd3542ef6 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -12,20 +12,20 @@ set(INC_SYS ) set(SRC - kernel.cpp - kernel.cl - kernel_data_init.cl - kernel_queue_enqueue.cl - kernel_scene_intersect.cl - kernel_lamp_emission.cl - kernel_background_buffer_update.cl - kernel_shader_eval.cl - kernel_holdout_emission_blurring_pathtermination_ao.cl - kernel_direct_lighting.cl - kernel_shadow_blocked.cl - kernel_next_iteration_setup.cl - kernel_sum_all_radiance.cl - kernel.cu + kernels/cpu/kernel.cpp + kernels/opencl/kernel.cl + kernels/opencl/kernel_data_init.cl + kernels/opencl/kernel_queue_enqueue.cl + kernels/opencl/kernel_scene_intersect.cl + kernels/opencl/kernel_lamp_emission.cl + kernels/opencl/kernel_background_buffer_update.cl + kernels/opencl/kernel_shader_eval.cl + kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl + kernels/opencl/kernel_direct_lighting.cl + kernels/opencl/kernel_shadow_blocked.cl + kernels/opencl/kernel_next_iteration_setup.cl + kernels/opencl/kernel_sum_all_radiance.cl + kernels/cuda/kernel.cu ) set(SRC_HEADERS @@ -57,7 +57,6 @@ set(SRC_HEADERS kernel_shader.h kernel_shaderdata_vars.h kernel_shadow.h - kernel_split.h kernel_subsurface.h kernel_textures.h kernel_types.h @@ -162,6 +161,22 @@ set(SRC_UTIL_HEADERS ../util/util_transform.h ../util/util_types.h ) + +set(SRC_SPLIT_HEADERS + split/kernel_background_buffer_update.h + split/kernel_data_init.h + split/kernel_direct_lighting.h + split/kernel_holdout_emission_blurring_pathtermination_ao.h + split/kernel_lamp_emission.h + split/kernel_next_iteration_setup.h + split/kernel_queue_enqueue.h + split/kernel_scene_intersect.h + split/kernel_shader_eval.h + split/kernel_shadow_blocked.h + split/kernel_split_common.h + split/kernel_sum_all_radiance.h +) + # CUDA module if(WITH_CYCLES_CUDA_BINARIES) @@ -187,7 +202,7 @@ if(WITH_CYCLES_CUDA_BINARIES) endif() # build for each arch - set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) + set(cuda_sources kernels/cuda/kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch experimental) @@ -213,7 +228,7 @@ if(WITH_CYCLES_CUDA_BINARIES) COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu + --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} @@ -261,28 +276,28 @@ include_directories(SYSTEM ${INC_SYS}) if(CXX_HAS_SSE) list(APPEND SRC - kernel_sse2.cpp - kernel_sse3.cpp - kernel_sse41.cpp + kernels/cpu/kernel_sse2.cpp + kernels/cpu/kernel_sse3.cpp + kernels/cpu/kernel_sse41.cpp ) - set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") - set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") - set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX) list(APPEND SRC - kernel_avx.cpp + kernels/cpu/kernel_avx.cpp ) - set_source_files_properties(kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX2) list(APPEND SRC - kernel_avx2.cpp + kernels/cpu/kernel_avx2.cpp ) - set_source_files_properties(kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") endif() add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS}) @@ -301,22 +316,23 @@ endif() #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cu" ${CYCLES_INSTALL_PATH}/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SPLIT_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/split) diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 24cc4517eaf..e8d51013924 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -57,8 +57,9 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: build_dir = os.path.join(root_build_dir, 'intern/cycles/kernel') # source directories and files + kernel_file_rel = os.path.join("kernels", "cuda", "kernel.cu") source_dir = Dir('.').srcnode().path - kernel_file = os.path.join(source_dir, "kernel.cu") + kernel_file = os.path.join(source_dir, kernel_file_rel) util_dir = os.path.join(source_dir, "../util") svm_dir = os.path.join(source_dir, "../svm") geom_dir = os.path.join(source_dir, "../geom") @@ -83,7 +84,7 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: nvcc_flags += " -D__KERNEL_DEBUG__" # dependencies - dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') + dependencies = [kernel_file_rel] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') last_cubin_file = None configs = (("kernel_%s.cubin", ''), @@ -105,7 +106,7 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: else: command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, current_flags, kernel_file, cubin_file) - kernel.Command(cubin_file, 'kernel.cu', command) + kernel.Command(cubin_file, kernel_file_rel, command) kernel.Depends(cubin_file, dependencies) kernel_binaries.append(cubin_file) diff --git a/intern/cycles/kernel/kernel_split.h b/intern/cycles/kernel/kernel_split.h deleted file mode 100644 index d4bcb9b9d8f..00000000000 --- a/intern/cycles/kernel/kernel_split.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright 2011-2015 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _KERNEL_SPLIT_H_ -#define _KERNEL_SPLIT_H_ - -#include "kernel_compat_opencl.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" - -#include "util_atomic.h" - -#include "kernel_random.h" -#include "kernel_projection.h" -#include "kernel_montecarlo.h" -#include "kernel_differential.h" -#include "kernel_camera.h" - -#include "geom/geom.h" - -#include "kernel_accumulate.h" -#include "kernel_shader.h" -#include "kernel_light.h" -#include "kernel_passes.h" - -#ifdef __SUBSURFACE__ -#include "kernel_subsurface.h" -#endif - -#ifdef __VOLUME__ -#include "kernel_volume.h" -#endif - -#include "kernel_path_state.h" -#include "kernel_shadow.h" -#include "kernel_emission.h" -#include "kernel_path_common.h" -#include "kernel_path_surface.h" -#include "kernel_path_volume.h" - -#ifdef __KERNEL_DEBUG__ -#include "kernel_debug.h" -#endif - -#include "kernel_queues.h" -#include "kernel_work_stealing.h" - -#endif diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index a7eaa758f5d..a7eaa758f5d 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp index f1027ad413d..f1027ad413d 100644 --- a/intern/cycles/kernel/kernel_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp index b2f16ff54d8..b2f16ff54d8 100644 --- a/intern/cycles/kernel/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp index cc8c603e8f8..cc8c603e8f8 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp index 20919a4f26e..20919a4f26e 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp index 48579d3b7e5..48579d3b7e5 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu index 64069fc049f..29bf67d9750 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernels/cuda/kernel.cu @@ -16,13 +16,13 @@ /* CUDA kernel entry points */ -#include "kernel_compat_cuda.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_bake.h" +#include "../../kernel_compat_cuda.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" +#include "../../kernel_film.h" +#include "../../kernel_path.h" +#include "../../kernel_bake.h" /* device data taken from CUDA occupancy calculator */ diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index cbc0592fe1f..bffcd53bab3 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -16,14 +16,14 @@ /* OpenCL kernel entry points - unfinished */ -#include "kernel_compat_opencl.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" +#include "../../kernel_compat_opencl.h" +#include "../../kernel_math.h" +#include "../../kernel_types.h" +#include "../../kernel_globals.h" -#include "kernel_film.h" -#include "kernel_path.h" -#include "kernel_bake.h" +#include "../../kernel_film.h" +#include "../../kernel_path.h" +#include "../../kernel_bake.h" #ifdef __COMPILE_ONLY_MEGAKERNEL__ @@ -34,7 +34,7 @@ __kernel void kernel_ocl_path_trace( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" int sample, int sx, int sy, int sw, int sh, int offset, int stride) @@ -45,7 +45,7 @@ __kernel void kernel_ocl_path_trace( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); int y = sy + get_global_id(1); @@ -63,7 +63,7 @@ __kernel void kernel_ocl_shader( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" int type, int sx, int sw, int offset, int sample) { @@ -73,7 +73,7 @@ __kernel void kernel_ocl_shader( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); @@ -88,7 +88,7 @@ __kernel void kernel_ocl_bake( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" int type, int sx, int sw, int offset, int sample) { @@ -98,7 +98,7 @@ __kernel void kernel_ocl_bake( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); @@ -124,7 +124,7 @@ __kernel void kernel_ocl_convert_to_byte( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) @@ -135,7 +135,7 @@ __kernel void kernel_ocl_convert_to_byte( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); int y = sy + get_global_id(1); @@ -151,7 +151,7 @@ __kernel void kernel_ocl_convert_to_half_float( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../../kernel_textures.h" float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) @@ -162,7 +162,7 @@ __kernel void kernel_ocl_convert_to_half_float( #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../../kernel_textures.h" int x = sx + get_global_id(0); int y = sy + get_global_id(1); diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl new file mode 100644 index 00000000000..2d1944d01e6 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl @@ -0,0 +1,81 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_background_buffer_update.h" + +__kernel void kernel_ocl_path_trace_background_buffer_update( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* Required for buffer Update */ + ccl_global float3 *throughput_coop, /* Required for background hit processing */ + PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */ + ccl_global Ray *Ray_coop, /* Required for background hit processing */ + ccl_global PathState *PathState_coop, /* Required for background hit processing */ + ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */ + ccl_global char *ray_state, /* Stores information on the current state of a ray */ + int sw, int sh, int sx, int sy, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global unsigned int *work_array, /* Denotes work of each ray */ + ccl_global int *Queue_data, /* Queues memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + int end_sample, + int start_sample, +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + kernel_background_buffer_update(globals, + data, + shader_data, + per_sample_output_buffers, + rng_state, + rng_coop, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + L_transparent_coop, + ray_state, + sw, sh, sx, sy, stride, + rng_state_offset_x, + rng_state_offset_y, + rng_state_stride, + work_array, + Queue_data, + Queue_index, + queuesize, + end_sample, + start_sample, +#ifdef __WORK_STEALING__ + work_pool_wgs, + num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl new file mode 100644 index 00000000000..015f0872413 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_data_init.cl @@ -0,0 +1,242 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_data_init.h" + +__kernel void kernel_ocl_path_trace_data_init( + ccl_global char *globals, + ccl_global char *shader_data_sd, /* Arguments related to ShaderData */ + ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */ + + ccl_global float3 *P_sd, + ccl_global float3 *P_sd_DL_shadow, + + ccl_global float3 *N_sd, + ccl_global float3 *N_sd_DL_shadow, + + ccl_global float3 *Ng_sd, + ccl_global float3 *Ng_sd_DL_shadow, + + ccl_global float3 *I_sd, + ccl_global float3 *I_sd_DL_shadow, + + ccl_global int *shader_sd, + ccl_global int *shader_sd_DL_shadow, + + ccl_global int *flag_sd, + ccl_global int *flag_sd_DL_shadow, + + ccl_global int *prim_sd, + ccl_global int *prim_sd_DL_shadow, + + ccl_global int *type_sd, + ccl_global int *type_sd_DL_shadow, + + ccl_global float *u_sd, + ccl_global float *u_sd_DL_shadow, + + ccl_global float *v_sd, + ccl_global float *v_sd_DL_shadow, + + ccl_global int *object_sd, + ccl_global int *object_sd_DL_shadow, + + ccl_global float *time_sd, + ccl_global float *time_sd_DL_shadow, + + ccl_global float *ray_length_sd, + ccl_global float *ray_length_sd_DL_shadow, + + ccl_global int *ray_depth_sd, + ccl_global int *ray_depth_sd_DL_shadow, + + ccl_global int *transparent_depth_sd, + ccl_global int *transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + ccl_global differential3 *dP_sd, + ccl_global differential3 *dP_sd_DL_shadow, + + ccl_global differential3 *dI_sd, + ccl_global differential3 *dI_sd_DL_shadow, + + ccl_global differential *du_sd, + ccl_global differential *du_sd_DL_shadow, + + ccl_global differential *dv_sd, + ccl_global differential *dv_sd_DL_shadow, + + /* Dp/Du */ + ccl_global float3 *dPdu_sd, + ccl_global float3 *dPdu_sd_DL_shadow, + + ccl_global float3 *dPdv_sd, + ccl_global float3 *dPdv_sd_DL_shadow, + + /* Object motion. */ + ccl_global Transform *ob_tfm_sd, + ccl_global Transform *ob_tfm_sd_DL_shadow, + + ccl_global Transform *ob_itfm_sd, + ccl_global Transform *ob_itfm_sd_DL_shadow, + + ShaderClosure *closure_sd, + ShaderClosure *closure_sd_DL_shadow, + + ccl_global int *num_closure_sd, + ccl_global int *num_closure_sd_DL_shadow, + + ccl_global float *randb_closure_sd, + ccl_global float *randb_closure_sd_DL_shadow, + + ccl_global float3 *ray_P_sd, + ccl_global float3 *ray_P_sd_DL_shadow, + + ccl_global differential3 *ray_dP_sd, + ccl_global differential3 *ray_dP_sd_DL_shadow, + + ccl_constant KernelData *data, + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_state, + ccl_global uint *rng_coop, /* rng array to store rng values for all rays */ + ccl_global float3 *throughput_coop, /* throughput array to store throughput values for all rays */ + ccl_global float *L_transparent_coop, /* L_transparent array to store L_transparent values for all rays */ + PathRadiance *PathRadiance_coop, /* PathRadiance array to store PathRadiance values for all rays */ + ccl_global Ray *Ray_coop, /* Ray array to store Ray information for all rays */ + ccl_global PathState *PathState_coop, /* PathState array to store PathState information for all rays */ + ccl_global char *ray_state, /* Stores information on current state of a ray */ + +#define KERNEL_TEX(type, ttype, name) \ + ccl_global type *name, +#include "../../kernel_textures.h" + + int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, + int rng_state_offset_x, + int rng_state_offset_y, + int rng_state_stride, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* size (capacity) of the queue */ + ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ + ccl_global unsigned int *work_array, /* work array to store which work each ray belongs to */ +#ifdef __WORK_STEALING__ + ccl_global unsigned int *work_pool_wgs, /* Work pool for each work group */ + unsigned int num_samples, /* Total number of samples per pixel */ +#endif +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples /* Number of samples to be processed in parallel */ + ) +{ + kernel_data_init(globals, + shader_data_sd, + shader_data_sd_DL_shadow, + P_sd, + P_sd_DL_shadow, + N_sd, + N_sd_DL_shadow, + Ng_sd, + Ng_sd_DL_shadow, + I_sd, + I_sd_DL_shadow, + shader_sd, + shader_sd_DL_shadow, + flag_sd, + flag_sd_DL_shadow, + prim_sd, + prim_sd_DL_shadow, + type_sd, + type_sd_DL_shadow, + u_sd, + u_sd_DL_shadow, + v_sd, + v_sd_DL_shadow, + object_sd, + object_sd_DL_shadow, + time_sd, + time_sd_DL_shadow, + ray_length_sd, + ray_length_sd_DL_shadow, + ray_depth_sd, + ray_depth_sd_DL_shadow, + transparent_depth_sd, + transparent_depth_sd_DL_shadow, + + /* Ray differentials. */ + dP_sd, + dP_sd_DL_shadow, + dI_sd, + dI_sd_DL_shadow, + du_sd, + du_sd_DL_shadow, + dv_sd, + dv_sd_DL_shadow, + + /* Dp/Du */ + dPdu_sd, + dPdu_sd_DL_shadow, + dPdv_sd, + dPdv_sd_DL_shadow, + + /* Object motion. */ + ob_tfm_sd, + ob_tfm_sd_DL_shadow, + ob_itfm_sd, + ob_itfm_sd_DL_shadow, + + closure_sd, + closure_sd_DL_shadow, + num_closure_sd, + num_closure_sd_DL_shadow, + randb_closure_sd, + randb_closure_sd_DL_shadow, + ray_P_sd, + ray_P_sd_DL_shadow, + ray_dP_sd, + ray_dP_sd_DL_shadow, + data, + per_sample_output_buffers, + rng_state, + rng_coop, + throughput_coop, + L_transparent_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + ray_state, + +#define KERNEL_TEX(type, ttype, name) name, +#include "../../kernel_textures.h" + + start_sample, sx, sy, sw, sh, offset, stride, + rng_state_offset_x, + rng_state_offset_y, + rng_state_stride, + Queue_data, + Queue_index, + queuesize, + use_queues_flag, + work_array, +#ifdef __WORK_STEALING__ + work_pool_wgs, + num_samples, +#endif +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl new file mode 100644 index 00000000000..0b22c6d0864 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl @@ -0,0 +1,47 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_direct_lighting.h" + +__kernel void kernel_ocl_path_trace_direct_lighting( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for direct lighting */ + ccl_global char *shader_DL, /* Required for direct lighting */ + ccl_global uint *rng_coop, /* Required for direct lighting */ + ccl_global PathState *PathState_coop, /* Required for direct lighting */ + ccl_global int *ISLamp_coop, /* Required for direct lighting */ + ccl_global Ray *LightRay_coop, /* Required for direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for direct lighting */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize) /* Size (capacity) of each queue */ +{ + kernel_direct_lighting(globals, + data, + shader_data, + shader_DL, + rng_coop, + PathState_coop, + ISLamp_coop, + LightRay_coop, + BSDFEval_coop, + ray_state, + Queue_data, + Queue_index, + queuesize); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl new file mode 100644 index 00000000000..502f10a7a59 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl @@ -0,0 +1,67 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" + +__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */ + ccl_global float *per_sample_output_buffers, + ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */ + ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */ + ccl_global float *L_transparent_coop, /* Required for handling holdout material */ + PathRadiance *PathRadiance_coop, /* Required for "kernel_write_data_passes" and indirect primitive emission */ + ccl_global PathState *PathState_coop, /* Required throughout the kernel and AO */ + Intersection *Intersection_coop, /* Required for indirect primitive emission */ + ccl_global float3 *AOAlpha_coop, /* Required for AO */ + ccl_global float3 *AOBSDF_coop, /* Required for AO */ + ccl_global Ray *AOLightRay_coop, /* Required for AO */ + int sw, int sh, int sx, int sy, int stride, + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global unsigned int *work_array, /* Denotes the work that each ray belongs to */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ +#ifdef __WORK_STEALING__ + unsigned int start_sample, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + kernel_holdout_emission_blurring_pathtermination_ao(globals, + data, + shader_data, + per_sample_output_buffers, + rng_coop, + throughput_coop, + L_transparent_coop, + PathRadiance_coop, + PathState_coop, + Intersection_coop, + AOAlpha_coop, + AOBSDF_coop, + AOLightRay_coop, + sw, sh, sx, sy, stride, + ray_state, + work_array, + Queue_data, + Queue_index, + queuesize, +#ifdef __WORK_STEALING__ + start_sample, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl new file mode 100644 index 00000000000..af83e68b53e --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl @@ -0,0 +1,52 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_lamp_emission.h" + +__kernel void kernel_ocl_path_trace_lamp_emission( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for lamp emission */ + ccl_global float3 *throughput_coop, /* Required for lamp emission */ + PathRadiance *PathRadiance_coop, /* Required for lamp emission */ + ccl_global Ray *Ray_coop, /* Required for lamp emission */ + ccl_global PathState *PathState_coop, /* Required for lamp emission */ + Intersection *Intersection_coop, /* Required for lamp emission */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* Size (capacity) of queues */ + ccl_global char *use_queues_flag, /* used to decide if this kernel should use queues to fetch ray index */ + int parallel_samples /* Number of samples to be processed in parallel */ + ) +{ + kernel_lamp_emission(globals, + data, + shader_data, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + sw, sh, + Queue_data, + Queue_index, + queuesize, + use_queues_flag, + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl new file mode 100644 index 00000000000..4acd991f0b4 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl @@ -0,0 +1,59 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_next_iteration_setup.h" + +__kernel void kernel_ocl_path_trace_next_iteration_setup( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Required for setting up ray for next iteration */ + ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */ + ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */ + PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *Ray_coop, /* Required for setting up ray for next iteration */ + ccl_global PathState *PathState_coop, /* Required for setting up ray for next iteration */ + ccl_global Ray *LightRay_dl_coop, /* Required for radiance update - direct lighting */ + ccl_global int *ISLamp_coop, /* Required for radiance update - direct lighting */ + ccl_global BsdfEval *BSDFEval_coop, /* Required for radiance update - direct lighting */ + ccl_global Ray *LightRay_ao_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOBSDF_coop, /* Required for radiance update - AO */ + ccl_global float3 *AOAlpha_coop, /* Required for radiance update - AO */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + ccl_global char *use_queues_flag) /* flag to decide if scene_intersect kernel should use queues to fetch ray index */ +{ + kernel_next_iteration_setup(globals, + data, + shader_data, + rng_coop, + throughput_coop, + PathRadiance_coop, + Ray_coop, + PathState_coop, + LightRay_dl_coop, + ISLamp_coop, + BSDFEval_coop, + LightRay_ao_coop, + AOBSDF_coop, + AOAlpha_coop, + ray_state, + Queue_data, + Queue_index, + queuesize, + use_queues_flag); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl new file mode 100644 index 00000000000..62cf08c387d --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl @@ -0,0 +1,29 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_queue_enqueue.h" + +__kernel void kernel_ocl_path_trace_queue_enqueue( + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int queuesize) /* Size (capacity) of each queue */ +{ + kernel_queue_enqueue(Queue_data, + Queue_index, + ray_state, + queuesize); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl new file mode 100644 index 00000000000..d219874d391 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl @@ -0,0 +1,53 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_scene_intersect.h" + +__kernel void kernel_ocl_path_trace_scene_intersect( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global uint *rng_coop, + ccl_global Ray *Ray_coop, /* Required for scene_intersect */ + ccl_global PathState *PathState_coop, /* Required for scene_intersect */ + Intersection *Intersection_coop, /* Required for scene_intersect */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + int sw, int sh, + ccl_global int *Queue_data, /* Memory for queues */ + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* Size (capacity) of queues */ + ccl_global char *use_queues_flag, /* used to decide if this kernel should use queues to fetch ray index */ +#ifdef __KERNEL_DEBUG__ + DebugData *debugdata_coop, +#endif + int parallel_samples) /* Number of samples to be processed in parallel */ +{ + kernel_scene_intersect(globals, + data, + rng_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + sw, sh, + Queue_data, + Queue_index, + queuesize, + use_queues_flag, +#ifdef __KERNEL_DEBUG__ + debugdata_coop, +#endif + parallel_samples); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl new file mode 100644 index 00000000000..04769d7d792 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl @@ -0,0 +1,43 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_shader_eval.h" + +__kernel void kernel_ocl_path_trace_shader_eval( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_data, /* Output ShaderData structure to be filled */ + ccl_global uint *rng_coop, /* Required for rbsdf calculation */ + ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */ + ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */ + Intersection *Intersection_coop, /* Required for setting up shader from ray */ + ccl_global char *ray_state, /* Denotes the state of each ray */ + ccl_global int *Queue_data, /* queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize) /* Size (capacity) of each queue */ +{ + kernel_shader_eval(globals, + data, + shader_data, + rng_coop, + Ray_coop, + PathState_coop, + Intersection_coop, + ray_state, + Queue_data, + Queue_index, + queuesize); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl new file mode 100644 index 00000000000..9d57364c8d6 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl @@ -0,0 +1,47 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_shadow_blocked.h" + +__kernel void kernel_ocl_path_trace_shadow_blocked( + ccl_global char *globals, + ccl_constant KernelData *data, + ccl_global char *shader_shadow, /* Required for shadow blocked */ + ccl_global PathState *PathState_coop, /* Required for shadow blocked */ + ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */ + ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */ + Intersection *Intersection_coop_AO, + Intersection *Intersection_coop_DL, + ccl_global char *ray_state, + ccl_global int *Queue_data, /* Queue memory */ + ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ + int queuesize, /* Size (capacity) of each queue */ + int total_num_rays) +{ + kernel_shadow_blocked(globals, + data, + shader_shadow, + PathState_coop, + LightRay_dl_coop, + LightRay_ao_coop, + Intersection_coop_AO, + Intersection_coop_DL, + ray_state, + Queue_data, + Queue_index, + queuesize, + total_num_rays); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl new file mode 100644 index 00000000000..88a1ed830af --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl @@ -0,0 +1,38 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "split/kernel_sum_all_radiance.h" + +__kernel void kernel_ocl_path_trace_sum_all_radiance( + ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ + ccl_global float *buffer, /* Output buffer of RenderTile */ + ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ + int parallel_samples, int sw, int sh, int stride, + int buffer_offset_x, + int buffer_offset_y, + int buffer_stride, + int start_sample) +{ + kernel_sum_all_radiance(data, + buffer, + per_sample_output_buffer, + parallel_samples, + sw, sh, stride, + buffer_offset_x, + buffer_offset_y, + buffer_stride, + start_sample); +} diff --git a/intern/cycles/kernel/kernel_background_buffer_update.cl b/intern/cycles/kernel/split/kernel_background_buffer_update.h index bf08477cfbf..95de1a4b2a9 100644 --- a/intern/cycles/kernel/kernel_background_buffer_update.cl +++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_background_buffer_update kernel. + * Note on kernel_background_buffer_update kernel. * This is the fourth kernel in the ray tracing logic, and the third * of the path iteration kernels. This kernel takes care of rays that hit * the background (sceneintersect kernel), and for the rays of @@ -33,30 +33,30 @@ * * The input and output are as follows, * - * rng_coop ---------------------------------------------|--- kernel_ocl_path_trace_background_buffer_update --|--- PathRadiance_coop - * throughput_coop --------------------------------------| |--- L_transparent_coop - * per_sample_output_buffers ----------------------------| |--- per_sample_output_buffers - * Ray_coop ---------------------------------------------| |--- ray_state - * PathState_coop ---------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) - * L_transparent_coop -----------------------------------| |--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) - * ray_state --------------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) - * Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ----| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) - * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- work_array - * parallel_samples -------------------------------------| |--- PathState_coop - * end_sample -------------------------------------------| |--- throughput_coop - * kg (globals + data) ----------------------------------| |--- rng_coop - * rng_state --------------------------------------------| |--- Ray - * PathRadiance_coop ------------------------------------| | - * sw ---------------------------------------------------| | - * sh ---------------------------------------------------| | - * sx ---------------------------------------------------| | - * sy ---------------------------------------------------| | - * stride -----------------------------------------------| | - * work_array -------------------------------------------| |--- work_array - * queuesize --------------------------------------------| | - * start_sample -----------------------------------------| |--- work_pool_wgs - * work_pool_wgs ----------------------------------------| | - * num_samples ------------------------------------------| | + * rng_coop ---------------------------------------------|--- kernel_background_buffer_update --|--- PathRadiance_coop + * throughput_coop --------------------------------------| |--- L_transparent_coop + * per_sample_output_buffers ----------------------------| |--- per_sample_output_buffers + * Ray_coop ---------------------------------------------| |--- ray_state + * PathState_coop ---------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * L_transparent_coop -----------------------------------| |--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) + * ray_state --------------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ----| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) + * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- work_array + * parallel_samples -------------------------------------| |--- PathState_coop + * end_sample -------------------------------------------| |--- throughput_coop + * kg (globals + data) ----------------------------------| |--- rng_coop + * rng_state --------------------------------------------| |--- Ray + * PathRadiance_coop ------------------------------------| | + * sw ---------------------------------------------------| | + * sh ---------------------------------------------------| | + * sx ---------------------------------------------------| | + * sy ---------------------------------------------------| | + * stride -----------------------------------------------| | + * work_array -------------------------------------------| |--- work_array + * queuesize --------------------------------------------| | + * start_sample -----------------------------------------| |--- work_pool_wgs + * work_pool_wgs ----------------------------------------| | + * num_samples ------------------------------------------| | * * note on shader_data : shader_data argument is neither an input nor an output for this kernel. It is just filled and consumed here itself. * Note on Queues : @@ -70,7 +70,7 @@ * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty */ -__kernel void kernel_ocl_path_trace_background_buffer_update( +ccl_device void kernel_background_buffer_update( ccl_global char *globals, ccl_constant KernelData *data, ccl_global char *shader_data, diff --git a/intern/cycles/kernel/kernel_data_init.cl b/intern/cycles/kernel/split/kernel_data_init.h index 62b5c4e6a29..b7a4d847d03 100644 --- a/intern/cycles/kernel/kernel_data_init.cl +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_data_initialization kernel + * Note on kernel_data_initialization kernel * This kernel Initializes structures needed in path-iteration kernels. * This is the first kernel in ray-tracing logic. * @@ -25,33 +25,33 @@ * * Its input and output are as follows, * - * Un-initialized rng---------------|--- kernel_ocl_path_trace_data_initialization ---|--- Initialized rng - * Un-initialized throughput -------| |--- Initialized throughput - * Un-initialized L_transparent ----| |--- Initialized L_transparent - * Un-initialized PathRadiance -----| |--- Initialized PathRadiance - * Un-initialized Ray --------------| |--- Initialized Ray - * Un-initialized PathState --------| |--- Initialized PathState - * Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT) - * Un-initilaized QueueIndex -------| |--- Initialized QueueIndex (to 0) - * Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false) - * Un-initialized ray_state --------| |--- Initialized ray_state - * parallel_samples --------------- | |--- Initialized per_sample_output_buffers - * rng_state -----------------------| |--- Initialized work_array - * data ----------------------------| |--- Initialized work_pool_wgs - * start_sample --------------------| | - * sx ------------------------------| | - * sy ------------------------------| | - * sw ------------------------------| | - * sh ------------------------------| | - * stride --------------------------| | - * queuesize -----------------------| | - * num_samples ---------------------| | + * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng + * Un-initialized throughput -------| |--- Initialized throughput + * Un-initialized L_transparent ----| |--- Initialized L_transparent + * Un-initialized PathRadiance -----| |--- Initialized PathRadiance + * Un-initialized Ray --------------| |--- Initialized Ray + * Un-initialized PathState --------| |--- Initialized PathState + * Un-initialized QueueData --------| |--- Initialized QueueData (to QUEUE_EMPTY_SLOT) + * Un-initilaized QueueIndex -------| |--- Initialized QueueIndex (to 0) + * Un-initialized use_queues_flag---| |--- Initialized use_queues_flag (to false) + * Un-initialized ray_state --------| |--- Initialized ray_state + * parallel_samples --------------- | |--- Initialized per_sample_output_buffers + * rng_state -----------------------| |--- Initialized work_array + * data ----------------------------| |--- Initialized work_pool_wgs + * start_sample --------------------| | + * sx ------------------------------| | + * sy ------------------------------| | + * sw ------------------------------| | + * sh ------------------------------| | + * stride --------------------------| | + * queuesize -----------------------| | + * num_samples ---------------------| | * * Note on Queues : * All slots in queues are initialized to queue empty slot; * The number of elements in the queues is initialized to 0; */ -__kernel void kernel_ocl_path_trace_data_init( +ccl_device void kernel_data_init( ccl_global char *globals, ccl_global char *shader_data_sd, /* Arguments related to ShaderData */ ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */ @@ -156,7 +156,7 @@ __kernel void kernel_ocl_path_trace_data_init( #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name, -#include "kernel_textures.h" +#include "../kernel_textures.h" int start_sample, int sx, int sy, int sw, int sh, int offset, int stride, int rng_state_offset_x, @@ -184,7 +184,7 @@ __kernel void kernel_ocl_path_trace_data_init( kg->data = data; #define KERNEL_TEX(type, ttype, name) \ kg->name = name; -#include "kernel_textures.h" +#include "../kernel_textures.h" /* Load ShaderData structure */ ShaderData *sd = (ShaderData *)shader_data_sd; diff --git a/intern/cycles/kernel/kernel_direct_lighting.cl b/intern/cycles/kernel/split/kernel_direct_lighting.h index f874122c508..6b83d892057 100644 --- a/intern/cycles/kernel/kernel_direct_lighting.cl +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_direct_lighting kernel. + * Note on kernel_direct_lighting kernel. * This is the eighth kernel in the ray tracing logic. This is the seventh * of the path iteration kernels. This kernel takes care of direct lighting * logic. However, the "shadow ray cast" part of direct lighting is handled @@ -29,13 +29,13 @@ * * The input and output are as follows, * - * rng_coop -----------------------------------------|--- kernel_ocl_path_trace_direct_lighting --|--- BSDFEval_coop - * PathState_coop -----------------------------------| |--- ISLamp_coop - * shader_data --------------------------------------| |--- LightRay_coop - * ray_state ----------------------------------------| |--- ray_state - * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| | - * kg (globals + data) ------------------------------| | - * queuesize ----------------------------------------| | + * rng_coop -----------------------------------------|--- kernel_direct_lighting --|--- BSDFEval_coop + * PathState_coop -----------------------------------| |--- ISLamp_coop + * shader_data --------------------------------------| |--- LightRay_coop + * ray_state ----------------------------------------| |--- ray_state + * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| | + * kg (globals + data) ------------------------------| | + * queuesize ----------------------------------------| | * * note on shader_DL : shader_DL is neither input nor output to this kernel; shader_DL is filled and consumed in this kernel itself. * Note on Queues : @@ -49,7 +49,7 @@ * QUEUE_SHADOW_RAY_CAST_DL_RAYS queue will be filled with rays for which a shadow_blocked function must be executed, after this * kernel call. Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty. */ -__kernel void kernel_ocl_path_trace_direct_lighting( +ccl_device void kernel_direct_lighting( ccl_global char *globals, ccl_constant KernelData *data, ccl_global char *shader_data, /* Required for direct lighting */ diff --git a/intern/cycles/kernel/kernel_holdout_emission_blurring_pathtermination_ao.cl b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index a2e57771522..393ea4bcadc 100644 --- a/intern/cycles/kernel/kernel_holdout_emission_blurring_pathtermination_ao.cl +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao kernel. + * Note on kernel_holdout_emission_blurring_pathtermination_ao kernel. * This is the sixth kernel in the ray tracing logic. This is the fifth * of the path iteration kernels. This kernel takes care of the logic to process * "material of type holdout", indirect primitive emission, bsdf blurring, @@ -31,27 +31,27 @@ * * The input and output are as follows, * - * rng_coop ---------------------------------------------|--- kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) - * throughput_coop --------------------------------------| |--- PathState_coop - * PathRadiance_coop ------------------------------------| |--- throughput_coop - * Intersection_coop ------------------------------------| |--- L_transparent_coop - * PathState_coop ---------------------------------------| |--- per_sample_output_buffers - * L_transparent_coop -----------------------------------| |--- PathRadiance_coop - * shader_data ------------------------------------------| |--- ShaderData - * ray_state --------------------------------------------| |--- ray_state - * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) - * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- AOAlpha_coop - * kg (globals + data) ----------------------------------| |--- AOBSDF_coop - * parallel_samples -------------------------------------| |--- AOLightRay_coop - * per_sample_output_buffers ----------------------------| | - * sw ---------------------------------------------------| | - * sh ---------------------------------------------------| | - * sx ---------------------------------------------------| | - * sy ---------------------------------------------------| | - * stride -----------------------------------------------| | - * work_array -------------------------------------------| | - * queuesize --------------------------------------------| | - * start_sample -----------------------------------------| | + * rng_coop ---------------------------------------------|--- kernel_holdout_emission_blurring_pathtermination_ao ---|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * throughput_coop --------------------------------------| |--- PathState_coop + * PathRadiance_coop ------------------------------------| |--- throughput_coop + * Intersection_coop ------------------------------------| |--- L_transparent_coop + * PathState_coop ---------------------------------------| |--- per_sample_output_buffers + * L_transparent_coop -----------------------------------| |--- PathRadiance_coop + * shader_data ------------------------------------------| |--- ShaderData + * ray_state --------------------------------------------| |--- ray_state + * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- AOAlpha_coop + * kg (globals + data) ----------------------------------| |--- AOBSDF_coop + * parallel_samples -------------------------------------| |--- AOLightRay_coop + * per_sample_output_buffers ----------------------------| | + * sw ---------------------------------------------------| | + * sh ---------------------------------------------------| | + * sx ---------------------------------------------------| | + * sy ---------------------------------------------------| | + * stride -----------------------------------------------| | + * work_array -------------------------------------------| | + * queuesize --------------------------------------------| | + * start_sample -----------------------------------------| | * * Note on Queues : * This kernel fetches rays from the queue QUEUE_ACTIVE_AND_REGENERATED_RAYS and processes only @@ -72,7 +72,7 @@ * QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO */ -__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao( +ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( ccl_global char *globals, ccl_constant KernelData *data, ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */ diff --git a/intern/cycles/kernel/kernel_lamp_emission.cl b/intern/cycles/kernel/split/kernel_lamp_emission.h index e7f8b227dd8..f400a99e229 100644 --- a/intern/cycles/kernel/kernel_lamp_emission.cl +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -14,33 +14,33 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_lamp_emission + * Note on kernel_lamp_emission * This is the 3rd kernel in the ray-tracing logic. This is the second of the * path-iteration kernels. This kernel takes care of the indirect lamp emission logic. * This kernel operates on QUEUE_ACTIVE_AND_REGENERATED_RAYS. It processes rays of state RAY_ACTIVE * and RAY_HIT_BACKGROUND. * We will empty QUEUE_ACTIVE_AND_REGENERATED_RAYS queue in this kernel. * The input/output of the kernel is as follows, - * Throughput_coop ------------------------------------|--- kernel_ocl_path_trace_lamp_emission --|--- PathRadiance_coop - * Ray_coop -------------------------------------------| |--- Queue_data(QUEUE_ACTIVE_AND_REGENERATED_RAYS) - * PathState_coop -------------------------------------| |--- Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) - * kg (globals + data) --------------------------------| | - * Intersection_coop ----------------------------------| | - * ray_state ------------------------------------------| | - * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -----| | - * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ----| | - * queuesize ------------------------------------------| | - * use_queues_flag ------------------------------------| | - * sw -------------------------------------------------| | - * sh -------------------------------------------------| | - * parallel_samples -----------------------------------| | + * Throughput_coop ------------------------------------|--- kernel_lamp_emission --|--- PathRadiance_coop + * Ray_coop -------------------------------------------| |--- Queue_data(QUEUE_ACTIVE_AND_REGENERATED_RAYS) + * PathState_coop -------------------------------------| |--- Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) + * kg (globals + data) --------------------------------| | + * Intersection_coop ----------------------------------| | + * ray_state ------------------------------------------| | + * Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -----| | + * Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ----| | + * queuesize ------------------------------------------| | + * use_queues_flag ------------------------------------| | + * sw -------------------------------------------------| | + * sh -------------------------------------------------| | + * parallel_samples -----------------------------------| | * - * note : shader_data is neither input nor output. Its just filled and consumed in the same, kernel_ocl_path_trace_lamp_emission, kernel. + * note : shader_data is neither input nor output. Its just filled and consumed in the same, kernel_lamp_emission, kernel. */ -__kernel void kernel_ocl_path_trace_lamp_emission( +ccl_device void kernel_lamp_emission( ccl_global char *globals, ccl_constant KernelData *data, ccl_global char *shader_data, /* Required for lamp emission */ diff --git a/intern/cycles/kernel/kernel_next_iteration_setup.cl b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index 3c0e4e9240d..343dbb06e99 100644 --- a/intern/cycles/kernel/kernel_next_iteration_setup.cl +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_setup_next_iteration kernel. + * Note on kernel_setup_next_iteration kernel. * This is the tenth kernel in the ray tracing logic. This is the ninth * of the path iteration kernels. This kernel takes care of setting up * Ray for the next iteration of path-iteration and accumulating radiance @@ -27,16 +27,16 @@ * * The input and output are as follows, * - * rng_coop ---------------------------------------------|--- kernel_ocl_path_trace_next_iteration_setup -|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) - * throughput_coop --------------------------------------| |--- Queue_data (QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) - * PathRadiance_coop ------------------------------------| |--- throughput_coop - * PathState_coop ---------------------------------------| |--- PathRadiance_coop - * shader_data ------------------------------------------| |--- PathState_coop - * ray_state --------------------------------------------| |--- ray_state - * Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS) --------| |--- Ray_coop - * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- use_queues_flag - * Ray_coop ---------------------------------------------| | - * kg (globals + data) ----------------------------------| | + * rng_coop ---------------------------------------------|--- kernel_next_iteration_setup -|--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * throughput_coop --------------------------------------| |--- Queue_data (QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * PathRadiance_coop ------------------------------------| |--- throughput_coop + * PathState_coop ---------------------------------------| |--- PathRadiance_coop + * shader_data ------------------------------------------| |--- PathState_coop + * ray_state --------------------------------------------| |--- ray_state + * Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS) --------| |--- Ray_coop + * Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- use_queues_flag + * Ray_coop ---------------------------------------------| | + * kg (globals + data) ----------------------------------| | * LightRay_dl_coop -------------------------------------| * ISLamp_coop ------------------------------------------| * BSDFEval_coop ----------------------------------------| @@ -61,7 +61,7 @@ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays */ -__kernel void kernel_ocl_path_trace_next_iteration_setup( +ccl_device void kernel_next_iteration_setup( ccl_global char *globals, ccl_constant KernelData *data, ccl_global char *shader_data, /* Required for setting up ray for next iteration */ diff --git a/intern/cycles/kernel/kernel_queue_enqueue.cl b/intern/cycles/kernel/split/kernel_queue_enqueue.h index eee7860fb84..9bcf8f540b4 100644 --- a/intern/cycles/kernel/kernel_queue_enqueue.cl +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -14,26 +14,26 @@ * limitations under the License. */ -#include "kernel_compat_opencl.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" -#include "kernel_queues.h" +#include "../kernel_compat_opencl.h" +#include "../kernel_math.h" +#include "../kernel_types.h" +#include "../kernel_globals.h" +#include "../kernel_queues.h" /* - * The kernel "kernel_ocl_path_trace_queue_enqueue" enqueues rays of + * The kernel "kernel_queue_enqueue" enqueues rays of * different ray state into their appropriate Queues; * 1. Rays that have been determined to hit the background from the - * "kernel_ocl_path_trace_scene_intersect" kernel + * "kernel_scene_intersect" kernel * are enqueued in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; * 2. Rays that have been determined to be actively participating in path-iteration will be enqueued into QUEUE_ACTIVE_AND_REGENERATED_RAYS. * * The input and output of the kernel is as follows, * - * ray_state -------------------------------------------|--- kernel_ocl_path_trace_queue_enqueue --|--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) - * Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) - * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| | - * queuesize -------------------------------------------| | + * ray_state -------------------------------------------|--- kernel_queue_enqueue --|--- Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ------| |--- Queue_index (QUEUE_ACTIVE_AND_REGENERATED_RAYS & QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS) + * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| | + * queuesize -------------------------------------------| | * * Note on Queues : * State of queues during the first time this kernel is called : @@ -52,7 +52,7 @@ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. */ -__kernel void kernel_ocl_path_trace_queue_enqueue( +ccl_device void kernel_queue_enqueue( ccl_global int *Queue_data, /* Queue memory */ ccl_global int *Queue_index, /* Tracks the number of elements in each queue */ ccl_global char *ray_state, /* Denotes the state of each ray */ diff --git a/intern/cycles/kernel/kernel_scene_intersect.cl b/intern/cycles/kernel/split/kernel_scene_intersect.h index 6817e28a302..01e0b1fd19e 100644 --- a/intern/cycles/kernel/kernel_scene_intersect.cl +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_scene_intersect kernel. + * Note on kernel_scene_intersect kernel. * This is the second kernel in the ray tracing logic. This is the first * of the path iteration kernels. This kernel takes care of scene_intersect function. * @@ -27,20 +27,20 @@ * * The input and output are as follows, * - * Ray_coop ---------------------------------------|--------- kernel_ocl_path_trace_scene_intersect----------|--- PathState - * PathState_coop ---------------------------------| |--- Intersection - * ray_state --------------------------------------| |--- ray_state - * use_queues_flag --------------------------------| | - * parallel_samples -------------------------------| | - * QueueData(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| | - * kg (data + globals) ----------------------------| | - * rng_coop ---------------------------------------| | - * sw ---------------------------------------------| | - * sh ---------------------------------------------| | - * queuesize --------------------------------------| | + * Ray_coop ---------------------------------------|--------- kernel_scene_intersect----------|--- PathState + * PathState_coop ---------------------------------| |--- Intersection + * ray_state --------------------------------------| |--- ray_state + * use_queues_flag --------------------------------| | + * parallel_samples -------------------------------| | + * QueueData(QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| | + * kg (data + globals) ----------------------------| | + * rng_coop ---------------------------------------| | + * sw ---------------------------------------------| | + * sh ---------------------------------------------| | + * queuesize --------------------------------------| | * * Note on Queues : - * Ideally we would want kernel_ocl_path_trace_scene_intersect to work on queues. + * Ideally we would want kernel_scene_intersect to work on queues. * But during the very first time, the queues wil be empty and hence we perform a direct mapping * between ray-index and thread-index; From the next time onward, the queue will be filled and * we may start operating on queues. @@ -63,7 +63,7 @@ * QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS - no change */ -__kernel void kernel_ocl_path_trace_scene_intersect( +ccl_device void kernel_scene_intersect( ccl_global char *globals, ccl_constant KernelData *data, ccl_global uint *rng_coop, diff --git a/intern/cycles/kernel/kernel_shader_eval.cl b/intern/cycles/kernel/split/kernel_shader_eval.h index b3983081be6..0a8d77f52b0 100644 --- a/intern/cycles/kernel/kernel_shader_eval.cl +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -14,25 +14,25 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_shader_eval kernel + * Note on kernel_shader_eval kernel * This kernel is the 5th kernel in the ray tracing logic. This is * the 4rd kernel in path iteration. This kernel sets up the ShaderData * structure from the values computed by the previous kernels. It also identifies * the rays of state RAY_TO_REGENERATE and enqueues them in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. * * The input and output of the kernel is as follows, - * rng_coop -------------------------------------------|--- kernel_ocl_path_trace_shader_eval --|--- shader_data - * Ray_coop -------------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) - * PathState_coop -------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) - * Intersection_coop ----------------------------------| | - * Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS)-------| | - * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)---| | - * ray_state ------------------------------------------| | - * kg (globals + data) --------------------------------| | - * queuesize ------------------------------------------| | + * rng_coop -------------------------------------------|--- kernel_shader_eval --|--- shader_data + * Ray_coop -------------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * PathState_coop -------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) + * Intersection_coop ----------------------------------| | + * Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS)-------| | + * Queue_index(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)---| | + * ray_state ------------------------------------------| | + * kg (globals + data) --------------------------------| | + * queuesize ------------------------------------------| | * * Note on Queues : * This kernel reads from the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue and processes @@ -46,7 +46,7 @@ * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays */ -__kernel void kernel_ocl_path_trace_shader_eval( +ccl_device void kernel_shader_eval( ccl_global char *globals, ccl_constant KernelData *data, ccl_global char *shader_data, /* Output ShaderData structure to be filled */ diff --git a/intern/cycles/kernel/kernel_shadow_blocked.cl b/intern/cycles/kernel/split/kernel_shadow_blocked.h index 2fc4be6d528..71fab19518c 100644 --- a/intern/cycles/kernel/kernel_shadow_blocked.cl +++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h @@ -14,26 +14,26 @@ * limitations under the License. */ -#include "kernel_split.h" +#include "kernel_split_common.h" /* - * Note on kernel_ocl_path_trace_shadow_blocked kernel. + * Note on kernel_shadow_blocked kernel. * This is the ninth kernel in the ray tracing logic. This is the eighth * of the path iteration kernels. This kernel takes care of "shadow ray cast" * logic of the direct lighting and AO part of ray tracing. * * The input and output are as follows, * - * PathState_coop ----------------------------------|--- kernel_ocl_path_trace_shadow_blocked --| - * LightRay_dl_coop --------------------------------| |--- LightRay_dl_coop - * LightRay_ao_coop --------------------------------| |--- LightRay_ao_coop - * ray_state ---------------------------------------| |--- ray_state - * Queue_data(QUEUE_SHADOW_RAY_CAST_AO_RAYS & | |--- Queue_data (QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_AO_RAYS) - QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| | + * PathState_coop ----------------------------------|--- kernel_shadow_blocked --| + * LightRay_dl_coop --------------------------------| |--- LightRay_dl_coop + * LightRay_ao_coop --------------------------------| |--- LightRay_ao_coop + * ray_state ---------------------------------------| |--- ray_state + * Queue_data(QUEUE_SHADOW_RAY_CAST_AO_RAYS & | |--- Queue_data (QUEUE_SHADOW_RAY_CAST_AO_RAYS & QUEUE_SHADOW_RAY_CAST_AO_RAYS) + QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| | * Queue_index(QUEUE_SHADOW_RAY_CAST_AO_RAYS& - QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| | - * kg (globals + data) -----------------------------| | - * queuesize ---------------------------------------| | + QUEUE_SHADOW_RAY_CAST_DL_RAYS) -------| | + * kg (globals + data) -----------------------------| | + * queuesize ---------------------------------------| | * * Note on shader_shadow : shader_shadow is neither input nor output to this kernel. shader_shadow is filled and consumed in this kernel itself. * Note on queues : @@ -47,7 +47,7 @@ * QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit. */ -__kernel void kernel_ocl_path_trace_shadow_blocked( +ccl_device void kernel_shadow_blocked( ccl_global char *globals, ccl_constant KernelData *data, ccl_global char *shader_shadow, /* Required for shadow blocked */ diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h new file mode 100644 index 00000000000..f5830be6bb1 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -0,0 +1,62 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _KERNEL_SPLIT_H_ +#define _KERNEL_SPLIT_H_ + +#include "../kernel_compat_opencl.h" +#include "../kernel_math.h" +#include "../kernel_types.h" +#include "../kernel_globals.h" + +#include "../util_atomic.h" + +#include "../kernel_random.h" +#include "../kernel_projection.h" +#include "../kernel_montecarlo.h" +#include "../kernel_differential.h" +#include "../kernel_camera.h" + +#include "../geom/geom.h" + +#include "../kernel_accumulate.h" +#include "../kernel_shader.h" +#include "../kernel_light.h" +#include "../kernel_passes.h" + +#ifdef __SUBSURFACE__ +#include "../kernel_subsurface.h" +#endif + +#ifdef __VOLUME__ +#include "../kernel_volume.h" +#endif + +#include "../kernel_path_state.h" +#include "../kernel_shadow.h" +#include "../kernel_emission.h" +#include "../kernel_path_common.h" +#include "../kernel_path_surface.h" +#include "../kernel_path_volume.h" + +#ifdef __KERNEL_DEBUG__ +#include "../kernel_debug.h" +#endif + +#include "../kernel_queues.h" +#include "../kernel_work_stealing.h" + +#endif diff --git a/intern/cycles/kernel/kernel_sum_all_radiance.cl b/intern/cycles/kernel/split/kernel_sum_all_radiance.h index 739a85d4cc8..eeb7da76e73 100644 --- a/intern/cycles/kernel/kernel_sum_all_radiance.cl +++ b/intern/cycles/kernel/split/kernel_sum_all_radiance.h @@ -14,10 +14,10 @@ * limitations under the License. */ -#include "kernel_compat_opencl.h" -#include "kernel_math.h" -#include "kernel_types.h" -#include "kernel_globals.h" +#include "../kernel_compat_opencl.h" +#include "../kernel_math.h" +#include "../kernel_types.h" +#include "../kernel_globals.h" /* * Since we process various samples in parallel; The output radiance of different samples @@ -25,7 +25,7 @@ * by all different samples and stores them in the RenderTile's output buffer. */ -__kernel void kernel_ocl_path_trace_sum_all_radiance( +ccl_device void kernel_sum_all_radiance( ccl_constant KernelData *data, /* To get pass_stride to offet into buffer */ ccl_global float *buffer, /* Output buffer of RenderTile */ ccl_global float *per_sample_output_buffer, /* Radiance contributed by all samples */ |