diff options
author | Hristo Gueorguiev <prem.nirved@gmail.com> | 2017-03-08 17:42:26 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-03-09 19:09:37 +0300 |
commit | 57e26627c485baab63e108821b2712d5e234ae7c (patch) | |
tree | e3ee5aa4bf759121559c69504b77d600552bf527 /intern | |
parent | 6c942db30dee14eb37229879656fa049a9ac6df6 (diff) |
Cycles: SSS and Volume rendering in split kernel
Decoupled ray marching is not supported yet.
Transparent shadows are always enabled for volume rendering.
Changes in kernel/bvh and kernel/geom are from Sergey.
This simiplifies code significantly, and prepares it for
record-all transparent shadow function in split kernel.
Diffstat (limited to 'intern')
46 files changed, 1022 insertions, 371 deletions
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index c740cada98b..18e8e274172 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -194,7 +194,7 @@ public: if(!use_patch_evaluation) { build_options += " -D__NO_PATCH_EVAL__"; } - if(!use_transparent) { + if(!use_transparent && !use_volume) { build_options += " -D__NO_TRANSPARENT__"; } return build_options; diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index b9705077fbf..6ab0b3c5777 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -35,13 +35,17 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device) kernel_path_init = NULL; kernel_scene_intersect = NULL; kernel_lamp_emission = NULL; + kernel_do_volume = NULL; kernel_queue_enqueue = NULL; - kernel_background_buffer_update = NULL; + kernel_indirect_background = NULL; kernel_shader_eval = NULL; kernel_holdout_emission_blurring_pathtermination_ao = NULL; + kernel_subsurface_scatter = NULL; kernel_direct_lighting = NULL; kernel_shadow_blocked = NULL; kernel_next_iteration_setup = NULL; + kernel_indirect_subsurface = NULL; + kernel_buffer_update = NULL; } DeviceSplitKernel::~DeviceSplitKernel() @@ -55,13 +59,17 @@ DeviceSplitKernel::~DeviceSplitKernel() delete kernel_path_init; delete kernel_scene_intersect; delete kernel_lamp_emission; + delete kernel_do_volume; delete kernel_queue_enqueue; - delete kernel_background_buffer_update; + delete kernel_indirect_background; delete kernel_shader_eval; delete kernel_holdout_emission_blurring_pathtermination_ao; + delete kernel_subsurface_scatter; delete kernel_direct_lighting; delete kernel_shadow_blocked; delete kernel_next_iteration_setup; + delete kernel_indirect_subsurface; + delete kernel_buffer_update; } bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features) @@ -75,13 +83,17 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe LOAD_KERNEL(path_init); LOAD_KERNEL(scene_intersect); LOAD_KERNEL(lamp_emission); + LOAD_KERNEL(do_volume); LOAD_KERNEL(queue_enqueue); - LOAD_KERNEL(background_buffer_update); + LOAD_KERNEL(indirect_background); LOAD_KERNEL(shader_eval); LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao); + LOAD_KERNEL(subsurface_scatter); LOAD_KERNEL(direct_lighting); LOAD_KERNEL(shadow_blocked); LOAD_KERNEL(next_iteration_setup); + LOAD_KERNEL(indirect_subsurface); + LOAD_KERNEL(buffer_update); #undef LOAD_KERNEL @@ -220,13 +232,18 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task, for(int PathIter = 0; PathIter < 16; PathIter++) { ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size); ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size); ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); - ENQUEUE_SPLIT_KERNEL(background_buffer_update, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size); ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size); ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size); ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size); ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size); + ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size); if(task->get_cancel()) { return true; diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index cc3e1aa26ae..abaf350cbbb 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -58,13 +58,17 @@ private: SplitKernelFunction *kernel_path_init; SplitKernelFunction *kernel_scene_intersect; SplitKernelFunction *kernel_lamp_emission; + SplitKernelFunction *kernel_do_volume; SplitKernelFunction *kernel_queue_enqueue; - SplitKernelFunction *kernel_background_buffer_update; + SplitKernelFunction *kernel_indirect_background; SplitKernelFunction *kernel_shader_eval; SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao; + SplitKernelFunction *kernel_subsurface_scatter; SplitKernelFunction *kernel_direct_lighting; SplitKernelFunction *kernel_shadow_blocked; SplitKernelFunction *kernel_next_iteration_setup; + SplitKernelFunction *kernel_indirect_subsurface; + SplitKernelFunction *kernel_buffer_update; /* Global memory variables [porting]; These memory is used for * co-operation between different kernels; Data written by one diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 1c740b5c6eb..30b3a6b52f7 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -21,12 +21,16 @@ set(SRC kernels/opencl/kernel_queue_enqueue.cl kernels/opencl/kernel_scene_intersect.cl kernels/opencl/kernel_lamp_emission.cl - kernels/opencl/kernel_background_buffer_update.cl + kernels/opencl/kernel_do_volume.cl + kernels/opencl/kernel_indirect_background.cl kernels/opencl/kernel_shader_eval.cl kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl + kernels/opencl/kernel_subsurface_scatter.cl kernels/opencl/kernel_direct_lighting.cl kernels/opencl/kernel_shadow_blocked.cl kernels/opencl/kernel_next_iteration_setup.cl + kernels/opencl/kernel_indirect_subsurface.cl + kernels/opencl/kernel_buffer_update.cl kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu ) @@ -71,6 +75,7 @@ set(SRC_HEADERS kernel_path_common.h kernel_path_state.h kernel_path_surface.h + kernel_path_subsurface.h kernel_path_volume.h kernel_projection.h kernel_queues.h @@ -196,10 +201,13 @@ set(SRC_UTIL_HEADERS ) set(SRC_SPLIT_HEADERS - split/kernel_background_buffer_update.h + split/kernel_buffer_update.h split/kernel_data_init.h split/kernel_direct_lighting.h + split/kernel_do_volume.h split/kernel_holdout_emission_blurring_pathtermination_ao.h + split/kernel_indirect_background.h + split/kernel_indirect_subsurface.h split/kernel_lamp_emission.h split/kernel_next_iteration_setup.h split/kernel_path_init.h @@ -210,6 +218,7 @@ set(SRC_SPLIT_HEADERS split/kernel_split_common.h split/kernel_split_data.h split/kernel_split_data_types.h + split/kernel_subsurface_scatter.h ) # CUDA module @@ -407,12 +416,16 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index df33a86bb18..b4f65bc8efd 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -309,9 +309,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, object = kernel_tex_fetch(__prim_object, -prim_addr-1); # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif triangle_intersect_precalc(dir, &isect_precalc); @@ -362,12 +362,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, } } else { - float ignore_t = FLT_MAX; - # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif triangle_intersect_precalc(dir, &isect_precalc); } diff --git a/intern/cycles/kernel/bvh/bvh_subsurface.h b/intern/cycles/kernel/bvh/bvh_subsurface.h index 889bbca21e2..583f7f7c469 100644 --- a/intern/cycles/kernel/bvh/bvh_subsurface.h +++ b/intern/cycles/kernel/bvh/bvh_subsurface.h @@ -75,16 +75,16 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) Transform ob_itfm; - bvh_instance_motion_push(kg, - subsurface_object, - ray, - &P, - &dir, - &idir, - &isect_t, - &ob_itfm); + isect_t = bvh_instance_motion_push(kg, + subsurface_object, + ray, + &P, + &dir, + &idir, + isect_t, + &ob_itfm); #else - bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); + isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t); #endif object = subsurface_object; } diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h index 80c8f31473a..0eca0c8e38b 100644 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -354,9 +354,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, object = kernel_tex_fetch(__prim_object, -prim_addr-1); # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); + isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); + isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); # endif triangle_intersect_precalc(dir, &isect_precalc); @@ -391,9 +391,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, /* instance pop */ # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif triangle_intersect_precalc(dir, &isect_precalc); diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h index 57e5b8d736d..136034aa484 100644 --- a/intern/cycles/kernel/bvh/bvh_volume.h +++ b/intern/cycles/kernel/bvh/bvh_volume.h @@ -238,9 +238,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, int object_flag = kernel_tex_fetch(__object_flag, object); if(object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); + isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); + isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); # endif triangle_intersect_precalc(dir, &isect_precalc); @@ -281,9 +281,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, /* instance pop */ # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif triangle_intersect_precalc(dir, &isect_precalc); diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h index 5a1accebaa0..6f3346e7634 100644 --- a/intern/cycles/kernel/bvh/bvh_volume_all.h +++ b/intern/cycles/kernel/bvh/bvh_volume_all.h @@ -288,11 +288,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, object = kernel_tex_fetch(__prim_object, -prim_addr-1); int object_flag = kernel_tex_fetch(__object_flag, object); if(object_flag & SD_OBJECT_HAS_VOLUME) { - # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif triangle_intersect_precalc(dir, &isect_precalc); @@ -348,11 +347,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, } } else { - float ignore_t = FLT_MAX; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif triangle_intersect_precalc(dir, &isect_precalc); } diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h index 607295f9ed5..2a4da3eea82 100644 --- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h @@ -390,9 +390,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, object = kernel_tex_fetch(__prim_object, -prim_addr-1); # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif num_hits_in_instance = 0; @@ -445,11 +445,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, } } else { - float ignore_t = FLT_MAX; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif } diff --git a/intern/cycles/kernel/bvh/qbvh_subsurface.h b/intern/cycles/kernel/bvh/qbvh_subsurface.h index 84dc4003133..a6431a94e6e 100644 --- a/intern/cycles/kernel/bvh/qbvh_subsurface.h +++ b/intern/cycles/kernel/bvh/qbvh_subsurface.h @@ -64,16 +64,16 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) Transform ob_itfm; - bvh_instance_motion_push(kg, - subsurface_object, - ray, - &P, - &dir, - &idir, - &isect_t, - &ob_itfm); + isect_t = bvh_instance_motion_push(kg, + subsurface_object, + ray, + &P, + &dir, + &idir, + isect_t, + &ob_itfm); #else - bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t); + isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t); #endif object = subsurface_object; } diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h index 10ae7bee852..c20a8f3703f 100644 --- a/intern/cycles/kernel/bvh/qbvh_traversal.h +++ b/intern/cycles/kernel/bvh/qbvh_traversal.h @@ -468,9 +468,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Instance pop. */ # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif qbvh_near_far_idx_calc(idir, diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h index dc6627e2dbb..859c5da808b 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume.h +++ b/intern/cycles/kernel/bvh/qbvh_volume.h @@ -295,9 +295,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int object_flag = kernel_tex_fetch(__object_flag, object); if(object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); + isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); + isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); # endif qbvh_near_far_idx_calc(idir, @@ -341,9 +341,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* Instance pop. */ # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif qbvh_near_far_idx_calc(idir, diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h index ff1fa92af6e..bbe588c878f 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume_all.h +++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h @@ -346,9 +346,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int object_flag = kernel_tex_fetch(__object_flag, object); if(object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif qbvh_near_far_idx_calc(idir, @@ -406,11 +406,10 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, } } else { - float ignore_t = FLT_MAX; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif } diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 5a04be8b0bf..6ecdfe0173a 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -425,7 +425,13 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir) /* Transform ray into object space to enter static object in BVH */ -ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t) +ccl_device_inline float bvh_instance_push(KernelGlobals *kg, + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float t) { Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); @@ -435,8 +441,11 @@ ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ra *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); *idir = bvh_inverse_direction(*dir); - if(*t != FLT_MAX) - *t *= len; + if(t != FLT_MAX) { + t *= len; + } + + return t; } #ifdef __QBVH__ @@ -473,16 +482,24 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg, /* Transorm ray to exit static object in BVH */ -ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t) +ccl_device_inline float bvh_instance_pop(KernelGlobals *kg, + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float t) { - if(*t != FLT_MAX) { + if(t != FLT_MAX) { Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *t /= len(transform_direction(&tfm, ray->D)); + t /= len(transform_direction(&tfm, ray->D)); } *P = ray->P; *dir = bvh_clamp_direction(ray->D); *idir = bvh_inverse_direction(*dir); + + return t; } /* Same as above, but returns scale factor to apply to multiple intersection distances */ @@ -501,13 +518,13 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, co #ifdef __OBJECT_MOTION__ /* Transform ray into object space to enter motion blurred object in BVH */ -ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, +ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, - ccl_addr_space float *t, + float t, Transform *itfm) { object_fetch_transform_motion_test(kg, object, ray->time, itfm); @@ -518,8 +535,11 @@ ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); *idir = bvh_inverse_direction(*dir); - if(*t != FLT_MAX) - *t *= len; + if(t != FLT_MAX) { + t *= len; + } + + return t; } #ifdef __QBVH__ @@ -557,22 +577,24 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, /* Transorm ray to exit motion blurred object in BVH */ -ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, - int object, - const Ray *ray, - float3 *P, - float3 *dir, - float3 *idir, - ccl_addr_space float *t, - Transform *itfm) -{ - if(*t != FLT_MAX) { - *t /= len(transform_direction(itfm, ray->D)); +ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg, + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float t, + Transform *itfm) +{ + if(t != FLT_MAX) { + t /= len(transform_direction(itfm, ray->D)); } *P = ray->P; *dir = bvh_clamp_direction(ray->D); *idir = bvh_inverse_direction(*dir); + + return t; } /* Same as above, but returns scale factor to apply to multiple intersection distances */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 95c27850513..ebf03ad9778 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -46,6 +46,7 @@ #include "kernel_path_common.h" #include "kernel_path_surface.h" #include "kernel_path_volume.h" +#include "kernel_path_subsurface.h" #ifdef __KERNEL_DEBUG__ # include "kernel_debug.h" @@ -413,172 +414,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, } } -#ifdef __SUBSURFACE__ -# ifndef __KERNEL_CUDA__ -ccl_device -# else -ccl_device_inline -# endif -bool kernel_path_subsurface_scatter( - KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - PathRadiance *L, - PathState *state, - RNG *rng, - Ray *ray, - float3 *throughput, - SubsurfaceIndirectRays *ss_indirect) -{ - float bssrdf_probability; - ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability); - - /* modify throughput for picking bssrdf or bsdf */ - *throughput *= bssrdf_probability; - - /* do bssrdf scatter step if we picked a bssrdf closure */ - if(sc) { - /* We should never have two consecutive BSSRDF bounces, - * the second one should be converted to a diffuse BSDF to - * avoid this. - */ - kernel_assert(!ss_indirect->tracing); - - uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb); - - SubsurfaceIntersection ss_isect; - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - int num_hits = subsurface_scatter_multi_intersect(kg, - &ss_isect, - sd, - sc, - &lcg_state, - bssrdf_u, bssrdf_v, - false); -# ifdef __VOLUME__ - ss_indirect->need_update_volume_stack = - kernel_data.integrator.use_volumes && - sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; -# endif /* __VOLUME__ */ - - /* compute lighting with the BSDF closure */ - for(int hit = 0; hit < num_hits; hit++) { - /* NOTE: We reuse the existing ShaderData, we assume the path - * integration loop stops when this function returns true. - */ - subsurface_scatter_multi_setup(kg, - &ss_isect, - hit, - sd, - state, - state->flag, - sc, - false); - - PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays]; - Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays]; - float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays]; - PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays]; - - *hit_state = *state; - *hit_ray = *ray; - *hit_tp = *throughput; - - hit_state->rng_offset += PRNG_BOUNCE_NUM; - - path_radiance_init(hit_L, kernel_data.film.use_light_pass); - hit_L->direct_throughput = L->direct_throughput; - path_radiance_copy_indirect(hit_L, L); - - kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L); - - if(kernel_path_surface_bounce(kg, - rng, - sd, - hit_tp, - hit_state, - hit_L, - hit_ray)) - { -# ifdef __LAMP_MIS__ - hit_state->ray_t = 0.0f; -# endif /* __LAMP_MIS__ */ - -# ifdef __VOLUME__ - if(ss_indirect->need_update_volume_stack) { - Ray volume_ray = *ray; - /* Setup ray from previous surface point to the new one. */ - volume_ray.D = normalize_len(hit_ray->P - volume_ray.P, - &volume_ray.t); - - kernel_volume_stack_update_for_subsurface( - kg, - emission_sd, - &volume_ray, - hit_state->volume_stack); - } -# endif /* __VOLUME__ */ - path_radiance_reset_indirect(L); - ss_indirect->num_rays++; - } - else { - path_radiance_accum_sample(L, hit_L, 1); - } - } - return true; - } - return false; -} - -ccl_device_inline void kernel_path_subsurface_init_indirect( - SubsurfaceIndirectRays *ss_indirect) -{ - ss_indirect->tracing = false; - ss_indirect->num_rays = 0; -} - -ccl_device void kernel_path_subsurface_accum_indirect( - SubsurfaceIndirectRays *ss_indirect, - PathRadiance *L) -{ - if(ss_indirect->tracing) { - path_radiance_sum_indirect(L); - path_radiance_accum_sample(&ss_indirect->direct_L, L, 1); - if(ss_indirect->num_rays == 0) { - *L = ss_indirect->direct_L; - } - } -} - -ccl_device void kernel_path_subsurface_setup_indirect( - KernelGlobals *kg, - SubsurfaceIndirectRays *ss_indirect, - PathState *state, - Ray *ray, - PathRadiance *L, - float3 *throughput) -{ - if(!ss_indirect->tracing) { - ss_indirect->direct_L = *L; - } - ss_indirect->tracing = true; - - /* Setup state, ray and throughput for indirect SSS rays. */ - ss_indirect->num_rays--; - - Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays]; - PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays]; - - *state = ss_indirect->state[ss_indirect->num_rays]; - *ray = *indirect_ray; - *L = *indirect_L; - *throughput = ss_indirect->throughputs[ss_indirect->num_rays]; - - state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM; -} - -#endif /* __SUBSURFACE__ */ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, diff --git a/intern/cycles/kernel/kernel_path_subsurface.h b/intern/cycles/kernel/kernel_path_subsurface.h new file mode 100644 index 00000000000..d22ec992074 --- /dev/null +++ b/intern/cycles/kernel/kernel_path_subsurface.h @@ -0,0 +1,187 @@ +/* + * Copyright 2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +#ifdef __SUBSURFACE__ +# ifndef __KERNEL_CUDA__ +ccl_device +# else +ccl_device_inline +# endif +bool kernel_path_subsurface_scatter( + KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + PathRadiance *L, + ccl_addr_space PathState *state, + ccl_addr_space RNG *rng, + ccl_addr_space Ray *ray, + ccl_addr_space float3 *throughput, + ccl_addr_space SubsurfaceIndirectRays *ss_indirect) +{ + float bssrdf_probability; + ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability); + + /* modify throughput for picking bssrdf or bsdf */ + *throughput *= bssrdf_probability; + + /* do bssrdf scatter step if we picked a bssrdf closure */ + if(sc) { + /* We should never have two consecutive BSSRDF bounces, + * the second one should be converted to a diffuse BSDF to + * avoid this. + */ + kernel_assert(!ss_indirect->tracing); + + uint lcg_state = lcg_state_init_addrspace(rng, state, 0x68bc21eb); + + SubsurfaceIntersection ss_isect; + float bssrdf_u, bssrdf_v; + path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + int num_hits = subsurface_scatter_multi_intersect(kg, + &ss_isect, + sd, + sc, + &lcg_state, + bssrdf_u, bssrdf_v, + false); +# ifdef __VOLUME__ + ss_indirect->need_update_volume_stack = + kernel_data.integrator.use_volumes && + sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; +# endif /* __VOLUME__ */ + + /* compute lighting with the BSDF closure */ + for(int hit = 0; hit < num_hits; hit++) { + /* NOTE: We reuse the existing ShaderData, we assume the path + * integration loop stops when this function returns true. + */ + subsurface_scatter_multi_setup(kg, + &ss_isect, + hit, + sd, + state, + state->flag, + sc, + false); + + ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays]; + ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays]; + ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays]; + PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays]; + + *hit_state = *state; + *hit_ray = *ray; + *hit_tp = *throughput; + + hit_state->rng_offset += PRNG_BOUNCE_NUM; + + path_radiance_init(hit_L, kernel_data.film.use_light_pass); + hit_L->direct_throughput = L->direct_throughput; + path_radiance_copy_indirect(hit_L, L); + + kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L); + + if(kernel_path_surface_bounce(kg, + rng, + sd, + hit_tp, + hit_state, + hit_L, + hit_ray)) + { +# ifdef __LAMP_MIS__ + hit_state->ray_t = 0.0f; +# endif /* __LAMP_MIS__ */ + +# ifdef __VOLUME__ + if(ss_indirect->need_update_volume_stack) { + Ray volume_ray = *ray; + /* Setup ray from previous surface point to the new one. */ + volume_ray.D = normalize_len(hit_ray->P - volume_ray.P, + &volume_ray.t); + + kernel_volume_stack_update_for_subsurface( + kg, + emission_sd, + &volume_ray, + hit_state->volume_stack); + } +# endif /* __VOLUME__ */ + path_radiance_reset_indirect(L); + ss_indirect->num_rays++; + } + else { + path_radiance_accum_sample(L, hit_L, 1); + } + } + return true; + } + return false; +} + +ccl_device_inline void kernel_path_subsurface_init_indirect( + ccl_addr_space SubsurfaceIndirectRays *ss_indirect) +{ + ss_indirect->tracing = false; + ss_indirect->num_rays = 0; +} + +ccl_device void kernel_path_subsurface_accum_indirect( + ccl_addr_space SubsurfaceIndirectRays *ss_indirect, + PathRadiance *L) +{ + if(ss_indirect->tracing) { + path_radiance_sum_indirect(L); + path_radiance_accum_sample(&ss_indirect->direct_L, L, 1); + if(ss_indirect->num_rays == 0) { + *L = ss_indirect->direct_L; + } + } +} + +ccl_device void kernel_path_subsurface_setup_indirect( + KernelGlobals *kg, + ccl_addr_space SubsurfaceIndirectRays *ss_indirect, + ccl_addr_space PathState *state, + ccl_addr_space Ray *ray, + PathRadiance *L, + ccl_addr_space float3 *throughput) +{ + if(!ss_indirect->tracing) { + ss_indirect->direct_L = *L; + } + ss_indirect->tracing = true; + + /* Setup state, ray and throughput for indirect SSS rays. */ + ss_indirect->num_rays--; + + ccl_addr_space Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays]; + PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays]; + + *state = ss_indirect->state[ss_indirect->num_rays]; + *ray = *indirect_ray; + *L = *indirect_L; + *throughput = ss_indirect->throughputs[ss_indirect->num_rays]; + + state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM; +} + +#endif /* __SUBSURFACE__ */ + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 34a78552c1d..efa23038089 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN -#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) +#if (defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)) && !defined(__SPLIT_KERNEL__) /* branched path tracing: connect path directly to position on one or more lights and add it to L */ ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng, @@ -188,7 +188,6 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng, #endif -#ifndef __SPLIT_KERNEL__ /* path tracing: connect path directly to position on a light and add it to L */ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state, @@ -226,7 +225,6 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_ } #endif } -#endif /* path tracing: bounce off or through surface to with new direction stored in ray */ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg, diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index 3d3b7385d8b..28e1b5ba98e 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -20,11 +20,11 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_path_volume_connect_light( KernelGlobals *kg, - RNG *rng, + ccl_addr_space RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput, - PathState *state, + ccl_addr_space PathState *state, PathRadiance *L) { #ifdef __EMISSION__ @@ -59,7 +59,7 @@ ccl_device_inline void kernel_path_volume_connect_light( } } } -#endif +#endif /* __EMISSION__ */ } #ifdef __KERNEL_GPU__ @@ -67,8 +67,14 @@ ccl_device_noinline #else ccl_device #endif -bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, - ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray) +bool kernel_path_volume_bounce( + KernelGlobals *kg, + ccl_addr_space RNG *rng, + ShaderData *sd, + ccl_addr_space float3 *throughput, + ccl_addr_space PathState *state, + PathRadiance *L, + ccl_addr_space Ray *ray) { /* sample phase function */ float phase_pdf; @@ -111,6 +117,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, return true; } +#ifdef __BRANCHED_PATH__ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng, ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L, bool sample_all_lights, Ray *ray, const VolumeSegment *segment) @@ -261,10 +268,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG } } } -#endif +#endif /* __EMISSION__ */ } +#endif /* __BRANCHED_PATH__ */ -#endif +#endif /* __VOLUME_SCATTER__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index a2ab96b35e2..93a92c63a40 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -203,11 +203,11 @@ void shader_setup_from_subsurface( # ifdef __INSTANCING__ if(isect->object != OBJECT_NONE) { /* instance transform */ - object_normal_transform(kg, sd, &sd->N); - object_normal_transform(kg, sd, &sd->Ng); + object_normal_transform_auto(kg, sd, &sd->N); + object_normal_transform_auto(kg, sd, &sd->Ng); # ifdef __DPDU__ - object_dir_transform(kg, sd, &sd->dPdu); - object_dir_transform(kg, sd, &sd->dPdv); + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); # endif } # endif @@ -816,7 +816,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b *N_ = (is_zero(N))? sd->N: normalize(N); if(texture_blur_) - *texture_blur_ = texture_blur/weight_sum; + *texture_blur_ = safe_divide(texture_blur, weight_sum); return eval; } @@ -1036,8 +1036,8 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData * ccl_device_inline void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, - PathState *state, - VolumeStack *stack, + ccl_addr_space PathState *state, + ccl_addr_space VolumeStack *stack, int path_flag, ShaderContext ctx) { diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 2483c5f9ae1..68a7ccfd903 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -24,7 +24,7 @@ ccl_device_forceinline bool shadow_handle_transparent_isect( ShaderData *shadow_sd, ccl_addr_space PathState *state, # ifdef __VOLUME__ - struct PathState *volume_state, + ccl_addr_space struct PathState *volume_state, # endif Intersection *isect, Ray *ray, @@ -276,7 +276,13 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( float3 Pend = ray->P + ray->D*ray->t; int bounce = state->transparent_bounce; # ifdef __VOLUME__ - PathState ps = *state; +# ifdef __SPLIT_KERNEL__ + ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; +# else + PathState ps_object; + PathState *ps = &ps_object; +# endif + *ps = *state; # endif for(;;) { if(bounce >= kernel_data.integrator.transparent_max_bounce) { @@ -299,7 +305,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( shadow_sd, state, #ifdef __VOLUME__ - &ps, + ps, #endif isect, ray, @@ -316,8 +322,8 @@ ccl_device bool shadow_blocked_transparent_stepped_loop( } # ifdef __VOLUME__ /* Attenuation for last line segment towards light. */ - if(ps.volume_stack[0].shader != SHADER_NONE) { - kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput); + if(ps->volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); } # endif *shadow *= throughput; @@ -365,21 +371,11 @@ ccl_device bool shadow_blocked_transparent_stepped( ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *shadow_sd, ccl_addr_space PathState *state, - ccl_addr_space Ray *ray_input, + Ray *ray_input, float3 *shadow) { - /* Special trickery for split kernel: some data is coming from the - * global memory. - */ -#ifdef __SPLIT_KERNEL__ - Ray private_ray = *ray_input; - Ray *ray = &private_ray; - Intersection *isect = &kernel_split_state.isect_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; -#else /* __SPLIT_KERNEL__ */ Ray *ray = ray_input; - Intersection isect_object; - Intersection *isect = &isect_object; -#endif /* __SPLIT_KERNEL__ */ + Intersection isect; /* Some common early checks. */ *shadow = make_float3(1.0f, 1.0f, 1.0f); if(ray->t == 0.0f) { @@ -397,7 +393,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, shadow_sd, state, ray, - isect, + &isect, shadow); } #ifdef __TRANSPARENT_SHADOWS__ @@ -423,11 +419,11 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, const bool blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, - isect, + &isect, NULL, 0.0f, 0.0f); const bool is_transparent_isect = blocked - ? shader_transparent_shadow(kg, isect) + ? shader_transparent_shadow(kg, &isect) : false; if(!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) @@ -436,7 +432,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, shadow_sd, state, ray, - isect, + &isect, blocked, is_transparent_isect, shadow); @@ -454,7 +450,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, shadow_sd, state, ray, - isect, + &isect, shadow); # endif /* __SHADOW_RECORD_ALL__ */ #endif /* __TRANSPARENT_SHADOWS__ */ diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index a8fa6432542..fe88ba4ff05 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -185,7 +185,7 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent) ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, ShaderData *sd, - PathState *state, + ccl_addr_space PathState *state, int state_flag, float3 *eval, float3 *N) @@ -277,7 +277,12 @@ ccl_device_inline int subsurface_scatter_multi_intersect( float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B; /* create ray */ +#ifdef __SPLIT_KERNEL__ + Ray ray_object = ss_isect->ray; + Ray *ray = &ray_object; +#else Ray *ray = &ss_isect->ray; +#endif ray->P = sd->P + disk_N*disk_height + disk_P; ray->D = -disk_N; ray->t = 2.0f*disk_height; @@ -351,6 +356,10 @@ ccl_device_inline int subsurface_scatter_multi_intersect( ss_isect->weight[hit] = eval; } +#ifdef __SPLIT_KERNEL__ + ss_isect->ray = *ray; +#endif + return num_eval_hits; } @@ -359,13 +368,19 @@ ccl_device_noinline void subsurface_scatter_multi_setup( SubsurfaceIntersection* ss_isect, int hit, ShaderData *sd, - PathState *state, + ccl_addr_space PathState *state, int state_flag, ShaderClosure *sc, bool all) { +#ifdef __SPLIT_KERNEL__ + Ray ray_object = ss_isect->ray; + Ray *ray = &ray_object; +#else + Ray *ray = &ss_isect->ray; +#endif /* Setup new shading point. */ - shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray); + shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray); /* Optionally blur colors and bump mapping. */ float3 weight = ss_isect->weight[hit]; @@ -376,6 +391,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup( subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N); } +#ifndef __SPLIT_KERNEL__ /* subsurface scattering step, from a point on the surface to another nearby point on the same object */ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathState *state, int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all) @@ -465,6 +481,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS /* setup diffuse bsdf */ subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N); } +#endif /* ! __SPLIT_KERNEL__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index a7faaef89ca..6c18cab6406 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -76,14 +76,12 @@ CCL_NAMESPACE_BEGIN # ifdef WITH_OSL # define __OSL__ # endif -# ifndef __SPLIT_KERNEL__ -# define __SUBSURFACE__ -# endif +# define __SUBSURFACE__ # define __CMJ__ +# define __VOLUME__ +# define __VOLUME_SCATTER__ # ifndef __SPLIT_KERNEL__ -# define __VOLUME__ # define __VOLUME_DECOUPLED__ -# define __VOLUME_SCATTER__ # define __SHADOW_RECORD_ALL__ # define __VOLUME_RECORD_ALL__ # endif @@ -130,6 +128,9 @@ CCL_NAMESPACE_BEGIN # define __CL_USE_NATIVE__ # define __KERNEL_SHADING__ # define __KERNEL_ADV_SHADING__ +# define __SUBSURFACE__ +# define __VOLUME__ +# define __VOLUME_SCATTER__ # endif /* __KERNEL_OPENCL_AMD__ */ # ifdef __KERNEL_OPENCL_INTEL_CPU__ @@ -552,7 +553,7 @@ typedef struct Ray { /* Intersection */ -typedef ccl_addr_space struct Intersection { +typedef struct Intersection { float t, u, v; int prim; int object; @@ -934,7 +935,7 @@ typedef struct PathState { /* Subsurface */ /* Struct to gather multiple SSS hits. */ -struct SubsurfaceIntersection +typedef struct SubsurfaceIntersection { Ray ray; float3 weight[BSSRDF_MAX_HITS]; @@ -942,10 +943,10 @@ struct SubsurfaceIntersection int num_hits; struct Intersection hits[BSSRDF_MAX_HITS]; float3 Ng[BSSRDF_MAX_HITS]; -}; +} SubsurfaceIntersection; /* Struct to gather SSS indirect rays and delay tracing them. */ -struct SubsurfaceIndirectRays +typedef struct SubsurfaceIndirectRays { bool need_update_volume_stack; bool tracing; @@ -956,7 +957,7 @@ struct SubsurfaceIndirectRays struct Ray rays[BSSRDF_MAX_HITS]; float3 throughputs[BSSRDF_MAX_HITS]; struct PathRadiance L[BSSRDF_MAX_HITS]; -}; +} SubsurfaceIndirectRays; /* Constant Kernel Data * diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 10d0d185345..608350a9038 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -38,7 +38,7 @@ typedef struct VolumeShaderCoefficients { /* evaluate shader to get extinction coefficient at P */ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, - PathState *state, + ccl_addr_space PathState *state, float3 P, float3 *extinction) { @@ -64,7 +64,7 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg, /* evaluate shader to get absorption, scattering and emission at P */ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, - PathState *state, + ccl_addr_space PathState *state, float3 P, VolumeShaderCoefficients *coeff) { @@ -112,7 +112,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel) return (channel == 0)? value.x: ((channel == 1)? value.y: value.z); } -ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack) +ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack) { for(int i = 0; stack[i].shader != SHADER_NONE; i++) { int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE); @@ -161,7 +161,11 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac /* homogeneous volume: assume shader evaluation at the starts gives * the extinction coefficient for the entire line segment */ -ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput) +ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, + ccl_addr_space PathState *state, + Ray *ray, + ShaderData *sd, + float3 *throughput) { float3 sigma_t; @@ -171,7 +175,11 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *s /* heterogeneous volume: integrate stepping through the volume until we * reach the end, get absorbed entirely, or run out of iterations */ -ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput) +ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, + ccl_addr_space PathState *state, + Ray *ray, + ShaderData *sd, + float3 *throughput) { float3 tp = *throughput; const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ @@ -179,7 +187,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* prepare for stepping */ int max_steps = kernel_data.integrator.volume_max_steps; float step = kernel_data.integrator.volume_step_size; - float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step; + float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step; /* compute extinction at the start */ float t = 0.0f; @@ -193,7 +201,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* use random position inside this segment to sample shader */ if(new_t == ray->t) - random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt; + random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt; float3 new_P = ray->P + ray->D * (t + random_jitter_offset); float3 sigma_t; @@ -227,7 +235,11 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* get the volume attenuation over line segment defined by ray, with the * assumption that there are no surfaces blocking light between the endpoints */ -ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput) +ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + Ray *ray, + float3 *throughput) { shader_setup_from_volume(kg, shadow_sd, ray); @@ -341,9 +353,15 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe /* homogeneous volume: assume shader evaluation at the start gives * the volume shading coefficient for the entire line segment */ -ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg, - PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, - RNG *rng, bool probalistic_scatter) +ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous( + KernelGlobals *kg, + ccl_addr_space PathState *state, + Ray *ray, + ShaderData *sd, + PathRadiance *L, + ccl_addr_space float3 *throughput, + ccl_addr_space RNG *rng, + bool probalistic_scatter) { VolumeShaderCoefficients coeff; @@ -444,8 +462,14 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba * volume until we reach the end, get absorbed entirely, or run out of * iterations. this does probabilistically scatter or get transmitted through * for path tracing where we don't want to branch. */ -ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg, - PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng) +ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( + KernelGlobals *kg, + ccl_addr_space PathState *state, + Ray *ray, + ShaderData *sd, + PathRadiance *L, + ccl_addr_space float3 *throughput, + ccl_addr_space RNG *rng) { float3 tp = *throughput; const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ @@ -453,7 +477,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( /* prepare for stepping */ int max_steps = kernel_data.integrator.volume_max_steps; float step_size = kernel_data.integrator.volume_step_size; - float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size; + float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step_size; /* compute coefficients at the start */ float t = 0.0f; @@ -474,7 +498,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( /* use random position inside this segment to sample shader */ if(new_t == ray->t) - random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt; + random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt; float3 new_P = ray->P + ray->D * (t + random_jitter_offset); VolumeShaderCoefficients coeff; @@ -579,8 +603,15 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( * ray, with the assumption that there are no surfaces blocking light * between the endpoints. distance sampling is used to decide if we will * scatter or not. */ -ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg, - PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous) +ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate( + KernelGlobals *kg, + ccl_addr_space PathState *state, + ShaderData *sd, + Ray *ray, + PathRadiance *L, + ccl_addr_space float3 *throughput, + ccl_addr_space RNG *rng, + bool heterogeneous) { shader_setup_from_volume(kg, sd, ray); @@ -590,6 +621,7 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true); } +#ifndef __SPLIT_KERNEL__ /* Decoupled Volume Sampling * * VolumeSegment is list of coefficients and transmittance stored at all steps @@ -990,6 +1022,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( return VOLUME_PATH_SCATTERED; } +#endif /* __SPLIT_KERNEL */ /* decide if we need to use decoupled or not */ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method) @@ -1021,9 +1054,9 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou ccl_device void kernel_volume_stack_init(KernelGlobals *kg, ShaderData *stack_sd, - const PathState *state, - const Ray *ray, - VolumeStack *stack) + ccl_addr_space const PathState *state, + ccl_addr_space const Ray *ray, + ccl_addr_space VolumeStack *stack) { /* NULL ray happens in the baker, does it need proper initialization of * camera in volume? @@ -1166,7 +1199,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, } } -ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack) +ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack) { /* todo: we should have some way for objects to indicate if they want the * world shader to work inside them. excluding it by default is problematic @@ -1215,7 +1248,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, ShaderData *stack_sd, Ray *ray, - VolumeStack *stack) + ccl_addr_space VolumeStack *stack) { kernel_assert(kernel_data.integrator.use_volumes); @@ -1277,7 +1310,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, * the world's one after the last bounce to avoid render artifacts. */ ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg, - VolumeStack *volume_stack) + ccl_addr_space VolumeStack *volume_stack) { if(kernel_data.background.volume_shader != SHADER_NONE) { /* Keep the world's volume in stack. */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index deb872444d0..8ce420d8a48 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -74,13 +74,17 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)( DECLARE_SPLIT_KERNEL_FUNCTION(path_init) DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect) DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DECLARE_SPLIT_KERNEL_FUNCTION(do_volume) DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue) -DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background) DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval) DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting) DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked) DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) +DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update) void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)); diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index d6d0db4e034..8c519a21d95 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -41,13 +41,17 @@ # include "split/kernel_path_init.h" # include "split/kernel_scene_intersect.h" # include "split/kernel_lamp_emission.h" +# include "split/kernel_do_volume.h" # include "split/kernel_queue_enqueue.h" -# include "split/kernel_background_buffer_update.h" +# include "split/kernel_indirect_background.h" # include "split/kernel_shader_eval.h" # include "split/kernel_holdout_emission_blurring_pathtermination_ao.h" +# include "split/kernel_subsurface_scatter.h" # include "split/kernel_direct_lighting.h" # include "split/kernel_shadow_blocked.h" # include "split/kernel_next_iteration_setup.h" +# include "split/kernel_indirect_subsurface.h" +# include "split/kernel_buffer_update.h" #endif CCL_NAMESPACE_BEGIN @@ -166,13 +170,17 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission) +DEFINE_SPLIT_KERNEL_FUNCTION(do_volume) DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue) -DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update) +DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao) +DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked) DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup) +DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) +DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update) void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func)) { @@ -189,13 +197,17 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, REGISTER(path_init); REGISTER(scene_intersect); REGISTER(lamp_emission); + REGISTER(do_volume); REGISTER(queue_enqueue); - REGISTER(background_buffer_update); + REGISTER(indirect_background); REGISTER(shader_eval); REGISTER(holdout_emission_blurring_pathtermination_ao); + REGISTER(subsurface_scatter); REGISTER(direct_lighting); REGISTER(shadow_blocked); REGISTER(next_iteration_setup); + REGISTER(indirect_subsurface); + REGISTER(buffer_update); #undef REGISTER #undef REGISTER_EVAL_NAME diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl index 47e363f6e03..d3058501f27 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl @@ -16,11 +16,11 @@ #include "kernel_compat_opencl.h" #include "split/kernel_split_common.h" -#include "split/kernel_background_buffer_update.h" +#include "split/kernel_buffer_update.h" -__kernel void kernel_ocl_path_trace_background_buffer_update( +__kernel void kernel_ocl_path_trace_buffer_update( KernelGlobals *kg, ccl_constant KernelData *data) { - kernel_background_buffer_update(kg); + kernel_buffer_update(kg); } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl new file mode 100644 index 00000000000..6380e9cb746 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" +#include "split/kernel_do_volume.h" + +__kernel void kernel_ocl_path_trace_do_volume( + KernelGlobals *kg, + ccl_constant KernelData *data) +{ + kernel_do_volume(kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl new file mode 100644 index 00000000000..671501bf237 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" +#include "split/kernel_indirect_background.h" + +__kernel void kernel_ocl_path_trace_indirect_background( + KernelGlobals *kg, + ccl_constant KernelData *data) +{ + kernel_indirect_background(kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl new file mode 100644 index 00000000000..b5e52e81ebf --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" +#include "split/kernel_indirect_subsurface.h" + +__kernel void kernel_ocl_path_trace_indirect_subsurface( + KernelGlobals *kg, + ccl_constant KernelData *data) +{ + kernel_indirect_subsurface(kg); +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl new file mode 100644 index 00000000000..7a947c48e60 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_state_buffer_size.cl" +#include "kernel_data_init.cl" +#include "kernel_path_init.cl" + +#include "kernel_scene_intersect.cl" +#include "kernel_lamp_emission.cl" +#include "kernel_do_volume.cl" +#include "kernel_indirect_background.cl" +#include "kernel_queue_enqueue.cl" +#include "kernel_shader_eval.cl" +#include "kernel_holdout_emission_blurring_pathtermination_ao.cl" +#include "kernel_subsurface_scatter.cl" +#include "kernel_direct_lighting.cl" +#include "kernel_shadow_blocked.cl" +#include "kernel_next_iteration_setup.cl" +#include "kernel_indirect_subsurface.cl" +#include "kernel_buffer_update.cl" + diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl new file mode 100644 index 00000000000..8dae79bacb0 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl @@ -0,0 +1,26 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel_compat_opencl.h" +#include "split/kernel_split_common.h" +#include "split/kernel_subsurface_scatter.h" + +__kernel void kernel_ocl_path_trace_subsurface_scatter( + KernelGlobals *kg, + ccl_constant KernelData *data) +{ + kernel_subsurface_scatter(kg); +} diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 04aaf1bbaad..e42605c88e7 100644 --- a/intern/cycles/kernel/split/kernel_background_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -69,7 +69,7 @@ CCL_NAMESPACE_BEGIN * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty */ -ccl_device void kernel_background_buffer_update(KernelGlobals *kg) +ccl_device void kernel_buffer_update(KernelGlobals *kg) { ccl_local unsigned int local_queue_atomics; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { @@ -141,26 +141,6 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg) rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride; buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride; - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { - /* eval background shader if nothing hit */ - if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) { - *L_transparent = (*L_transparent) + average((*throughput)); -#ifdef __PASSES__ - if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) -#endif - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - } - - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { -#ifdef __BACKGROUND__ - /* sample background shader */ - float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray); - path_radiance_accum_background(L, (*throughput), L_background, state->bounce); -#endif - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - } - } - if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { float3 L_sum = path_radiance_clamp_and_sum(kg, L); kernel_write_light_passes(kg, buffer, L, sample); @@ -207,6 +187,9 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg) *L_transparent = 0.0f; path_radiance_init(L, kernel_data.film.use_light_pass); path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng, sample, ray); +#ifdef __SUBSURFACE__ + kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); +#endif #ifdef __KERNEL_DEBUG__ debug_data_init(debug_data); #endif diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h new file mode 100644 index 00000000000..18da6e8aa3a --- /dev/null +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -0,0 +1,97 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + + +ccl_device void kernel_do_volume(KernelGlobals *kg) +{ +#ifdef __VOLUME__ + /* We will empty this queue in this kernel. */ + if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + } + /* Fetch use_queues_flag. */ + ccl_local char local_use_queues_flag; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_use_queues_flag = *kernel_split_params.use_queues_flag; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(local_use_queues_flag) { + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } + + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { + + bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND); + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global RNG *rng = &kernel_split_state.rng[ray_index]; + ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; + ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index]; + + /* Sanitize volume stack. */ + if(!hit) { + kernel_volume_clean_stack(kg, state->volume_stack); + } + /* volume attenuation, emission, scatter */ + if(state->volume_stack[0].shader != SHADER_NONE) { + Ray volume_ray = *ray; + volume_ray.t = (hit)? isect->t: FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + + { + /* integrate along volume segment with distance sampling */ + VolumeIntegrateResult result = kernel_volume_integrate( + kg, state, sd, &volume_ray, L, throughput, rng, heterogeneous); + +# ifdef __VOLUME_SCATTER__ + if(result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, rng, sd, sd_input, *throughput, state, L); + + /* indirect light bounce */ + if(kernel_path_volume_bounce(kg, rng, sd, throughput, state, L, ray)) + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED); + else + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER); + } +# endif + } + } + } + +#endif +} + + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h new file mode 100644 index 00000000000..e314a98105e --- /dev/null +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -0,0 +1,87 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_indirect_background(KernelGlobals *kg) +{ + /* + ccl_local unsigned int local_queue_atomics; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + // */ + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + + + ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index]; + + if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + /* eval background shader if nothing hit */ + if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) { + *L_transparent = (*L_transparent) + average((*throughput)); +#ifdef __PASSES__ + if(!(kernel_data.film.pass_flag & PASS_BACKGROUND)) +#endif + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + + if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { +#ifdef __BACKGROUND__ + /* sample background shader */ + float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray); + path_radiance_accum_background(L, (*throughput), L_background, state->bounce); +#endif + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + } + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h new file mode 100644 index 00000000000..a56e85abeb9 --- /dev/null +++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h @@ -0,0 +1,77 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +ccl_device void kernel_indirect_subsurface(KernelGlobals *kg) +{ + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if(thread_index == 0) { + /* We will empty both queues in this kernel. */ + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } + + int ray_index; + get_ray_index(kg, thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + ray_index = get_ray_index(kg, thread_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + +#ifdef __SUBSURFACE__ + + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } + + ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + + if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { + ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + kernel_path_subsurface_accum_indirect(ss_indirect, L); + + /* Trace indirect subsurface rays by restarting the loop. this uses less + * stack memory than invoking kernel_path_indirect. + */ + if(ss_indirect->num_rays) { + kernel_path_subsurface_setup_indirect(kg, + ss_indirect, + state, + ray, + L, + throughput); + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + else { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + } + +#endif /* __SUBSURFACE__ */ + +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index 261625da31d..84de231b78c 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -38,10 +38,12 @@ CCL_NAMESPACE_BEGIN */ ccl_device void kernel_lamp_emission(KernelGlobals *kg) { +#ifndef __VOLUME__ /* We will empty this queue in this kernel. */ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; } +#endif /* Fetch use_queues_flag. */ ccl_local char local_use_queues_flag; if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { @@ -55,7 +57,12 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg) QUEUE_ACTIVE_AND_REGENERATED_RAYS, kernel_split_state.queue_data, kernel_split_params.queue_size, - 1); +#ifndef __VOLUME__ + 1 +#else + 0 +#endif + ); if(ray_index == QUEUE_EMPTY_SLOT) { return; } diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index fe3c9e1e8a2..f44aff30fa9 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -82,6 +82,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) { &kernel_split_state.rng[ray_index], my_sample, &kernel_split_state.ray[ray_index]); +#ifdef __SUBSURFACE__ + kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); +#endif + #ifdef __KERNEL_DEBUG__ debug_data_init(&kernel_split_state.debug_data[ray_index]); #endif diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index 66aad705bd4..70ec92b394b 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -63,10 +63,12 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg) int queue_number = -1; - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { + if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) { queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; } - else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; } diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index a7e0c7692a2..144cba67e23 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -93,7 +93,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) #ifdef __KERNEL_DEBUG__ DebugData *debug_data = &kernel_split_state.debug_data[ray_index]; #endif - Intersection *isect = &kernel_split_state.isect[ray_index]; + Intersection isect; PathState state = kernel_split_state.path_state[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; @@ -116,16 +116,17 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg) lcg_state = lcg_state_init(&rng, &state, 0x51633e2d); } - bool hit = scene_intersect(kg, ray, visibility, isect, &lcg_state, difl, extmax); + bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax); #else - bool hit = scene_intersect(kg, ray, visibility, isect, NULL, 0.0f, 0.0f); + bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f); #endif + kernel_split_state.isect[ray_index] = isect; #ifdef __KERNEL_DEBUG__ if(state.flag & PATH_RAY_CAMERA) { - debug_data->num_bvh_traversed_nodes += isect->num_traversed_nodes; - debug_data->num_bvh_traversed_instances += isect->num_traversed_instances; - debug_data->num_bvh_intersections += isect->num_intersections; + debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes; + debug_data->num_bvh_traversed_instances += isect.num_traversed_instances; + debug_data->num_bvh_intersections += isect.num_intersections; } debug_data->num_ray_bounces++; #endif diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 35ee19ddf1b..4bd5c8b6eb0 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -76,14 +76,14 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) /* Continue on with shader evaluation. */ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { - Intersection *isect = &kernel_split_state.isect[ray_index]; + Intersection isect = kernel_split_state.isect[ray_index]; ccl_global uint *rng = &kernel_split_state.rng[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; shader_setup_from_ray(kg, &kernel_split_state.sd[ray_index], - isect, + &isect, &ray); float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF); shader_eval_surface(kg, &kernel_split_state.sd[ray_index], rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN); diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h index d532c7cf55b..52f7002acb3 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h @@ -93,12 +93,14 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg) : light_ray_dl_global; float3 shadow; + Ray ray = *light_ray_global; update_path_radiance = !(shadow_blocked(kg, &kernel_split_state.sd_DL_shadow[thread_index], state, - light_ray_global, + &ray, &shadow)); + *light_ray_global = ray; /* We use light_ray_global's P and t to store shadow and * update_path_radiance. */ diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index dd0c3f9c941..5c12fe426ac 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -52,11 +52,11 @@ #include "kernel_passes.h" #ifdef __SUBSURFACE__ -#include "kernel_subsurface.h" +# include "kernel_subsurface.h" #endif #ifdef __VOLUME__ -#include "kernel_volume.h" +# include "kernel_volume.h" #endif #include "kernel_path_state.h" @@ -65,9 +65,10 @@ #include "kernel_path_common.h" #include "kernel_path_surface.h" #include "kernel_path_volume.h" +#include "kernel_path_subsurface.h" #ifdef __KERNEL_DEBUG__ -#include "kernel_debug.h" +# include "kernel_debug.h" #endif #include "kernel_queues.h" diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h index 5380c0c5de6..81dcdbaedde 100644 --- a/intern/cycles/kernel/split/kernel_split_data.h +++ b/intern/cycles/kernel/split/kernel_split_data.h @@ -31,6 +31,14 @@ ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_el size = size SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY +#ifdef __SUBSURFACE__ + size += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16); /* ss_rays */ +#endif + +#ifdef __VOLUME__ + size += align_up(2 * num_elements * sizeof(PathState), 16); /* state_shadow */ +#endif + return size; } @@ -46,9 +54,19 @@ ccl_device_inline void split_data_init(KernelGlobals *kg, #define SPLIT_DATA_ENTRY(type, name, num) \ split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16); - SPLIT_DATA_ENTRIES + SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY +#ifdef __SUBSURFACE__ + split_data->ss_rays = (ccl_global SubsurfaceIndirectRays*)p; + p += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16); +#endif + +#ifdef __VOLUME__ + split_data->state_shadow = (ccl_global PathState*)p; + p += align_up(2 * num_elements * sizeof(PathState), 16); +#endif + split_data->ray_state = ray_state; } diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 62e3ea45ae2..b39ed4995dc 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -68,14 +68,13 @@ typedef struct SplitParams { SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ - SPLIT_DATA_ENTRY(Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \ SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \ SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ - SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \ SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \ SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \ SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ @@ -88,6 +87,14 @@ typedef struct SplitData { SPLIT_DATA_ENTRIES #undef SPLIT_DATA_ENTRY +#ifdef __SUBSURFACE__ + ccl_global SubsurfaceIndirectRays *ss_rays; +#endif + +#ifdef __VOLUME__ + ccl_global PathState *state_shadow; +#endif + /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from * the host easily) but is still used the same as the other data so we have it here in this struct as well */ diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h new file mode 100644 index 00000000000..fcdd805f27b --- /dev/null +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -0,0 +1,86 @@ + + +CCL_NAMESPACE_BEGIN + + +ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) +{ +#ifdef __SUBSURFACE__ + + ccl_local unsigned int local_queue_atomics; + if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + +#ifdef __COMPUTE_DEVICE_GPU__ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. + */ + if(ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif + +#ifndef __COMPUTE_DEVICE_GPU__ + if(ray_index != QUEUE_EMPTY_SLOT) { +#endif + + + char enqueue_flag = 0; + ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global RNG *rng = &kernel_split_state.rng[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index]; + + if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + if(sd->flag & SD_BSSRDF) { + if(kernel_path_subsurface_scatter(kg, + sd, + emission_sd, + L, + state, + rng, + ray, + throughput, + ss_indirect)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + enqueue_flag = 1; + } + } + } + +#ifndef __COMPUTE_DEVICE_GPU__ + } +#endif + + /* Enqueue RAY_UPDATE_BUFFER rays. */ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); + +#endif /* __SUBSURFACE__ */ + +} + +CCL_NAMESPACE_END |