diff options
author | Campbell Barton <ideasman42@gmail.com> | 2019-04-17 07:17:24 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2019-04-17 07:21:24 +0300 |
commit | e12c08e8d170b7ca40f204a5b0423c23a9fbc2c1 (patch) | |
tree | 8cf3453d12edb177a218ef8009357518ec6cab6a /intern/cycles/kernel | |
parent | b3dabc200a4b0399ec6b81f2ff2730d07b44fcaa (diff) |
ClangFormat: apply to source, most of intern
Apply clang format as proposed in T53211.
For details on usage and instructions for migrating branches
without conflicts, see:
https://wiki.blender.org/wiki/Tools/ClangFormat
Diffstat (limited to 'intern/cycles/kernel')
310 files changed, 40081 insertions, 38747 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 01552dff9bb..8a8fee108ae 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -1,7 +1,7 @@ remove_extra_strict_flags() set(INC - .. + .. ) set(INC_SYS @@ -9,328 +9,328 @@ set(INC_SYS ) set(SRC_CPU_KERNELS - kernels/cpu/kernel.cpp - kernels/cpu/kernel_sse2.cpp - kernels/cpu/kernel_sse3.cpp - kernels/cpu/kernel_sse41.cpp - kernels/cpu/kernel_avx.cpp - kernels/cpu/kernel_avx2.cpp - kernels/cpu/kernel_split.cpp - kernels/cpu/kernel_split_sse2.cpp - kernels/cpu/kernel_split_sse3.cpp - kernels/cpu/kernel_split_sse41.cpp - kernels/cpu/kernel_split_avx.cpp - kernels/cpu/kernel_split_avx2.cpp - kernels/cpu/filter.cpp - kernels/cpu/filter_sse2.cpp - kernels/cpu/filter_sse3.cpp - kernels/cpu/filter_sse41.cpp - kernels/cpu/filter_avx.cpp - kernels/cpu/filter_avx2.cpp + kernels/cpu/kernel.cpp + kernels/cpu/kernel_sse2.cpp + kernels/cpu/kernel_sse3.cpp + kernels/cpu/kernel_sse41.cpp + kernels/cpu/kernel_avx.cpp + kernels/cpu/kernel_avx2.cpp + kernels/cpu/kernel_split.cpp + kernels/cpu/kernel_split_sse2.cpp + kernels/cpu/kernel_split_sse3.cpp + kernels/cpu/kernel_split_sse41.cpp + kernels/cpu/kernel_split_avx.cpp + kernels/cpu/kernel_split_avx2.cpp + kernels/cpu/filter.cpp + kernels/cpu/filter_sse2.cpp + kernels/cpu/filter_sse3.cpp + kernels/cpu/filter_sse41.cpp + kernels/cpu/filter_avx.cpp + kernels/cpu/filter_avx2.cpp ) set(SRC_CUDA_KERNELS - kernels/cuda/kernel.cu - kernels/cuda/kernel_split.cu - kernels/cuda/filter.cu + kernels/cuda/kernel.cu + kernels/cuda/kernel_split.cu + kernels/cuda/filter.cu ) set(SRC_OPENCL_KERNELS - kernels/opencl/kernel_bake.cl - kernels/opencl/kernel_base.cl - kernels/opencl/kernel_displace.cl - kernels/opencl/kernel_background.cl - kernels/opencl/kernel_state_buffer_size.cl - kernels/opencl/kernel_split_bundle.cl - kernels/opencl/kernel_data_init.cl - kernels/opencl/kernel_path_init.cl - 
kernels/opencl/kernel_queue_enqueue.cl - kernels/opencl/kernel_scene_intersect.cl - kernels/opencl/kernel_lamp_emission.cl - kernels/opencl/kernel_do_volume.cl - kernels/opencl/kernel_indirect_background.cl - kernels/opencl/kernel_shader_setup.cl - kernels/opencl/kernel_shader_sort.cl - kernels/opencl/kernel_shader_eval.cl - kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl - kernels/opencl/kernel_subsurface_scatter.cl - kernels/opencl/kernel_direct_lighting.cl - kernels/opencl/kernel_shadow_blocked_ao.cl - kernels/opencl/kernel_shadow_blocked_dl.cl - kernels/opencl/kernel_enqueue_inactive.cl - kernels/opencl/kernel_next_iteration_setup.cl - kernels/opencl/kernel_indirect_subsurface.cl - kernels/opencl/kernel_buffer_update.cl - kernels/opencl/filter.cl + kernels/opencl/kernel_bake.cl + kernels/opencl/kernel_base.cl + kernels/opencl/kernel_displace.cl + kernels/opencl/kernel_background.cl + kernels/opencl/kernel_state_buffer_size.cl + kernels/opencl/kernel_split_bundle.cl + kernels/opencl/kernel_data_init.cl + kernels/opencl/kernel_path_init.cl + kernels/opencl/kernel_queue_enqueue.cl + kernels/opencl/kernel_scene_intersect.cl + kernels/opencl/kernel_lamp_emission.cl + kernels/opencl/kernel_do_volume.cl + kernels/opencl/kernel_indirect_background.cl + kernels/opencl/kernel_shader_setup.cl + kernels/opencl/kernel_shader_sort.cl + kernels/opencl/kernel_shader_eval.cl + kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl + kernels/opencl/kernel_subsurface_scatter.cl + kernels/opencl/kernel_direct_lighting.cl + kernels/opencl/kernel_shadow_blocked_ao.cl + kernels/opencl/kernel_shadow_blocked_dl.cl + kernels/opencl/kernel_enqueue_inactive.cl + kernels/opencl/kernel_next_iteration_setup.cl + kernels/opencl/kernel_indirect_subsurface.cl + kernels/opencl/kernel_buffer_update.cl + kernels/opencl/filter.cl ) set(SRC_BVH_HEADERS - bvh/bvh.h - bvh/bvh_nodes.h - bvh/bvh_shadow_all.h - bvh/bvh_local.h - bvh/bvh_traversal.h - bvh/bvh_types.h - 
bvh/bvh_volume.h - bvh/bvh_volume_all.h - bvh/qbvh_nodes.h - bvh/qbvh_shadow_all.h - bvh/qbvh_local.h - bvh/qbvh_traversal.h - bvh/qbvh_volume.h - bvh/qbvh_volume_all.h - bvh/obvh_nodes.h - bvh/obvh_shadow_all.h - bvh/obvh_local.h - bvh/obvh_traversal.h - bvh/obvh_volume.h - bvh/obvh_volume_all.h - bvh/bvh_embree.h + bvh/bvh.h + bvh/bvh_nodes.h + bvh/bvh_shadow_all.h + bvh/bvh_local.h + bvh/bvh_traversal.h + bvh/bvh_types.h + bvh/bvh_volume.h + bvh/bvh_volume_all.h + bvh/qbvh_nodes.h + bvh/qbvh_shadow_all.h + bvh/qbvh_local.h + bvh/qbvh_traversal.h + bvh/qbvh_volume.h + bvh/qbvh_volume_all.h + bvh/obvh_nodes.h + bvh/obvh_shadow_all.h + bvh/obvh_local.h + bvh/obvh_traversal.h + bvh/obvh_volume.h + bvh/obvh_volume_all.h + bvh/bvh_embree.h ) set(SRC_HEADERS - kernel_accumulate.h - kernel_bake.h - kernel_camera.h - kernel_color.h - kernel_compat_cpu.h - kernel_compat_cuda.h - kernel_compat_opencl.h - kernel_differential.h - kernel_emission.h - kernel_film.h - kernel_globals.h - kernel_id_passes.h - kernel_jitter.h - kernel_light.h - kernel_math.h - kernel_montecarlo.h - kernel_passes.h - kernel_path.h - kernel_path_branched.h - kernel_path_common.h - kernel_path_state.h - kernel_path_surface.h - kernel_path_subsurface.h - kernel_path_volume.h - kernel_profiling.h - kernel_projection.h - kernel_queues.h - kernel_random.h - kernel_shader.h - kernel_shadow.h - kernel_subsurface.h - kernel_textures.h - kernel_types.h - kernel_volume.h - kernel_work_stealing.h + kernel_accumulate.h + kernel_bake.h + kernel_camera.h + kernel_color.h + kernel_compat_cpu.h + kernel_compat_cuda.h + kernel_compat_opencl.h + kernel_differential.h + kernel_emission.h + kernel_film.h + kernel_globals.h + kernel_id_passes.h + kernel_jitter.h + kernel_light.h + kernel_math.h + kernel_montecarlo.h + kernel_passes.h + kernel_path.h + kernel_path_branched.h + kernel_path_common.h + kernel_path_state.h + kernel_path_surface.h + kernel_path_subsurface.h + kernel_path_volume.h + kernel_profiling.h + 
kernel_projection.h + kernel_queues.h + kernel_random.h + kernel_shader.h + kernel_shadow.h + kernel_subsurface.h + kernel_textures.h + kernel_types.h + kernel_volume.h + kernel_work_stealing.h ) set(SRC_KERNELS_CPU_HEADERS - kernel.h - kernels/cpu/kernel_cpu.h - kernels/cpu/kernel_cpu_impl.h - kernels/cpu/kernel_cpu_image.h - kernels/cpu/filter_cpu.h - kernels/cpu/filter_cpu_impl.h + kernel.h + kernels/cpu/kernel_cpu.h + kernels/cpu/kernel_cpu_impl.h + kernels/cpu/kernel_cpu_image.h + kernels/cpu/filter_cpu.h + kernels/cpu/filter_cpu_impl.h ) set(SRC_KERNELS_CUDA_HEADERS - kernels/cuda/kernel_config.h - kernels/cuda/kernel_cuda_image.h + kernels/cuda/kernel_config.h + kernels/cuda/kernel_cuda_image.h ) set(SRC_KERNELS_OPENCL_HEADERS - kernels/opencl/kernel_split_function.h - kernels/opencl/kernel_opencl_image.h + kernels/opencl/kernel_split_function.h + kernels/opencl/kernel_opencl_image.h ) set(SRC_CLOSURE_HEADERS - closure/alloc.h - closure/bsdf.h - closure/bsdf_ashikhmin_velvet.h - closure/bsdf_diffuse.h - closure/bsdf_diffuse_ramp.h - closure/bsdf_microfacet.h - closure/bsdf_microfacet_multi.h - closure/bsdf_microfacet_multi_impl.h - closure/bsdf_oren_nayar.h - closure/bsdf_phong_ramp.h - closure/bsdf_reflection.h - closure/bsdf_refraction.h - closure/bsdf_toon.h - closure/bsdf_transparent.h - closure/bsdf_util.h - closure/bsdf_ashikhmin_shirley.h - closure/bsdf_hair.h - closure/bssrdf.h - closure/emissive.h - closure/volume.h - closure/bsdf_principled_diffuse.h - closure/bsdf_principled_sheen.h + closure/alloc.h + closure/bsdf.h + closure/bsdf_ashikhmin_velvet.h + closure/bsdf_diffuse.h + closure/bsdf_diffuse_ramp.h + closure/bsdf_microfacet.h + closure/bsdf_microfacet_multi.h + closure/bsdf_microfacet_multi_impl.h + closure/bsdf_oren_nayar.h + closure/bsdf_phong_ramp.h + closure/bsdf_reflection.h + closure/bsdf_refraction.h + closure/bsdf_toon.h + closure/bsdf_transparent.h + closure/bsdf_util.h + closure/bsdf_ashikhmin_shirley.h + closure/bsdf_hair.h + 
closure/bssrdf.h + closure/emissive.h + closure/volume.h + closure/bsdf_principled_diffuse.h + closure/bsdf_principled_sheen.h closure/bsdf_hair_principled.h ) set(SRC_SVM_HEADERS - svm/svm.h - svm/svm_ao.h - svm/svm_attribute.h - svm/svm_bevel.h - svm/svm_blackbody.h - svm/svm_bump.h - svm/svm_camera.h - svm/svm_closure.h - svm/svm_convert.h - svm/svm_checker.h - svm/svm_color_util.h - svm/svm_brick.h - svm/svm_displace.h - svm/svm_fresnel.h - svm/svm_wireframe.h - svm/svm_wavelength.h - svm/svm_gamma.h - svm/svm_brightness.h - svm/svm_geometry.h - svm/svm_gradient.h - svm/svm_hsv.h - svm/svm_ies.h - svm/svm_image.h - svm/svm_invert.h - svm/svm_light_path.h - svm/svm_magic.h - svm/svm_mapping.h - svm/svm_math.h - svm/svm_math_util.h - svm/svm_mix.h - svm/svm_musgrave.h - svm/svm_noise.h - svm/svm_noisetex.h - svm/svm_normal.h - svm/svm_ramp.h - svm/svm_ramp_util.h - svm/svm_sepcomb_hsv.h - svm/svm_sepcomb_vector.h - svm/svm_sky.h - svm/svm_tex_coord.h - svm/svm_texture.h - svm/svm_types.h - svm/svm_value.h - svm/svm_vector_transform.h - svm/svm_voronoi.h - svm/svm_voxel.h - svm/svm_wave.h + svm/svm.h + svm/svm_ao.h + svm/svm_attribute.h + svm/svm_bevel.h + svm/svm_blackbody.h + svm/svm_bump.h + svm/svm_camera.h + svm/svm_closure.h + svm/svm_convert.h + svm/svm_checker.h + svm/svm_color_util.h + svm/svm_brick.h + svm/svm_displace.h + svm/svm_fresnel.h + svm/svm_wireframe.h + svm/svm_wavelength.h + svm/svm_gamma.h + svm/svm_brightness.h + svm/svm_geometry.h + svm/svm_gradient.h + svm/svm_hsv.h + svm/svm_ies.h + svm/svm_image.h + svm/svm_invert.h + svm/svm_light_path.h + svm/svm_magic.h + svm/svm_mapping.h + svm/svm_math.h + svm/svm_math_util.h + svm/svm_mix.h + svm/svm_musgrave.h + svm/svm_noise.h + svm/svm_noisetex.h + svm/svm_normal.h + svm/svm_ramp.h + svm/svm_ramp_util.h + svm/svm_sepcomb_hsv.h + svm/svm_sepcomb_vector.h + svm/svm_sky.h + svm/svm_tex_coord.h + svm/svm_texture.h + svm/svm_types.h + svm/svm_value.h + svm/svm_vector_transform.h + svm/svm_voronoi.h 
+ svm/svm_voxel.h + svm/svm_wave.h ) set(SRC_GEOM_HEADERS - geom/geom.h - geom/geom_attribute.h - geom/geom_curve.h - geom/geom_curve_intersect.h - geom/geom_motion_curve.h - geom/geom_motion_triangle.h - geom/geom_motion_triangle_intersect.h - geom/geom_motion_triangle_shader.h - geom/geom_object.h - geom/geom_patch.h - geom/geom_primitive.h - geom/geom_subd_triangle.h - geom/geom_triangle.h - geom/geom_triangle_intersect.h - geom/geom_volume.h + geom/geom.h + geom/geom_attribute.h + geom/geom_curve.h + geom/geom_curve_intersect.h + geom/geom_motion_curve.h + geom/geom_motion_triangle.h + geom/geom_motion_triangle_intersect.h + geom/geom_motion_triangle_shader.h + geom/geom_object.h + geom/geom_patch.h + geom/geom_primitive.h + geom/geom_subd_triangle.h + geom/geom_triangle.h + geom/geom_triangle_intersect.h + geom/geom_volume.h ) set(SRC_FILTER_HEADERS - filter/filter.h - filter/filter_defines.h - filter/filter_features.h - filter/filter_features_sse.h - filter/filter_kernel.h - filter/filter_nlm_cpu.h - filter/filter_nlm_gpu.h - filter/filter_prefilter.h - filter/filter_reconstruction.h - filter/filter_transform.h - filter/filter_transform_gpu.h - filter/filter_transform_sse.h + filter/filter.h + filter/filter_defines.h + filter/filter_features.h + filter/filter_features_sse.h + filter/filter_kernel.h + filter/filter_nlm_cpu.h + filter/filter_nlm_gpu.h + filter/filter_prefilter.h + filter/filter_reconstruction.h + filter/filter_transform.h + filter/filter_transform_gpu.h + filter/filter_transform_sse.h ) set(SRC_UTIL_HEADERS - ../util/util_atomic.h - ../util/util_color.h - ../util/util_defines.h - ../util/util_half.h - ../util/util_hash.h - ../util/util_math.h - ../util/util_math_fast.h - ../util/util_math_intersect.h - ../util/util_math_float2.h - ../util/util_math_float3.h - ../util/util_math_float4.h - ../util/util_math_int2.h - ../util/util_math_int3.h - ../util/util_math_int4.h - ../util/util_math_matrix.h - ../util/util_projection.h - ../util/util_rect.h - 
../util/util_static_assert.h - ../util/util_transform.h - ../util/util_texture.h - ../util/util_types.h - ../util/util_types_float2.h - ../util/util_types_float2_impl.h - ../util/util_types_float3.h - ../util/util_types_float3_impl.h - ../util/util_types_float4.h - ../util/util_types_float4_impl.h - ../util/util_types_float8.h - ../util/util_types_float8_impl.h - ../util/util_types_int2.h - ../util/util_types_int2_impl.h - ../util/util_types_int3.h - ../util/util_types_int3_impl.h - ../util/util_types_int4.h - ../util/util_types_int4_impl.h - ../util/util_types_uchar2.h - ../util/util_types_uchar2_impl.h - ../util/util_types_uchar3.h - ../util/util_types_uchar3_impl.h - ../util/util_types_uchar4.h - ../util/util_types_uchar4_impl.h - ../util/util_types_uint2.h - ../util/util_types_uint2_impl.h - ../util/util_types_uint3.h - ../util/util_types_uint3_impl.h - ../util/util_types_uint4.h - ../util/util_types_uint4_impl.h - ../util/util_types_ushort4.h - ../util/util_types_vector3.h - ../util/util_types_vector3_impl.h + ../util/util_atomic.h + ../util/util_color.h + ../util/util_defines.h + ../util/util_half.h + ../util/util_hash.h + ../util/util_math.h + ../util/util_math_fast.h + ../util/util_math_intersect.h + ../util/util_math_float2.h + ../util/util_math_float3.h + ../util/util_math_float4.h + ../util/util_math_int2.h + ../util/util_math_int3.h + ../util/util_math_int4.h + ../util/util_math_matrix.h + ../util/util_projection.h + ../util/util_rect.h + ../util/util_static_assert.h + ../util/util_transform.h + ../util/util_texture.h + ../util/util_types.h + ../util/util_types_float2.h + ../util/util_types_float2_impl.h + ../util/util_types_float3.h + ../util/util_types_float3_impl.h + ../util/util_types_float4.h + ../util/util_types_float4_impl.h + ../util/util_types_float8.h + ../util/util_types_float8_impl.h + ../util/util_types_int2.h + ../util/util_types_int2_impl.h + ../util/util_types_int3.h + ../util/util_types_int3_impl.h + ../util/util_types_int4.h + 
../util/util_types_int4_impl.h + ../util/util_types_uchar2.h + ../util/util_types_uchar2_impl.h + ../util/util_types_uchar3.h + ../util/util_types_uchar3_impl.h + ../util/util_types_uchar4.h + ../util/util_types_uchar4_impl.h + ../util/util_types_uint2.h + ../util/util_types_uint2_impl.h + ../util/util_types_uint3.h + ../util/util_types_uint3_impl.h + ../util/util_types_uint4.h + ../util/util_types_uint4_impl.h + ../util/util_types_ushort4.h + ../util/util_types_vector3.h + ../util/util_types_vector3_impl.h ) set(SRC_SPLIT_HEADERS - split/kernel_branched.h - split/kernel_buffer_update.h - split/kernel_data_init.h - split/kernel_direct_lighting.h - split/kernel_do_volume.h - split/kernel_enqueue_inactive.h - split/kernel_holdout_emission_blurring_pathtermination_ao.h - split/kernel_indirect_background.h - split/kernel_indirect_subsurface.h - split/kernel_lamp_emission.h - split/kernel_next_iteration_setup.h - split/kernel_path_init.h - split/kernel_queue_enqueue.h - split/kernel_scene_intersect.h - split/kernel_shader_setup.h - split/kernel_shader_sort.h - split/kernel_shader_eval.h - split/kernel_shadow_blocked_ao.h - split/kernel_shadow_blocked_dl.h - split/kernel_split_common.h - split/kernel_split_data.h - split/kernel_split_data_types.h - split/kernel_subsurface_scatter.h + split/kernel_branched.h + split/kernel_buffer_update.h + split/kernel_data_init.h + split/kernel_direct_lighting.h + split/kernel_do_volume.h + split/kernel_enqueue_inactive.h + split/kernel_holdout_emission_blurring_pathtermination_ao.h + split/kernel_indirect_background.h + split/kernel_indirect_subsurface.h + split/kernel_lamp_emission.h + split/kernel_next_iteration_setup.h + split/kernel_path_init.h + split/kernel_queue_enqueue.h + split/kernel_scene_intersect.h + split/kernel_shader_setup.h + split/kernel_shader_sort.h + split/kernel_shader_eval.h + split/kernel_shadow_blocked_ao.h + split/kernel_shadow_blocked_dl.h + split/kernel_split_common.h + split/kernel_split_data.h + 
split/kernel_split_data_types.h + split/kernel_subsurface_scatter.h ) set(LIB @@ -340,145 +340,145 @@ set(LIB # CUDA module if(WITH_CYCLES_CUDA_BINARIES) - # 64 bit only - set(CUDA_BITS 64) - - # CUDA version - execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}") - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}") - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") - - # warn for other versions - if(CUDA_VERSION MATCHES "101") - else() - message(WARNING - "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " - "build may succeed but only CUDA 10.1 is officially supported") - endif() - - # build for each arch - set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu - ${SRC_HEADERS} - ${SRC_KERNELS_CUDA_HEADERS} - ${SRC_BVH_HEADERS} - ${SRC_SVM_HEADERS} - ${SRC_GEOM_HEADERS} - ${SRC_CLOSURE_HEADERS} - ${SRC_UTIL_HEADERS} - ) - set(cuda_filter_sources kernels/cuda/filter.cu - ${SRC_HEADERS} - ${SRC_KERNELS_CUDA_HEADERS} - ${SRC_FILTER_HEADERS} - ${SRC_UTIL_HEADERS} - ) - set(cuda_cubins) - - macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental) - set(cuda_cubin ${name}_${arch}.cubin) - - set(kernel_sources ${sources}) - if(NOT ${prev_arch} STREQUAL "none") - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) - endif() - - set(cuda_kernel_src "/kernels/cuda/${name}.cu") - - set(cuda_flags - -D CCL_NAMESPACE_BEGIN= - -D CCL_NAMESPACE_END= - -D NVCC - -m ${CUDA_BITS} - -I ${CMAKE_CURRENT_SOURCE_DIR}/.. 
- -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda - --use_fast_math - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) - - if(${experimental}) - set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) - set(name ${name}_experimental) - endif() - - if(WITH_CYCLES_DEBUG) - set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) - endif() - - if(WITH_CYCLES_CUBIN_COMPILER) - string(SUBSTRING ${arch} 3 -1 CUDA_ARCH) - - # Needed to find libnvrtc-builtins.so. Can't do it from inside - # cycles_cubin_cc since the env variable is read before main() - if(APPLE) - set(CUBIN_CC_ENV ${CMAKE_COMMAND} - -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") - elseif(UNIX) - set(CUBIN_CC_ENV ${CMAKE_COMMAND} - -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") - endif() - - add_custom_command( - OUTPUT ${cuda_cubin} - COMMAND ${CUBIN_CC_ENV} - "$<TARGET_FILE:cycles_cubin_cc>" - -target ${CUDA_ARCH} - -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} - ${cuda_flags} - -v - -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" - DEPENDS ${kernel_sources} cycles_cubin_cc) - else() - add_custom_command( - OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} - -arch=${arch} - ${CUDA_NVCC_FLAGS} - --cubin - ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} - --ptxas-options="-v" - ${cuda_flags} - DEPENDS ${kernel_sources}) - endif() - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_cubin}) - - unset(cuda_debug_flags) - endmacro() - - set(prev_arch "none") - foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) - if(${arch} MATCHES "sm_2.") - message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.") - elseif(${arch} MATCHES "sm_7." 
AND ${CUDA_VERSION} LESS 100) - message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.") - else() - # Compile regular kernel - CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE) - CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE) - - if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) - # Compile split kernel - CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE) - endif() - - if(WITH_CYCLES_CUDA_BUILD_SERIAL) - set(prev_arch ${arch}) - endif() - endif() - endforeach() - - add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) - cycles_set_solution_folder(cycles_kernel_cuda) + # 64 bit only + set(CUDA_BITS 64) + + # CUDA version + execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) + string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}") + string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}") + set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") + + # warn for other versions + if(CUDA_VERSION MATCHES "101") + else() + message(WARNING + "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " + "build may succeed but only CUDA 10.1 is officially supported") + endif() + + # build for each arch + set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu + ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_BVH_HEADERS} + ${SRC_SVM_HEADERS} + ${SRC_GEOM_HEADERS} + ${SRC_CLOSURE_HEADERS} + ${SRC_UTIL_HEADERS} + ) + set(cuda_filter_sources kernels/cuda/filter.cu + ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_FILTER_HEADERS} + ${SRC_UTIL_HEADERS} + ) + set(cuda_cubins) + + macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental) + set(cuda_cubin ${name}_${arch}.cubin) + + set(kernel_sources ${sources}) + if(NOT ${prev_arch} STREQUAL "none") + set(kernel_sources 
${kernel_sources} ${name}_${prev_arch}.cubin) + endif() + + set(cuda_kernel_src "/kernels/cuda/${name}.cu") + + set(cuda_flags + -D CCL_NAMESPACE_BEGIN= + -D CCL_NAMESPACE_END= + -D NVCC + -m ${CUDA_BITS} + -I ${CMAKE_CURRENT_SOURCE_DIR}/.. + -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda + --use_fast_math + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) + + if(${experimental}) + set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) + set(name ${name}_experimental) + endif() + + if(WITH_CYCLES_DEBUG) + set(cuda_flags ${cuda_flags} -D __KERNEL_DEBUG__) + endif() + + if(WITH_CYCLES_CUBIN_COMPILER) + string(SUBSTRING ${arch} 3 -1 CUDA_ARCH) + + # Needed to find libnvrtc-builtins.so. Can't do it from inside + # cycles_cubin_cc since the env variable is read before main() + if(APPLE) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") + elseif(UNIX) + set(CUBIN_CC_ENV ${CMAKE_COMMAND} + -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") + endif() + + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUBIN_CC_ENV} + "$<TARGET_FILE:cycles_cubin_cc>" + -target ${CUDA_ARCH} + -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} + ${cuda_flags} + -v + -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" + DEPENDS ${kernel_sources} cycles_cubin_cc) + else() + add_custom_command( + OUTPUT ${cuda_cubin} + COMMAND ${CUDA_NVCC_EXECUTABLE} + -arch=${arch} + ${CUDA_NVCC_FLAGS} + --cubin + ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} + --ptxas-options="-v" + ${cuda_flags} + DEPENDS ${kernel_sources}) + endif() + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_cubin}) + + unset(cuda_debug_flags) + endmacro() + + set(prev_arch "none") + foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) + if(${arch} MATCHES "sm_2.") + message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.") + elseif(${arch} MATCHES "sm_7." 
AND ${CUDA_VERSION} LESS 100) + message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.") + else() + # Compile regular kernel + CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} filter "" "${cuda_filter_sources}" FALSE) + CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE) + + if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) + # Compile split kernel + CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel_split "-D __SPLIT__" "${cuda_sources}" FALSE) + endif() + + if(WITH_CYCLES_CUDA_BUILD_SERIAL) + set(prev_arch ${arch}) + endif() + endif() + endforeach() + + add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) + cycles_set_solution_folder(cycles_kernel_cuda) endif() # OSL module if(WITH_CYCLES_OSL) - list(APPEND LIB - cycles_kernel_osl - ) - add_subdirectory(osl) - add_subdirectory(shaders) + list(APPEND LIB + cycles_kernel_osl + ) + add_subdirectory(osl) + add_subdirectory(shaders) endif() # CPU module @@ -491,56 +491,56 @@ set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAG set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") if(CXX_HAS_SSE) - set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") - 
set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX) - set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") endif() 
if(CXX_HAS_AVX2) - set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") - set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") endif() cycles_add_library(cycles_kernel "${LIB}" - ${SRC_CPU_KERNELS} - ${SRC_CUDA_KERNELS} - ${SRC_OPENCL_KERNELS} - ${SRC_HEADERS} - ${SRC_KERNELS_CPU_HEADERS} - ${SRC_KERNELS_CUDA_HEADERS} - ${SRC_KERNELS_OPENCL_HEADERS} - ${SRC_BVH_HEADERS} - ${SRC_CLOSURE_HEADERS} - ${SRC_FILTER_HEADERS} - ${SRC_SVM_HEADERS} - ${SRC_GEOM_HEADERS} - ${SRC_SPLIT_HEADERS} + ${SRC_CPU_KERNELS} + ${SRC_CUDA_KERNELS} + ${SRC_OPENCL_KERNELS} + ${SRC_HEADERS} + ${SRC_KERNELS_CPU_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPENCL_HEADERS} + ${SRC_BVH_HEADERS} + ${SRC_CLOSURE_HEADERS} + ${SRC_FILTER_HEADERS} + ${SRC_SVM_HEADERS} + ${SRC_GEOM_HEADERS} + ${SRC_SPLIT_HEADERS} ) if(WITH_CYCLES_CUDA) - add_dependencies(cycles_kernel cycles_kernel_cuda) + add_dependencies(cycles_kernel cycles_kernel_cuda) endif() # OpenCL kernel #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl) #add_custom_command( -# OUTPUT ${KERNEL_PREPROCESSED} -# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED} -# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS}) +# OUTPUT ${KERNEL_PREPROCESSED} +# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I 
${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED} +# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS}) #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index e5f807833f3..13e72ed299f 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -57,19 +57,19 @@ CCL_NAMESPACE_BEGIN #if defined(__HAIR__) # define BVH_FUNCTION_NAME bvh_intersect_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH # include "kernel/bvh/bvh_traversal.h" #endif #if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION # include "kernel/bvh/bvh_traversal.h" #endif #if defined(__HAIR__) && defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_HAIR_MINIMUM_WIDTH | BVH_MOTION # include "kernel/bvh/bvh_traversal.h" #endif @@ -82,10 +82,10 @@ CCL_NAMESPACE_BEGIN # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_local_motion -# define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR +# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR # include "kernel/bvh/bvh_local.h" # endif -#endif /* __BVH_LOCAL__ */ +#endif /* __BVH_LOCAL__ */ /* Volume BVH traversal */ @@ -96,16 +96,16 @@ CCL_NAMESPACE_BEGIN # if defined(__INSTANCING__) # define BVH_FUNCTION_NAME bvh_intersect_volume_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR +# define BVH_FUNCTION_FEATURES 
BVH_INSTANCING | BVH_HAIR # include "kernel/bvh/bvh_volume.h" # endif # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_volume_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR # include "kernel/bvh/bvh_volume.h" # endif -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ /* Record all intersections - Shadow BVH traversal */ @@ -122,22 +122,22 @@ CCL_NAMESPACE_BEGIN # if defined(__HAIR__) # define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR # include "kernel/bvh/bvh_shadow_all.h" # endif # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION # include "kernel/bvh/bvh_shadow_all.h" # endif # if defined(__HAIR__) && defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION # include "kernel/bvh/bvh_shadow_all.h" # endif -#endif /* __SHADOW_RECORD_ALL__ */ +#endif /* __SHADOW_RECORD_ALL__ */ /* Record all intersections - Volume BVH traversal */ @@ -148,16 +148,16 @@ CCL_NAMESPACE_BEGIN # if defined(__INSTANCING__) # define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR # include "kernel/bvh/bvh_volume_all.h" # endif # if defined(__OBJECT_MOTION__) # define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion -# define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR +# define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR # include "kernel/bvh/bvh_volume_all.h" # endif -#endif /* 
__VOLUME_RECORD_ALL__ */ +#endif /* __VOLUME_RECORD_ALL__ */ #undef BVH_FEATURE #undef BVH_NAME_JOIN @@ -166,15 +166,15 @@ CCL_NAMESPACE_BEGIN ccl_device_inline bool scene_intersect_valid(const Ray *ray) { - /* NOTE: Due to some vectorization code non-finite origin point might - * cause lots of false-positive intersections which will overflow traversal - * stack. - * This code is a quick way to perform early output, to avoid crashes in - * such cases. - * From production scenes so far it seems it's enough to test first element - * only. - */ - return isfinite(ray->P.x); + /* NOTE: Due to some vectorization code non-finite origin point might + * cause lots of false-positive intersections which will overflow traversal + * stack. + * This code is a quick way to perform early output, to avoid crashes in + * such cases. + * From production scenes so far it seems it's enough to test first element + * only. + */ + return isfinite(ray->P.x); } /* Note: ray is passed by value to work around a possible CUDA compiler bug. 
*/ @@ -186,59 +186,60 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, float difl, float extmax) { - PROFILING_INIT(kg, PROFILING_INTERSECT); + PROFILING_INIT(kg, PROFILING_INTERSECT); - if(!scene_intersect_valid(&ray)) { - return false; - } + if (!scene_intersect_valid(&ray)) { + return false; + } #ifdef __EMBREE__ - if(kernel_data.bvh.scene) { - isect->t = ray.t; - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); - IntersectContext rtc_ctx(&ctx); - RTCRayHit ray_hit; - kernel_embree_setup_rayhit(ray, ray_hit, visibility); - rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit); - if(ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) { - kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect); - return true; - } - return false; - } -#endif /* __EMBREE__ */ + if (kernel_data.bvh.scene) { + isect->t = ray.t; + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); + IntersectContext rtc_ctx(&ctx); + RTCRayHit ray_hit; + kernel_embree_setup_rayhit(ray, ray_hit, visibility); + rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit); + if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && + ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) { + kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect); + return true; + } + return false; + } +#endif /* __EMBREE__ */ #ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { + if (kernel_data.bvh.have_motion) { # ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax); -# endif /* __HAIR__ */ + if (kernel_data.bvh.have_curves) + return bvh_intersect_hair_motion(kg, &ray, isect, visibility, lcg_state, difl, extmax); +# endif /* __HAIR__ */ - return bvh_intersect_motion(kg, &ray, isect, visibility); - } -#endif /* __OBJECT_MOTION__ */ + return bvh_intersect_motion(kg, &ray, isect, visibility); + } +#endif /* __OBJECT_MOTION__ */ 
#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax); -#endif /* __HAIR__ */ + if (kernel_data.bvh.have_curves) + return bvh_intersect_hair(kg, &ray, isect, visibility, lcg_state, difl, extmax); +#endif /* __HAIR__ */ #ifdef __KERNEL_CPU__ # ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_instancing(kg, &ray, isect, visibility); -# endif /* __INSTANCING__ */ + if (kernel_data.bvh.have_instancing) + return bvh_intersect_instancing(kg, &ray, isect, visibility); +# endif /* __INSTANCING__ */ - return bvh_intersect(kg, &ray, isect, visibility); -#else /* __KERNEL_CPU__ */ + return bvh_intersect(kg, &ray, isect, visibility); +#else /* __KERNEL_CPU__ */ # ifdef __INSTANCING__ - return bvh_intersect_instancing(kg, &ray, isect, visibility); + return bvh_intersect_instancing(kg, &ray, isect, visibility); # else - return bvh_intersect(kg, &ray, isect, visibility); -# endif /* __INSTANCING__ */ + return bvh_intersect(kg, &ray, isect, visibility); +# endif /* __INSTANCING__ */ -#endif /* __KERNEL_CPU__ */ +#endif /* __KERNEL_CPU__ */ } #ifdef __BVH_LOCAL__ @@ -250,77 +251,61 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, uint *lcg_state, int max_hits) { - PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL); + PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL); - if(!scene_intersect_valid(&ray)) { - local_isect->num_hits = 0; - return false; - } -#ifdef __EMBREE__ - if(kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS); - ctx.lcg_state = lcg_state; - ctx.max_hits = max_hits; - ctx.ss_isect = local_isect; - local_isect->num_hits = 0; - ctx.sss_object_id = local_object; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); - - /* Get the Embree scene for this intersection. 
*/ - RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2); - if(geom) { - float3 P = ray.P; - float3 dir = ray.D; - float3 idir = ray.D; - const int object_flag = kernel_tex_fetch(__object_flag, local_object); - if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - Transform ob_itfm; - rtc_ray.tfar = bvh_instance_motion_push(kg, - local_object, - &ray, - &P, - &dir, - &idir, - ray.t, - &ob_itfm); - /* bvh_instance_motion_push() returns the inverse transform but - * it's not needed here. */ - (void) ob_itfm; - - rtc_ray.org_x = P.x; - rtc_ray.org_y = P.y; - rtc_ray.org_z = P.z; - rtc_ray.dir_x = dir.x; - rtc_ray.dir_y = dir.y; - rtc_ray.dir_z = dir.z; - } - RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom); - if(scene) { - rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray); - } - } - - return local_isect->num_hits > 0; - } -#endif /* __EMBREE__ */ -#ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_local_motion(kg, - &ray, - local_isect, - local_object, - lcg_state, - max_hits); - } -#endif /* __OBJECT_MOTION__ */ - return bvh_intersect_local(kg, - &ray, - local_isect, - local_object, - lcg_state, - max_hits); + if (!scene_intersect_valid(&ray)) { + local_isect->num_hits = 0; + return false; + } +# ifdef __EMBREE__ + if (kernel_data.bvh.scene) { + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS); + ctx.lcg_state = lcg_state; + ctx.max_hits = max_hits; + ctx.ss_isect = local_isect; + local_isect->num_hits = 0; + ctx.sss_object_id = local_object; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); + + /* Get the Embree scene for this intersection. 
*/ + RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2); + if (geom) { + float3 P = ray.P; + float3 dir = ray.D; + float3 idir = ray.D; + const int object_flag = kernel_tex_fetch(__object_flag, local_object); + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + Transform ob_itfm; + rtc_ray.tfar = bvh_instance_motion_push( + kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm); + /* bvh_instance_motion_push() returns the inverse transform but + * it's not needed here. */ + (void)ob_itfm; + + rtc_ray.org_x = P.x; + rtc_ray.org_y = P.y; + rtc_ray.org_z = P.z; + rtc_ray.dir_x = dir.x; + rtc_ray.dir_y = dir.y; + rtc_ray.dir_z = dir.z; + } + RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom); + if (scene) { + rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray); + } + } + + return local_isect->num_hits > 0; + } +# endif /* __EMBREE__ */ +# ifdef __OBJECT_MOTION__ + if (kernel_data.bvh.have_motion) { + return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits); + } +# endif /* __OBJECT_MOTION__ */ + return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits); } #endif @@ -332,82 +317,57 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, uint max_hits, uint *num_hits) { - PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL); + PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL); - if(!scene_intersect_valid(ray)) { - *num_hits = 0; - return false; - } + if (!scene_intersect_valid(ray)) { + *num_hits = 0; + return false; + } # ifdef __EMBREE__ - if(kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); - ctx.isect_s = isect; - ctx.max_hits = max_hits; - ctx.num_hits = 0; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW); - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - - if(ctx.num_hits > max_hits) { - return true; - } - *num_hits = ctx.num_hits; - 
return rtc_ray.tfar == -INFINITY; - } + if (kernel_data.bvh.scene) { + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); + ctx.isect_s = isect; + ctx.max_hits = max_hits; + ctx.num_hits = 0; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_SHADOW); + rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); + + if (ctx.num_hits > max_hits) { + return true; + } + *num_hits = ctx.num_hits; + return rtc_ray.tfar == -INFINITY; + } # endif # ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { + if (kernel_data.bvh.have_motion) { # ifdef __HAIR__ - if(kernel_data.bvh.have_curves) { - return bvh_intersect_shadow_all_hair_motion(kg, - ray, - isect, - visibility, - max_hits, - num_hits); - } -# endif /* __HAIR__ */ - - return bvh_intersect_shadow_all_motion(kg, - ray, - isect, - visibility, - max_hits, - num_hits); - } -# endif /* __OBJECT_MOTION__ */ + if (kernel_data.bvh.have_curves) { + return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, visibility, max_hits, num_hits); + } +# endif /* __HAIR__ */ + + return bvh_intersect_shadow_all_motion(kg, ray, isect, visibility, max_hits, num_hits); + } +# endif /* __OBJECT_MOTION__ */ # ifdef __HAIR__ - if(kernel_data.bvh.have_curves) { - return bvh_intersect_shadow_all_hair(kg, - ray, - isect, - visibility, - max_hits, - num_hits); - } -# endif /* __HAIR__ */ + if (kernel_data.bvh.have_curves) { + return bvh_intersect_shadow_all_hair(kg, ray, isect, visibility, max_hits, num_hits); + } +# endif /* __HAIR__ */ # ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) { - return bvh_intersect_shadow_all_instancing(kg, - ray, - isect, - visibility, - max_hits, - num_hits); - } -# endif /* __INSTANCING__ */ - - return bvh_intersect_shadow_all(kg, - ray, - isect, - visibility, - max_hits, - num_hits); + if (kernel_data.bvh.have_instancing) { + return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits); + } +# 
endif /* __INSTANCING__ */ + + return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits); } -#endif /* __SHADOW_RECORD_ALL__ */ +#endif /* __SHADOW_RECORD_ALL__ */ #ifdef __VOLUME__ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, @@ -415,31 +375,31 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, Intersection *isect, const uint visibility) { - PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME); + PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME); - if(!scene_intersect_valid(ray)) { - return false; - } + if (!scene_intersect_valid(ray)) { + return false; + } # ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_volume_motion(kg, ray, isect, visibility); - } -# endif /* __OBJECT_MOTION__ */ + if (kernel_data.bvh.have_motion) { + return bvh_intersect_volume_motion(kg, ray, isect, visibility); + } +# endif /* __OBJECT_MOTION__ */ # ifdef __KERNEL_CPU__ # ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_volume_instancing(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ - return bvh_intersect_volume(kg, ray, isect, visibility); -# else /* __KERNEL_CPU__ */ + if (kernel_data.bvh.have_instancing) + return bvh_intersect_volume_instancing(kg, ray, isect, visibility); +# endif /* __INSTANCING__ */ + return bvh_intersect_volume(kg, ray, isect, visibility); +# else /* __KERNEL_CPU__ */ # ifdef __INSTANCING__ - return bvh_intersect_volume_instancing(kg, ray, isect, visibility); + return bvh_intersect_volume_instancing(kg, ray, isect, visibility); # else - return bvh_intersect_volume(kg, ray, isect, visibility); -# endif /* __INSTANCING__ */ -# endif /* __KERNEL_CPU__ */ + return bvh_intersect_volume(kg, ray, isect, visibility); +# endif /* __INSTANCING__ */ +# endif /* __KERNEL_CPU__ */ } -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ #ifdef __VOLUME_RECORD_ALL__ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, @@ -448,37 
+408,36 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, const uint max_hits, const uint visibility) { - PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL); + PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL); - if(!scene_intersect_valid(ray)) { - return false; - } + if (!scene_intersect_valid(ray)) { + return false; + } # ifdef __EMBREE__ - if(kernel_data.bvh.scene) { - CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); - ctx.isect_s = isect; - ctx.max_hits = max_hits; - ctx.num_hits = 0; - IntersectContext rtc_ctx(&ctx); - RTCRay rtc_ray; - kernel_embree_setup_ray(*ray, rtc_ray, visibility); - rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); - return rtc_ray.tfar == -INFINITY; - } + if (kernel_data.bvh.scene) { + CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); + ctx.isect_s = isect; + ctx.max_hits = max_hits; + ctx.num_hits = 0; + IntersectContext rtc_ctx(&ctx); + RTCRay rtc_ray; + kernel_embree_setup_ray(*ray, rtc_ray, visibility); + rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); + return rtc_ray.tfar == -INFINITY; + } # endif # ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { - return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); - } -# endif /* __OBJECT_MOTION__ */ + if (kernel_data.bvh.have_motion) { + return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); + } +# endif /* __OBJECT_MOTION__ */ # ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility); -# endif /* __INSTANCING__ */ - return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); + if (kernel_data.bvh.have_instancing) + return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility); +# endif /* __INSTANCING__ */ + return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); } -#endif /* __VOLUME_RECORD_ALL__ */ - +#endif /* 
__VOLUME_RECORD_ALL__ */ /* Ray offset to avoid self intersection. * @@ -488,48 +447,48 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, ccl_device_inline float3 ray_offset(float3 P, float3 Ng) { #ifdef __INTERSECTION_REFINE__ - const float epsilon_f = 1e-5f; - /* ideally this should match epsilon_f, but instancing and motion blur - * precision makes it problematic */ - const float epsilon_test = 1.0f; - const int epsilon_i = 32; - - float3 res; - - /* x component */ - if(fabsf(P.x) < epsilon_test) { - res.x = P.x + Ng.x*epsilon_f; - } - else { - uint ix = __float_as_uint(P.x); - ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i; - res.x = __uint_as_float(ix); - } - - /* y component */ - if(fabsf(P.y) < epsilon_test) { - res.y = P.y + Ng.y*epsilon_f; - } - else { - uint iy = __float_as_uint(P.y); - iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i; - res.y = __uint_as_float(iy); - } - - /* z component */ - if(fabsf(P.z) < epsilon_test) { - res.z = P.z + Ng.z*epsilon_f; - } - else { - uint iz = __float_as_uint(P.z); - iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i; - res.z = __uint_as_float(iz); - } - - return res; + const float epsilon_f = 1e-5f; + /* ideally this should match epsilon_f, but instancing and motion blur + * precision makes it problematic */ + const float epsilon_test = 1.0f; + const int epsilon_i = 32; + + float3 res; + + /* x component */ + if (fabsf(P.x) < epsilon_test) { + res.x = P.x + Ng.x * epsilon_f; + } + else { + uint ix = __float_as_uint(P.x); + ix += ((ix ^ __float_as_uint(Ng.x)) >> 31) ? -epsilon_i : epsilon_i; + res.x = __uint_as_float(ix); + } + + /* y component */ + if (fabsf(P.y) < epsilon_test) { + res.y = P.y + Ng.y * epsilon_f; + } + else { + uint iy = __float_as_uint(P.y); + iy += ((iy ^ __float_as_uint(Ng.y)) >> 31) ? 
-epsilon_i : epsilon_i; + res.y = __uint_as_float(iy); + } + + /* z component */ + if (fabsf(P.z) < epsilon_test) { + res.z = P.z + Ng.z * epsilon_f; + } + else { + uint iz = __float_as_uint(P.z); + iz += ((iz ^ __float_as_uint(Ng.z)) >> 31) ? -epsilon_i : epsilon_i; + res.z = __uint_as_float(iz); + } + + return res; #else - const float epsilon_f = 1e-4f; - return P + epsilon_f*Ng; + const float epsilon_f = 1e-4f; + return P + epsilon_f * Ng; #endif } @@ -537,40 +496,40 @@ ccl_device_inline float3 ray_offset(float3 P, float3 Ng) /* ToDo: Move to another file? */ ccl_device int intersections_compare(const void *a, const void *b) { - const Intersection *isect_a = (const Intersection*)a; - const Intersection *isect_b = (const Intersection*)b; - - if(isect_a->t < isect_b->t) - return -1; - else if(isect_a->t > isect_b->t) - return 1; - else - return 0; + const Intersection *isect_a = (const Intersection *)a; + const Intersection *isect_b = (const Intersection *)b; + + if (isect_a->t < isect_b->t) + return -1; + else if (isect_a->t > isect_b->t) + return 1; + else + return 0; } #endif #if defined(__SHADOW_RECORD_ALL__) ccl_device_inline void sort_intersections(Intersection *hits, uint num_hits) { -#ifdef __KERNEL_GPU__ - /* Use bubble sort which has more friendly memory pattern on GPU. */ - bool swapped; - do { - swapped = false; - for(int j = 0; j < num_hits - 1; ++j) { - if(hits[j].t > hits[j + 1].t) { - struct Intersection tmp = hits[j]; - hits[j] = hits[j + 1]; - hits[j + 1] = tmp; - swapped = true; - } - } - --num_hits; - } while(swapped); -#else - qsort(hits, num_hits, sizeof(Intersection), intersections_compare); -#endif +# ifdef __KERNEL_GPU__ + /* Use bubble sort which has more friendly memory pattern on GPU. 
*/ + bool swapped; + do { + swapped = false; + for (int j = 0; j < num_hits - 1; ++j) { + if (hits[j].t > hits[j + 1].t) { + struct Intersection tmp = hits[j]; + hits[j] = hits[j + 1]; + hits[j + 1] = tmp; + swapped = true; + } + } + --num_hits; + } while (swapped); +# else + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); +# endif } -#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */ +#endif /* __SHADOW_RECORD_ALL__ | __VOLUME_RECORD_ALL__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h index bfc911a1e76..661bba54fd4 100644 --- a/intern/cycles/kernel/bvh/bvh_embree.h +++ b/intern/cycles/kernel/bvh/bvh_embree.h @@ -24,103 +24,120 @@ CCL_NAMESPACE_BEGIN -struct CCLIntersectContext { - typedef enum { - RAY_REGULAR = 0, - RAY_SHADOW_ALL = 1, - RAY_SSS = 2, - RAY_VOLUME_ALL = 3, +struct CCLIntersectContext { + typedef enum { + RAY_REGULAR = 0, + RAY_SHADOW_ALL = 1, + RAY_SSS = 2, + RAY_VOLUME_ALL = 3, - } RayType; + } RayType; - KernelGlobals *kg; - RayType type; + KernelGlobals *kg; + RayType type; - /* for shadow rays */ - Intersection *isect_s; - int max_hits; - int num_hits; + /* for shadow rays */ + Intersection *isect_s; + int max_hits; + int num_hits; - /* for SSS Rays: */ - LocalIntersection *ss_isect; - int sss_object_id; - uint *lcg_state; + /* for SSS Rays: */ + LocalIntersection *ss_isect; + int sss_object_id; + uint *lcg_state; - CCLIntersectContext(KernelGlobals *kg_, RayType type_) - { - kg = kg_; - type = type_; - max_hits = 1; - num_hits = 0; - isect_s = NULL; - ss_isect = NULL; - sss_object_id = -1; - lcg_state = NULL; - } + CCLIntersectContext(KernelGlobals *kg_, RayType type_) + { + kg = kg_; + type = type_; + max_hits = 1; + num_hits = 0; + isect_s = NULL; + ss_isect = NULL; + sss_object_id = -1; + lcg_state = NULL; + } }; -class IntersectContext -{ -public: - IntersectContext(CCLIntersectContext* ctx) - { - rtcInitIntersectContext(&context); - userRayExt = 
ctx; - } - RTCIntersectContext context; - CCLIntersectContext* userRayExt; +class IntersectContext { + public: + IntersectContext(CCLIntersectContext *ctx) + { + rtcInitIntersectContext(&context); + userRayExt = ctx; + } + RTCIntersectContext context; + CCLIntersectContext *userRayExt; }; -ccl_device_inline void kernel_embree_setup_ray(const Ray& ray, RTCRay& rtc_ray, const uint visibility) +ccl_device_inline void kernel_embree_setup_ray(const Ray &ray, + RTCRay &rtc_ray, + const uint visibility) { - rtc_ray.org_x = ray.P.x; - rtc_ray.org_y = ray.P.y; - rtc_ray.org_z = ray.P.z; - rtc_ray.dir_x = ray.D.x; - rtc_ray.dir_y = ray.D.y; - rtc_ray.dir_z = ray.D.z; - rtc_ray.tnear = 0.0f; - rtc_ray.tfar = ray.t; - rtc_ray.time = ray.time; - rtc_ray.mask = visibility; + rtc_ray.org_x = ray.P.x; + rtc_ray.org_y = ray.P.y; + rtc_ray.org_z = ray.P.z; + rtc_ray.dir_x = ray.D.x; + rtc_ray.dir_y = ray.D.y; + rtc_ray.dir_z = ray.D.z; + rtc_ray.tnear = 0.0f; + rtc_ray.tfar = ray.t; + rtc_ray.time = ray.time; + rtc_ray.mask = visibility; } -ccl_device_inline void kernel_embree_setup_rayhit(const Ray& ray, RTCRayHit& rayhit, const uint visibility) +ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray, + RTCRayHit &rayhit, + const uint visibility) { - kernel_embree_setup_ray(ray, rayhit.ray, visibility); - rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; - rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID; + kernel_embree_setup_ray(ray, rayhit.ray, visibility); + rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID; + rayhit.hit.primID = RTC_INVALID_GEOMETRY_ID; } -ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect) +ccl_device_inline void kernel_embree_convert_hit(KernelGlobals *kg, + const RTCRay *ray, + const RTCHit *hit, + Intersection *isect) { - bool is_hair = hit->geomID & 1; - isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u; - isect->v = is_hair ? 
hit->v : hit->u; - isect->t = ray->tfar; - isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); - if(hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { - RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); - isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, hit->instID[0]/2); - isect->object = hit->instID[0]/2; - } - else { - isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); - isect->object = OBJECT_NONE; - } - isect->type = kernel_tex_fetch(__prim_type, isect->prim); + bool is_hair = hit->geomID & 1; + isect->u = is_hair ? hit->u : 1.0f - hit->v - hit->u; + isect->v = is_hair ? hit->v : hit->u; + isect->t = ray->tfar; + isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); + if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) { + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0])); + isect->prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + + kernel_tex_fetch(__object_node, hit->instID[0] / 2); + isect->object = hit->instID[0] / 2; + } + else { + isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.bvh.scene, hit->geomID)); + isect->object = OBJECT_NONE; + } + isect->type = kernel_tex_fetch(__prim_type, isect->prim); } -ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int local_object_id) +ccl_device_inline void kernel_embree_convert_local_hit(KernelGlobals *kg, + const RTCRay *ray, + const RTCHit *hit, + Intersection *isect, + int local_object_id) { - isect->u = 1.0f - hit->v - hit->u; - isect->v = hit->u; - isect->t = ray->tfar; - isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); - RTCScene inst_scene = 
(RTCScene)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2)); - isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + kernel_tex_fetch(__object_node, local_object_id); - isect->object = local_object_id; - isect->type = kernel_tex_fetch(__prim_type, isect->prim); + isect->u = 1.0f - hit->v - hit->u; + isect->v = hit->u; + isect->t = ray->tfar; + isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z); + RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData( + rtcGetGeometry(kernel_data.bvh.scene, local_object_id * 2)); + isect->prim = hit->primID + + (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID)) + + kernel_tex_fetch(__object_node, local_object_id); + isect->object = local_object_id; + isect->type = kernel_tex_fetch(__prim_type, isect->prim); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/bvh/bvh_local.h b/intern/cycles/kernel/bvh/bvh_local.h index 3bdc9293a6c..7a069ef1108 100644 --- a/intern/cycles/kernel/bvh/bvh_local.h +++ b/intern/cycles/kernel/bvh/bvh_local.h @@ -43,208 +43,201 @@ ccl_device #else ccl_device_inline #endif -bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - LocalIntersection *local_isect, - int local_object, - uint *lcg_state, - int max_hits) + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + LocalIntersection *local_isect, + int local_object, + uint *lcg_state, + int max_hits) { - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ - /* traversal stack in CUDA thread-local memory */ - int 
traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_tex_fetch(__object_node, local_object); + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_tex_fetch(__object_node, local_object); - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = ray->t; + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = ray->t; - if(local_isect != NULL) { - local_isect->num_hits = 0; - } - kernel_assert((local_isect == NULL) == (max_hits == 0)); + if (local_isect != NULL) { + local_isect->num_hits = 0; + } + kernel_assert((local_isect == NULL) == (max_hits == 0)); - const int object_flag = kernel_tex_fetch(__object_flag, local_object); - if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const int object_flag = kernel_tex_fetch(__object_flag, local_object); + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - isect_t = bvh_instance_motion_push(kg, - local_object, - ray, - &P, - &dir, - &idir, - isect_t, - &ob_itfm); + Transform ob_itfm; + isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm); #else - isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t); + isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t); #endif - object = local_object; - } + object = local_object; + } #if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = 
shuffle_swap_swap(); + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; # if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect_t); + ssef tnear(0.0f), tfar(isect_t); # endif - shuffle_swap_t shufflexyz[3]; + shuffle_swap_t shufflexyz[3]; - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); + ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #if !defined(__KERNEL_SSE2__) - traverse_mask = NODE_INTERSECT(kg, - P, + traverse_mask = NODE_INTERSECT(kg, + P, # if BVH_FEATURE(BVH_HAIR) - dir, + dir, # endif - idir, - isect_t, - node_addr, - PATH_RAY_ALL_VISIBILITY, - dist); + idir, + isect_t, + node_addr, + PATH_RAY_ALL_VISIBILITY, + dist); #else // __KERNEL_SSE2__ - traverse_mask = NODE_INTERSECT(kg, - P, - dir, + traverse_mask = NODE_INTERSECT(kg, + P, + dir, # if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, + tnear, + tfar, # endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - node_addr, - PATH_RAY_ALL_VISIBILITY, - dist); + 
tsplat, + Psplat, + idirsplat, + shufflexyz, + node_addr, + PATH_RAY_ALL_VISIBILITY, + dist); #endif // __KERNEL_SSE2__ - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool is_closest_child1 = (dist[1] < dist[0]); - if(is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. 
*/ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } - /* if node is leaf, fetch triangle list */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - int prim_addr = __float_as_int(leaf.x); + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(triangle_intersect_local(kg, - local_isect, - P, - dir, - object, - local_object, - prim_addr, - isect_t, - lcg_state, - max_hits)) { - return true; - } - } - break; - } + /* primitive intersection */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect_local(kg, + local_isect, + P, + dir, + object, + local_object, + prim_addr, + isect_t, + lcg_state, + max_hits)) { + return true; + } + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(motion_triangle_intersect_local(kg, - local_isect, - P, - dir, - ray->time, - object, - local_object, - prim_addr, - isect_t, - lcg_state, - max_hits)) { - return true; - } - } 
- break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (motion_triangle_intersect_local(kg, + local_isect, + P, + dir, + ray->time, + object, + local_object, + prim_addr, + isect_t, + lcg_state, + max_hits)) { + return true; + } + } + break; + } #endif - default: { - break; - } - } - } - } while(node_addr != ENTRYPOINT_SENTINEL); - } while(node_addr != ENTRYPOINT_SENTINEL); + default: { + break; + } + } + } + } while (node_addr != ENTRYPOINT_SENTINEL); + } while (node_addr != ENTRYPOINT_SENTINEL); - return false; + return false; } ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, @@ -254,35 +247,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, uint *lcg_state, int max_hits) { - switch(kernel_data.bvh.bvh_layout) { + switch (kernel_data.bvh.bvh_layout) { #ifdef __KERNEL_AVX2__ - case BVH_LAYOUT_BVH8: - return BVH_FUNCTION_FULL_NAME(OBVH)(kg, - ray, - local_isect, - local_object, - lcg_state, - max_hits); + case BVH_LAYOUT_BVH8: + return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits); #endif #ifdef __QBVH__ - case BVH_LAYOUT_BVH4: - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - local_isect, - local_object, - lcg_state, - max_hits); + case BVH_LAYOUT_BVH4: + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, local_isect, local_object, lcg_state, max_hits); #endif - case BVH_LAYOUT_BVH2: - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - local_isect, - local_object, - lcg_state, - max_hits); - } - kernel_assert(!"Should not happen"); - return false; + case BVH_LAYOUT_BVH2: + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, local_isect, local_object, lcg_state, max_hits); + } + kernel_assert(!"Should not happen"); + return false; } #undef BVH_FUNCTION_NAME diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h index 
060b3934a41..042630121c8 100644 --- a/intern/cycles/kernel/bvh/bvh_nodes.h +++ b/intern/cycles/kernel/bvh/bvh_nodes.h @@ -20,12 +20,12 @@ ccl_device_forceinline Transform bvh_unaligned_node_fetch_space(KernelGlobals *k int node_addr, int child) { - Transform space; - const int child_addr = node_addr + child * 3; - space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1); - space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2); - space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3); - return space; + Transform space; + const int child_addr = node_addr + child * 3; + space.x = kernel_tex_fetch(__bvh_nodes, child_addr + 1); + space.y = kernel_tex_fetch(__bvh_nodes, child_addr + 2); + space.z = kernel_tex_fetch(__bvh_nodes, child_addr + 3); + return space; } #if !defined(__KERNEL_SSE2__) @@ -38,42 +38,41 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg, float dist[2]) { - /* fetch node data */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1); - float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2); - float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3); - - /* intersect ray against child nodes */ - float c0lox = (node0.x - P.x) * idir.x; - float c0hix = (node0.z - P.x) * idir.x; - float c0loy = (node1.x - P.y) * idir.y; - float c0hiy = (node1.z - P.y) * idir.y; - float c0loz = (node2.x - P.z) * idir.z; - float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); - float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); - - float c1lox = (node0.y - P.x) * idir.x; - float c1hix = (node0.w - P.x) * idir.x; - float c1loy = (node1.y - P.y) * idir.y; - float c1hiy = (node1.w - P.y) * idir.y; - float c1loz = (node2.y - P.z) * idir.z; - float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); - float c1max = min4(t, 
max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); - - dist[0] = c0min; - dist[1] = c1min; - -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); -#else - return ((c0max >= c0min)? 1: 0) | - ((c1max >= c1min)? 2: 0); -#endif + /* fetch node data */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); + float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); + float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); + + /* intersect ray against child nodes */ + float c0lox = (node0.x - P.x) * idir.x; + float c0hix = (node0.z - P.x) * idir.x; + float c0loy = (node1.x - P.y) * idir.y; + float c0hiy = (node1.z - P.y) * idir.y; + float c0loz = (node2.x - P.z) * idir.z; + float c0hiz = (node2.z - P.z) * idir.z; + float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); + float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); + + float c1lox = (node0.y - P.x) * idir.x; + float c1hix = (node0.w - P.x) * idir.x; + float c1loy = (node1.y - P.y) * idir.y; + float c1hiy = (node1.w - P.y) * idir.y; + float c1loz = (node2.y - P.z) * idir.z; + float c1hiz = (node2.w - P.z) * idir.z; + float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); + float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); + + dist[0] = c0min; + dist[1] = c1min; + +# ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | + (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); +# else + return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 
2 : 0); +# endif } ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg, @@ -87,118 +86,115 @@ ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg, float dist[2]) { - /* fetch node data */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr+1); - float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr+2); - float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr+3); - - /* intersect ray against child nodes */ - float c0lox = (node0.x - P.x) * idir.x; - float c0hix = (node0.z - P.x) * idir.x; - float c0loy = (node1.x - P.y) * idir.y; - float c0hiy = (node1.z - P.y) * idir.y; - float c0loz = (node2.x - P.z) * idir.z; - float c0hiz = (node2.z - P.z) * idir.z; - float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); - float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); - - float c1lox = (node0.y - P.x) * idir.x; - float c1hix = (node0.w - P.x) * idir.x; - float c1loy = (node1.y - P.y) * idir.y; - float c1hiy = (node1.w - P.y) * idir.y; - float c1loz = (node2.y - P.z) * idir.z; - float c1hiz = (node2.w - P.z) * idir.z; - float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); - float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); - - if(difl != 0.0f) { - float hdiff = 1.0f + difl; - float ldiff = 1.0f - difl; - if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) { - c0min = max(ldiff * c0min, c0min - extmax); - c0max = min(hdiff * c0max, c0max + extmax); - } - if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) { - c1min = max(ldiff * c1min, c1min - extmax); - c1max = min(hdiff * c1max, c1max + extmax); - } - } - - dist[0] = c0min; - dist[1] = c1min; - -#ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 
1: 0) | - (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); -#else - return ((c0max >= c0min)? 1: 0) | - ((c1max >= c1min)? 2: 0); -#endif + /* fetch node data */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); + float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); + float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); + + /* intersect ray against child nodes */ + float c0lox = (node0.x - P.x) * idir.x; + float c0hix = (node0.z - P.x) * idir.x; + float c0loy = (node1.x - P.y) * idir.y; + float c0hiy = (node1.z - P.y) * idir.y; + float c0loz = (node2.x - P.z) * idir.z; + float c0hiz = (node2.z - P.z) * idir.z; + float c0min = max4(0.0f, min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz)); + float c0max = min4(t, max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz)); + + float c1lox = (node0.y - P.x) * idir.x; + float c1hix = (node0.w - P.x) * idir.x; + float c1loy = (node1.y - P.y) * idir.y; + float c1hiy = (node1.w - P.y) * idir.y; + float c1loz = (node2.y - P.z) * idir.z; + float c1hiz = (node2.w - P.z) * idir.z; + float c1min = max4(0.0f, min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz)); + float c1max = min4(t, max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz)); + + if (difl != 0.0f) { + float hdiff = 1.0f + difl; + float ldiff = 1.0f - difl; + if (__float_as_int(cnodes.z) & PATH_RAY_CURVE) { + c0min = max(ldiff * c0min, c0min - extmax); + c0max = min(hdiff * c0max, c0max + extmax); + } + if (__float_as_int(cnodes.w) & PATH_RAY_CURVE) { + c1min = max(ldiff * c1min, c1min - extmax); + c1max = min(hdiff * c1max, c1max + extmax); + } + } + + dist[0] = c0min; + dist[1] = c1min; + +# ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? */ + return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | + (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility)) ? 
2 : 0); +# else + return ((c0max >= c0min) ? 1 : 0) | ((c1max >= c1min) ? 2 : 0); +# endif } -ccl_device_forceinline bool bvh_unaligned_node_intersect_child( - KernelGlobals *kg, - const float3 P, - const float3 dir, - const float t, - int node_addr, - int child, - float dist[2]) +ccl_device_forceinline bool bvh_unaligned_node_intersect_child(KernelGlobals *kg, + const float3 P, + const float3 dir, + const float t, + int node_addr, + int child, + float dist[2]) { - Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child); - float3 aligned_dir = transform_direction(&space, dir); - float3 aligned_P = transform_point(&space, P); - float3 nrdir = -bvh_inverse_direction(aligned_dir); - float3 lower_xyz = aligned_P * nrdir; - float3 upper_xyz = lower_xyz - nrdir; - const float near_x = min(lower_xyz.x, upper_xyz.x); - const float near_y = min(lower_xyz.y, upper_xyz.y); - const float near_z = min(lower_xyz.z, upper_xyz.z); - const float far_x = max(lower_xyz.x, upper_xyz.x); - const float far_y = max(lower_xyz.y, upper_xyz.y); - const float far_z = max(lower_xyz.z, upper_xyz.z); - const float tnear = max4(0.0f, near_x, near_y, near_z); - const float tfar = min4(t, far_x, far_y, far_z); - *dist = tnear; - return tnear <= tfar; + Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child); + float3 aligned_dir = transform_direction(&space, dir); + float3 aligned_P = transform_point(&space, P); + float3 nrdir = -bvh_inverse_direction(aligned_dir); + float3 lower_xyz = aligned_P * nrdir; + float3 upper_xyz = lower_xyz - nrdir; + const float near_x = min(lower_xyz.x, upper_xyz.x); + const float near_y = min(lower_xyz.y, upper_xyz.y); + const float near_z = min(lower_xyz.z, upper_xyz.z); + const float far_x = max(lower_xyz.x, upper_xyz.x); + const float far_y = max(lower_xyz.y, upper_xyz.y); + const float far_z = max(lower_xyz.z, upper_xyz.z); + const float tnear = max4(0.0f, near_x, near_y, near_z); + const float tfar = min4(t, far_x, far_y, far_z); + 
*dist = tnear; + return tnear <= tfar; } -ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust( - KernelGlobals *kg, - const float3 P, - const float3 dir, - const float t, - const float difl, - int node_addr, - int child, - float dist[2]) +ccl_device_forceinline bool bvh_unaligned_node_intersect_child_robust(KernelGlobals *kg, + const float3 P, + const float3 dir, + const float t, + const float difl, + int node_addr, + int child, + float dist[2]) { - Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child); - float3 aligned_dir = transform_direction(&space, dir); - float3 aligned_P = transform_point(&space, P); - float3 nrdir = -bvh_inverse_direction(aligned_dir); - float3 tLowerXYZ = aligned_P * nrdir; - float3 tUpperXYZ = tLowerXYZ - nrdir; - const float near_x = min(tLowerXYZ.x, tUpperXYZ.x); - const float near_y = min(tLowerXYZ.y, tUpperXYZ.y); - const float near_z = min(tLowerXYZ.z, tUpperXYZ.z); - const float far_x = max(tLowerXYZ.x, tUpperXYZ.x); - const float far_y = max(tLowerXYZ.y, tUpperXYZ.y); - const float far_z = max(tLowerXYZ.z, tUpperXYZ.z); - const float tnear = max4(0.0f, near_x, near_y, near_z); - const float tfar = min4(t, far_x, far_y, far_z); - *dist = tnear; - if(difl != 0.0f) { - /* TODO(sergey): Same as for QBVH, needs a proper use. 
*/ - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - return round_down*tnear <= round_up*tfar; - } - else { - return tnear <= tfar; - } + Transform space = bvh_unaligned_node_fetch_space(kg, node_addr, child); + float3 aligned_dir = transform_direction(&space, dir); + float3 aligned_P = transform_point(&space, P); + float3 nrdir = -bvh_inverse_direction(aligned_dir); + float3 tLowerXYZ = aligned_P * nrdir; + float3 tUpperXYZ = tLowerXYZ - nrdir; + const float near_x = min(tLowerXYZ.x, tUpperXYZ.x); + const float near_y = min(tLowerXYZ.y, tUpperXYZ.y); + const float near_z = min(tLowerXYZ.z, tUpperXYZ.z); + const float far_x = max(tLowerXYZ.x, tUpperXYZ.x); + const float far_y = max(tLowerXYZ.y, tUpperXYZ.y); + const float far_z = max(tLowerXYZ.z, tUpperXYZ.z); + const float tnear = max4(0.0f, near_x, near_y, near_z); + const float tfar = min4(t, far_x, far_y, far_z); + *dist = tnear; + if (difl != 0.0f) { + /* TODO(sergey): Same as for QBVH, needs a proper use. 
*/ + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + return round_down * tnear <= round_up * tfar; + } + else { + return tnear <= tfar; + } } ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, @@ -210,25 +206,25 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, const uint visibility, float dist[2]) { - int mask = 0; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(cnodes.x) & visibility)) -#endif - { - mask |= 1; - } - } - if(bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) { -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(cnodes.y) & visibility)) -#endif - { - mask |= 2; - } - } - return mask; + int mask = 0; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { +# ifdef __VISIBILITY_FLAG__ + if ((__float_as_uint(cnodes.x) & visibility)) +# endif + { + mask |= 1; + } + } + if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 1, &dist[1])) { +# ifdef __VISIBILITY_FLAG__ + if ((__float_as_uint(cnodes.y) & visibility)) +# endif + { + mask |= 2; + } + } + return mask; } ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg, @@ -242,25 +238,25 @@ ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg const uint visibility, float dist[2]) { - int mask = 0; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) { -#ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(cnodes.x) & visibility)) -#endif - { - mask |= 1; - } - } - if(bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) { -#ifdef __VISIBILITY_FLAG__ - 
if((__float_as_uint(cnodes.y) & visibility)) -#endif - { - mask |= 2; - } - } - return mask; + int mask = 0; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 0, &dist[0])) { +# ifdef __VISIBILITY_FLAG__ + if ((__float_as_uint(cnodes.x) & visibility)) +# endif + { + mask |= 1; + } + } + if (bvh_unaligned_node_intersect_child_robust(kg, P, dir, t, difl, node_addr, 1, &dist[1])) { +# ifdef __VISIBILITY_FLAG__ + if ((__float_as_uint(cnodes.y) & visibility)) +# endif + { + mask |= 2; + } + } + return mask; } ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, @@ -272,26 +268,13 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, const uint visibility, float dist[2]) { - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect(kg, - P, - dir, - idir, - t, - node_addr, - visibility, - dist); - } - else { - return bvh_aligned_node_intersect(kg, - P, - idir, - t, - node_addr, - visibility, - dist); - } + float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); + if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect(kg, P, dir, idir, t, node_addr, visibility, dist); + } + else { + return bvh_aligned_node_intersect(kg, P, idir, t, node_addr, visibility, dist); + } } ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg, @@ -305,279 +288,244 @@ ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg, const uint visibility, float dist[2]) { - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect_robust(kg, - P, - dir, - idir, - t, - difl, - extmax, - node_addr, - visibility, - dist); - } - else { - return bvh_aligned_node_intersect_robust(kg, - P, - idir, - t, - difl, - extmax, - node_addr, - 
visibility, - dist); - } + float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); + if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect_robust( + kg, P, dir, idir, t, difl, extmax, node_addr, visibility, dist); + } + else { + return bvh_aligned_node_intersect_robust( + kg, P, idir, t, difl, extmax, node_addr, visibility, dist); + } } -#else /* !defined(__KERNEL_SSE2__) */ - -int ccl_device_forceinline bvh_aligned_node_intersect( - KernelGlobals *kg, - const float3& P, - const float3& dir, - const ssef& tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const int node_addr, - const uint visibility, - float dist[2]) +#else /* !defined(__KERNEL_SSE2__) */ + +int ccl_device_forceinline bvh_aligned_node_intersect(KernelGlobals *kg, + const float3 &P, + const float3 &dir, + const ssef &tsplat, + const ssef Psplat[3], + const ssef idirsplat[3], + const shuffle_swap_t shufflexyz[3], + const int node_addr, + const uint visibility, + float dist[2]) { - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + node_addr; + /* fetch node data */ + const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + node_addr; - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; + /* intersect ray against child nodes */ + const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; + const ssef tminmaxy = 
(shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; + const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); + /* calculate { c0min, c1min, -c0max, -c1max} */ + ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); + const ssef tminmax = minmax ^ pn; + const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - dist[0] = tminmax[0]; - dist[1] = tminmax[1]; + dist[0] = tminmax[0]; + dist[1] = tminmax[1]; - int mask = movemask(lrhit); + int mask = movemask(lrhit); # ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); - return cmask; + /* this visibility test gives a 5% performance hit, how to solve? */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | + (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 
2 : 0); + return cmask; # else - return mask & 3; + return mask & 3; # endif } -ccl_device_forceinline int bvh_aligned_node_intersect_robust( - KernelGlobals *kg, - const float3& P, - const float3& dir, - const ssef& tsplat, - const ssef Psplat[3], - const ssef idirsplat[3], - const shuffle_swap_t shufflexyz[3], - const float difl, - const float extmax, - const int nodeAddr, - const uint visibility, - float dist[2]) +ccl_device_forceinline int bvh_aligned_node_intersect_robust(KernelGlobals *kg, + const float3 &P, + const float3 &dir, + const ssef &tsplat, + const ssef Psplat[3], + const ssef idirsplat[3], + const shuffle_swap_t shufflexyz[3], + const float difl, + const float extmax, + const int nodeAddr, + const uint visibility, + float dist[2]) { - /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - - /* fetch node data */ - const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr; - - /* intersect ray against child nodes */ - const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; - const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; - const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; - - /* calculate { c0min, c1min, -c0max, -c1max} */ - ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); - const ssef tminmax = minmax ^ pn; - - if(difl != 0.0f) { - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - float4 *tminmaxview = (float4*)&tminmax; - float& c0min = tminmaxview->x, &c1min = tminmaxview->y; - float& c0max = tminmaxview->z, &c1max = tminmaxview->w; - float hdiff = 1.0f + difl; - float ldiff = 1.0f - difl; - if(__float_as_int(cnodes.x) & PATH_RAY_CURVE) { - c0min = max(ldiff * c0min, c0min - extmax); - c0max = min(hdiff * c0max, c0max + extmax); - } - if(__float_as_int(cnodes.y) & PATH_RAY_CURVE) { - c1min = max(ldiff * 
c1min, c1min - extmax); - c1max = min(hdiff * c1max, c1max + extmax); - } - } - - const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); - - dist[0] = tminmax[0]; - dist[1] = tminmax[1]; - - int mask = movemask(lrhit); + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + + /* fetch node data */ + const ssef *bvh_nodes = (ssef *)kg->__bvh_nodes.data + nodeAddr; + + /* intersect ray against child nodes */ + const ssef tminmaxx = (shuffle_swap(bvh_nodes[1], shufflexyz[0]) - Psplat[0]) * idirsplat[0]; + const ssef tminmaxy = (shuffle_swap(bvh_nodes[2], shufflexyz[1]) - Psplat[1]) * idirsplat[1]; + const ssef tminmaxz = (shuffle_swap(bvh_nodes[3], shufflexyz[2]) - Psplat[2]) * idirsplat[2]; + + /* calculate { c0min, c1min, -c0max, -c1max} */ + ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat)); + const ssef tminmax = minmax ^ pn; + + if (difl != 0.0f) { + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0); + float4 *tminmaxview = (float4 *)&tminmax; + float &c0min = tminmaxview->x, &c1min = tminmaxview->y; + float &c0max = tminmaxview->z, &c1max = tminmaxview->w; + float hdiff = 1.0f + difl; + float ldiff = 1.0f - difl; + if (__float_as_int(cnodes.x) & PATH_RAY_CURVE) { + c0min = max(ldiff * c0min, c0min - extmax); + c0max = min(hdiff * c0max, c0max + extmax); + } + if (__float_as_int(cnodes.y) & PATH_RAY_CURVE) { + c1min = max(ldiff * c1min, c1min - extmax); + c1max = min(hdiff * c1max, c1max + extmax); + } + } + + const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax); + + dist[0] = tminmax[0]; + dist[1] = tminmax[1]; + + int mask = movemask(lrhit); # ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 
2: 0); - return cmask; + /* this visibility test gives a 5% performance hit, how to solve? */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr + 0); + int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | + (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); + return cmask; # else - return mask & 3; + return mask & 3; # endif } ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, const float3 P, const float3 dir, - const ssef& isect_near, - const ssef& isect_far, + const ssef &isect_near, + const ssef &isect_far, const int node_addr, const uint visibility, float dist[2]) { - Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0); - Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1); - - float3 aligned_dir0 = transform_direction(&space0, dir), - aligned_dir1 = transform_direction(&space1, dir); - float3 aligned_P0 = transform_point(&space0, P), - aligned_P1 = transform_point(&space1, P); - float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), - nrdir1 = -bvh_inverse_direction(aligned_dir1); - - ssef lower_x = ssef(aligned_P0.x * nrdir0.x, - aligned_P1.x * nrdir1.x, - 0.0f, 0.0f), - lower_y = ssef(aligned_P0.y * nrdir0.y, - aligned_P1.y * nrdir1.y, - 0.0f, - 0.0f), - lower_z = ssef(aligned_P0.z * nrdir0.z, - aligned_P1.z * nrdir1.z, - 0.0f, - 0.0f); - - ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), - upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), - upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); - - ssef tnear_x = min(lower_x, upper_x); - ssef tnear_y = min(lower_y, upper_y); - ssef tnear_z = min(lower_z, upper_z); - ssef tfar_x = max(lower_x, upper_x); - ssef tfar_y = max(lower_y, upper_y); - ssef tfar_z = max(lower_z, upper_z); - - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - sseb vmask = tnear <= tfar; - dist[0] = tnear.f[0]; - dist[1] = 
tnear.f[1]; - - int mask = (int)movemask(vmask); + Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0); + Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1); + + float3 aligned_dir0 = transform_direction(&space0, dir), + aligned_dir1 = transform_direction(&space1, dir); + float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P); + float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), + nrdir1 = -bvh_inverse_direction(aligned_dir1); + + ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f), + lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f), + lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f); + + ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), + upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), + upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); + + ssef tnear_x = min(lower_x, upper_x); + ssef tnear_y = min(lower_y, upper_y); + ssef tnear_z = min(lower_z, upper_z); + ssef tfar_x = max(lower_x, upper_x); + ssef tfar_y = max(lower_y, upper_y); + ssef tfar_z = max(lower_z, upper_z); + + const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); + sseb vmask = tnear <= tfar; + dist[0] = tnear.f[0]; + dist[1] = tnear.f[1]; + + int mask = (int)movemask(vmask); # ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); - return cmask; + /* this visibility test gives a 5% performance hit, how to solve? */ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 
1 : 0) | + (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); + return cmask; # else - return mask & 3; + return mask & 3; # endif } ccl_device_forceinline int bvh_unaligned_node_intersect_robust(KernelGlobals *kg, const float3 P, const float3 dir, - const ssef& isect_near, - const ssef& isect_far, + const ssef &isect_near, + const ssef &isect_far, const float difl, const int node_addr, const uint visibility, float dist[2]) { - Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0); - Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1); - - float3 aligned_dir0 = transform_direction(&space0, dir), - aligned_dir1 = transform_direction(&space1, dir); - float3 aligned_P0 = transform_point(&space0, P), - aligned_P1 = transform_point(&space1, P); - float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), - nrdir1 = -bvh_inverse_direction(aligned_dir1); - - ssef lower_x = ssef(aligned_P0.x * nrdir0.x, - aligned_P1.x * nrdir1.x, - 0.0f, 0.0f), - lower_y = ssef(aligned_P0.y * nrdir0.y, - aligned_P1.y * nrdir1.y, - 0.0f, - 0.0f), - lower_z = ssef(aligned_P0.z * nrdir0.z, - aligned_P1.z * nrdir1.z, - 0.0f, - 0.0f); - - ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), - upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), - upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); - - ssef tnear_x = min(lower_x, upper_x); - ssef tnear_y = min(lower_y, upper_y); - ssef tnear_z = min(lower_z, upper_z); - ssef tfar_x = max(lower_x, upper_x); - ssef tfar_y = max(lower_y, upper_y); - ssef tfar_z = max(lower_z, upper_z); - - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - sseb vmask; - if(difl != 0.0f) { - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - vmask = round_down*tnear <= round_up*tfar; - } - else { - vmask = tnear <= tfar; - } - - dist[0] = tnear.f[0]; - dist[1] = tnear.f[1]; - - int mask = 
(int)movemask(vmask); + Transform space0 = bvh_unaligned_node_fetch_space(kg, node_addr, 0); + Transform space1 = bvh_unaligned_node_fetch_space(kg, node_addr, 1); + + float3 aligned_dir0 = transform_direction(&space0, dir), + aligned_dir1 = transform_direction(&space1, dir); + float3 aligned_P0 = transform_point(&space0, P), aligned_P1 = transform_point(&space1, P); + float3 nrdir0 = -bvh_inverse_direction(aligned_dir0), + nrdir1 = -bvh_inverse_direction(aligned_dir1); + + ssef lower_x = ssef(aligned_P0.x * nrdir0.x, aligned_P1.x * nrdir1.x, 0.0f, 0.0f), + lower_y = ssef(aligned_P0.y * nrdir0.y, aligned_P1.y * nrdir1.y, 0.0f, 0.0f), + lower_z = ssef(aligned_P0.z * nrdir0.z, aligned_P1.z * nrdir1.z, 0.0f, 0.0f); + + ssef upper_x = lower_x - ssef(nrdir0.x, nrdir1.x, 0.0f, 0.0f), + upper_y = lower_y - ssef(nrdir0.y, nrdir1.y, 0.0f, 0.0f), + upper_z = lower_z - ssef(nrdir0.z, nrdir1.z, 0.0f, 0.0f); + + ssef tnear_x = min(lower_x, upper_x); + ssef tnear_y = min(lower_y, upper_y); + ssef tnear_z = min(lower_z, upper_z); + ssef tfar_x = max(lower_x, upper_x); + ssef tfar_y = max(lower_y, upper_y); + ssef tfar_z = max(lower_z, upper_z); + + const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); + sseb vmask; + if (difl != 0.0f) { + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + vmask = round_down * tnear <= round_up * tfar; + } + else { + vmask = tnear <= tfar; + } + + dist[0] = tnear.f[0]; + dist[1] = tnear.f[1]; + + int mask = (int)movemask(vmask); # ifdef __VISIBILITY_FLAG__ - /* this visibility test gives a 5% performance hit, how to solve? */ - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) | - (((mask & 2) && (__float_as_uint(cnodes.y) & visibility))? 2: 0); - return cmask; + /* this visibility test gives a 5% performance hit, how to solve? 
*/ + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + int cmask = (((mask & 1) && (__float_as_uint(cnodes.x) & visibility)) ? 1 : 0) | + (((mask & 2) && (__float_as_uint(cnodes.y) & visibility)) ? 2 : 0); + return cmask; # else - return mask & 3; + return mask & 3; # endif } ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, - const float3& P, - const float3& dir, - const ssef& isect_near, - const ssef& isect_far, - const ssef& tsplat, + const float3 &P, + const float3 &dir, + const ssef &isect_near, + const ssef &isect_far, + const ssef &tsplat, const ssef Psplat[3], const ssef idirsplat[3], const shuffle_swap_t shufflexyz[3], @@ -585,37 +533,23 @@ ccl_device_forceinline int bvh_node_intersect(KernelGlobals *kg, const uint visibility, float dist[2]) { - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect(kg, - P, - dir, - isect_near, - isect_far, - node_addr, - visibility, - dist); - } - else { - return bvh_aligned_node_intersect(kg, - P, - dir, - tsplat, - Psplat, - idirsplat, - shufflexyz, - node_addr, - visibility, - dist); - } + float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); + if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect( + kg, P, dir, isect_near, isect_far, node_addr, visibility, dist); + } + else { + return bvh_aligned_node_intersect( + kg, P, dir, tsplat, Psplat, idirsplat, shufflexyz, node_addr, visibility, dist); + } } ccl_device_forceinline int bvh_node_intersect_robust(KernelGlobals *kg, - const float3& P, - const float3& dir, - const ssef& isect_near, - const ssef& isect_far, - const ssef& tsplat, + const float3 &P, + const float3 &dir, + const ssef &isect_near, + const ssef &isect_far, + const ssef &tsplat, const ssef Psplat[3], const ssef idirsplat[3], const shuffle_swap_t shufflexyz[3], @@ -625,31 +559,24 @@ ccl_device_forceinline int 
bvh_node_intersect_robust(KernelGlobals *kg, const uint visibility, float dist[2]) { - float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return bvh_unaligned_node_intersect_robust(kg, - P, - dir, - isect_near, - isect_far, - difl, - node_addr, - visibility, - dist); - } - else { - return bvh_aligned_node_intersect_robust(kg, - P, - dir, - tsplat, - Psplat, - idirsplat, - shufflexyz, - difl, - extmax, - node_addr, - visibility, - dist); - } + float4 node = kernel_tex_fetch(__bvh_nodes, node_addr); + if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return bvh_unaligned_node_intersect_robust( + kg, P, dir, isect_near, isect_far, difl, node_addr, visibility, dist); + } + else { + return bvh_aligned_node_intersect_robust(kg, + P, + dir, + tsplat, + Psplat, + idirsplat, + shufflexyz, + difl, + extmax, + node_addr, + visibility, + dist); + } } -#endif /* !defined(__KERNEL_SSE2__) */ +#endif /* !defined(__KERNEL_SSE2__) */ diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h index d8e089711ee..b362779549c 100644 --- a/intern/cycles/kernel/bvh/bvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h @@ -19,9 +19,9 @@ #ifdef __QBVH__ # include "kernel/bvh/qbvh_shadow_all.h" -#ifdef __KERNEL_AVX2__ -# include "kernel/bvh/obvh_shadow_all.h" -#endif +# ifdef __KERNEL_AVX2__ +# include "kernel/bvh/obvh_shadow_all.h" +# endif #endif #if BVH_FEATURE(BVH_HAIR) @@ -44,350 +44,340 @@ ccl_device #else ccl_device_inline #endif -bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint visibility, - const uint max_hits, - uint *num_hits) + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint visibility, + const uint max_hits, + uint *num_hits) { - /* todo: - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ 
- - /* traversal stack in CUDA thread-local memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* ray parameters in registers */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; + /* todo: + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif #if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; + int num_hits_in_instance = 0; #endif - *num_hits = 0; - isect_array->t = tmax; + *num_hits = 0; + isect_array->t = tmax; #if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; # if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect_t); + ssef tnear(0.0f), tfar(isect_t); # endif - shuffle_swap_t shufflexyz[3]; + shuffle_swap_t shufflexyz[3]; - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = 
ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); + ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif /* __KERNEL_SSE2__ */ + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif /* __KERNEL_SSE2__ */ - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #if !defined(__KERNEL_SSE2__) - traverse_mask = NODE_INTERSECT(kg, - P, + traverse_mask = NODE_INTERSECT(kg, + P, # if BVH_FEATURE(BVH_HAIR) - dir, + dir, # endif - idir, - isect_t, - node_addr, - visibility, - dist); + idir, + isect_t, + node_addr, + visibility, + dist); #else // __KERNEL_SSE2__ - traverse_mask = NODE_INTERSECT(kg, - P, - dir, + traverse_mask = NODE_INTERSECT(kg, + P, + dir, # if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, + tnear, + tfar, # endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - node_addr, - visibility, - dist); + tsplat, + Psplat, + idirsplat, + shufflexyz, + node_addr, + visibility, + dist); #endif // __KERNEL_SSE2__ - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. 
*/ - bool is_closest_child1 = (dist[1] < dist[0]); - if(is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - int prim_addr = __float_as_int(leaf.x); + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. 
*/ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - while(prim_addr < prim_addr2) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); - bool hit; - - /* todo: specialized intersect functions which don't fill in - * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? - * might give a few % performance improvement */ - - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, - isect_array, - P, - dir, - visibility, - object, - prim_addr); - break; - } + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + while (prim_addr < prim_addr2) { + kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); + bool hit; + + /* todo: specialized intersect functions which don't fill in + * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? 
+ * might give a few % performance improvement */ + + switch (p_type) { + case PRIMITIVE_TRIANGLE: { + hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr); + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect(kg, - isect_array, - P, - dir, - ray->time, - visibility, - object, - prim_addr); - break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect( + kg, isect_array, P, dir, ray->time, visibility, object, prim_addr); + break; + } #endif #if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect_array, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - NULL, - 0, 0); - } - else { - hit = curve_intersect(kg, - isect_array, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - NULL, - 0, 0); - } - break; - } + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); + if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { + hit = cardinal_curve_intersect(kg, + isect_array, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + NULL, + 0, + 0); + } + else { + hit = curve_intersect(kg, + isect_array, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + NULL, + 0, + 0); + } + break; + } #endif - default: { - hit = false; - break; - } - } + default: { + hit = false; + break; + } + } - /* shadow ray early termination */ - if(hit) { - /* detect if this surface has a shader with transparent shadows */ + /* shadow ray early termination */ + if (hit) { + /* detect if this surface has a shader with transparent shadows */ - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a 
transparent shadow shader? */ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? */ + int prim = kernel_tex_fetch(__prim_index, isect_array->prim); + int shader = 0; #ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) + if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) #endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } + { + shader = kernel_tex_fetch(__tri_shader, prim); + } #ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } #endif - int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; - - /* if no transparent shadows, all light is blocked */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return true; - } - /* if maximum number of hits reached, block all light */ - else if(*num_hits == max_hits) { - return true; - } - - /* move on to next entry in intersections array */ - isect_array++; - (*num_hits)++; + int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + + /* if no transparent shadows, all light is blocked */ + if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return true; + } + /* if maximum number of hits reached, block all light */ + else if (*num_hits == max_hits) { + return true; + } + + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; #if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; #endif - isect_array->t = isect_t; - } + isect_array->t = isect_t; + } - prim_addr++; - } - } + prim_addr++; + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); + else { + /* instance push */ + object = 
kernel_tex_fetch(__prim_object, -prim_addr - 1); # if BVH_FEATURE(BVH_MOTION) - isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif - num_hits_in_instance = 0; - isect_array->t = isect_t; + num_hits_in_instance = 0; + isect_array->t = isect_t; # if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect_t); + tfar = ssef(isect_t); # endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + node_addr = kernel_tex_fetch(__object_node, object); + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ - if(num_hits_in_instance) { - float t_fac; + /* Instance pop. 
*/ + if (num_hits_in_instance) { + float t_fac; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); # else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); # endif - /* scale isect->t to adjust for instancing */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { + /* scale isect->t to adjust for instancing */ + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + else { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif - } + } - isect_t = tmax; - isect_array->t = isect_t; + isect_t = tmax; + isect_array->t = isect_t; # if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect_t); + tfar = ssef(isect_t); # endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != 
ENTRYPOINT_SENTINEL); - return false; + return false; } ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, @@ -397,35 +387,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, const uint max_hits, uint *num_hits) { - switch(kernel_data.bvh.bvh_layout) { + switch (kernel_data.bvh.bvh_layout) { #ifdef __KERNEL_AVX2__ - case BVH_LAYOUT_BVH8: - return BVH_FUNCTION_FULL_NAME(OBVH)(kg, - ray, - isect_array, - visibility, - max_hits, - num_hits); + case BVH_LAYOUT_BVH8: + return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, visibility, max_hits, num_hits); #endif #ifdef __QBVH__ - case BVH_LAYOUT_BVH4: - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect_array, - visibility, - max_hits, - num_hits); + case BVH_LAYOUT_BVH4: + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, visibility, max_hits, num_hits); #endif - case BVH_LAYOUT_BVH2: - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect_array, - visibility, - max_hits, - num_hits); - } - kernel_assert(!"Should not happen"); - return false; + case BVH_LAYOUT_BVH2: + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, visibility, max_hits, num_hits); + } + kernel_assert(!"Should not happen"); + return false; } #undef BVH_FUNCTION_NAME diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h index 76d4cab663d..34a06d003bb 100644 --- a/intern/cycles/kernel/bvh/bvh_traversal.h +++ b/intern/cycles/kernel/bvh/bvh_traversal.h @@ -47,374 +47,362 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, Intersection *isect, const uint visibility #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , uint *lcg_state, + , + uint *lcg_state, float difl, float extmax #endif - ) +) { - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local 
memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; - BVH_DEBUG_INIT(); + BVH_DEBUG_INIT(); #if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; # if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect->t); + ssef tnear(0.0f), tfar(isect->t); # endif - shuffle_swap_t shufflexyz[3]; + shuffle_swap_t shufflexyz[3]; - Psplat[0] = 
ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); + ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #if !defined(__KERNEL_SSE2__) # if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - traverse_mask = NODE_INTERSECT_ROBUST(kg, - P, + if (difl != 0.0f) { + traverse_mask = NODE_INTERSECT_ROBUST(kg, + P, # if BVH_FEATURE(BVH_HAIR) - dir, + dir, # endif - idir, - isect->t, - difl, - extmax, - node_addr, - visibility, - dist); - } - else + idir, + isect->t, + difl, + extmax, + node_addr, + visibility, + dist); + } + else # endif - { - traverse_mask = NODE_INTERSECT(kg, - P, -# if BVH_FEATURE(BVH_HAIR) - dir, -# endif - idir, - isect->t, - node_addr, - visibility, - dist); - } + { + traverse_mask = NODE_INTERSECT(kg, + P, +# if BVH_FEATURE(BVH_HAIR) + dir, +# endif + idir, + isect->t, + node_addr, + visibility, + dist); + } #else // __KERNEL_SSE2__ # if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - traverse_mask = NODE_INTERSECT_ROBUST(kg, - P, - dir, + if (difl != 0.0f) { + traverse_mask = NODE_INTERSECT_ROBUST(kg, + P, + dir, # if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, + tnear, + tfar, # endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - difl, - extmax, - 
node_addr, - visibility, - dist); - } - else + tsplat, + Psplat, + idirsplat, + shufflexyz, + difl, + extmax, + node_addr, + visibility, + dist); + } + else # endif - { - traverse_mask = NODE_INTERSECT(kg, - P, - dir, -# if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, -# endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - node_addr, - visibility, - dist); - } + { + traverse_mask = NODE_INTERSECT(kg, + P, + dir, +# if BVH_FEATURE(BVH_HAIR) + tnear, + tfar, +# endif + tsplat, + Psplat, + idirsplat, + shufflexyz, + node_addr, + visibility, + dist); + } #endif // __KERNEL_SSE2__ - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool is_closest_child1 = (dist[1] < dist[0]); - if(is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - BVH_DEBUG_NEXT_NODE(); - } - - /* if node is leaf, fetch triangle list */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - int prim_addr = __float_as_int(leaf.x); + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. 
*/ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + BVH_DEBUG_NEXT_NODE(); + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(triangle_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr)) - { - /* shadow ray early termination */ + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) { + /* shadow ray early termination */ #if defined(__KERNEL_SSE2__) - if(visibility & PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); + tfar = ssef(isect->t); # endif #else - if(visibility & PATH_RAY_SHADOW_OPAQUE) - return true; + if (visibility & PATH_RAY_SHADOW_OPAQUE) + 
return true; #endif - } - } - break; - } + } + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(motion_triangle_intersect(kg, - isect, - P, - dir, - ray->time, - visibility, - object, - prim_addr)) - { - /* shadow ray early termination */ + case PRIMITIVE_MOTION_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (motion_triangle_intersect( + kg, isect, P, dir, ray->time, visibility, object, prim_addr)) { + /* shadow ray early termination */ # if defined(__KERNEL_SSE2__) - if(visibility & PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); + tfar = ssef(isect->t); # endif # else - if(visibility & PATH_RAY_SHADOW_OPAQUE) - return true; + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return true; # endif - } - } - break; - } -#endif /* BVH_FEATURE(BVH_MOTION) */ + } + } + break; + } +#endif /* BVH_FEATURE(BVH_MOTION) */ #if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); - bool hit; - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); - } - else { - hit = curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - 
extmax); - } - if(hit) { - /* shadow ray early termination */ + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); + kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); + bool hit; + if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { + hit = cardinal_curve_intersect(kg, + isect, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + lcg_state, + difl, + extmax); + } + else { + hit = curve_intersect(kg, + isect, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + lcg_state, + difl, + extmax); + } + if (hit) { + /* shadow ray early termination */ # if defined(__KERNEL_SSE2__) - if(visibility & PATH_RAY_SHADOW_OPAQUE) - return true; - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return true; + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); + tfar = ssef(isect->t); # endif # else - if(visibility & PATH_RAY_SHADOW_OPAQUE) - return true; + if (visibility & PATH_RAY_SHADOW_OPAQUE) + return true; # endif - } - } - break; - } -#endif /* BVH_FEATURE(BVH_HAIR) */ - } - } + } + } + break; + } +#endif /* BVH_FEATURE(BVH_HAIR) */ + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); # endif # if defined(__KERNEL_SSE2__) 
- Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); + tfar = ssef(isect->t); # endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_tex_fetch(__object_node, object); - BVH_DEBUG_NEXT_INSTANCE(); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + BVH_DEBUG_NEXT_INSTANCE(); + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* instance pop */ + /* instance pop */ # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif # if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); + 
tfar = ssef(isect->t); # endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return (isect->prim != PRIM_NONE); + return (isect->prim != PRIM_NONE); } ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, @@ -422,53 +410,57 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, Intersection *isect, const uint visibility #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , uint *lcg_state, + , + uint *lcg_state, float difl, float extmax #endif - ) +) { - switch(kernel_data.bvh.bvh_layout) { + switch (kernel_data.bvh.bvh_layout) { #ifdef __KERNEL_AVX2__ - case BVH_LAYOUT_BVH8: - return BVH_FUNCTION_FULL_NAME(OBVH)(kg, - ray, - isect, - visibility + case BVH_LAYOUT_BVH8: + return BVH_FUNCTION_FULL_NAME(OBVH)(kg, + ray, + isect, + visibility # if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , lcg_state, - difl, - extmax + , + lcg_state, + difl, + extmax # endif - ); + ); #endif #ifdef __QBVH__ - case BVH_LAYOUT_BVH4: - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect, - visibility + case BVH_LAYOUT_BVH4: + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, + ray, + isect, + visibility # if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , lcg_state, - difl, - extmax + , + lcg_state, + difl, + extmax # endif - ); -#endif /* __QBVH__ */ - case BVH_LAYOUT_BVH2: - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect, - visibility + ); +#endif /* __QBVH__ */ + case BVH_LAYOUT_BVH2: + return BVH_FUNCTION_FULL_NAME(BVH)(kg, + ray, + isect, + visibility #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - , lcg_state, - difl, - extmax + , + 
lcg_state, + difl, + extmax #endif - ); - } - kernel_assert(!"Should not happen"); - return false; + ); + } + kernel_assert(!"Should not happen"); + return false; } #undef BVH_FUNCTION_NAME diff --git a/intern/cycles/kernel/bvh/bvh_types.h b/intern/cycles/kernel/bvh/bvh_types.h index 4ca0dc2225e..16f3b03f842 100644 --- a/intern/cycles/kernel/bvh/bvh_types.h +++ b/intern/cycles/kernel/bvh/bvh_types.h @@ -35,13 +35,13 @@ CCL_NAMESPACE_BEGIN #define BVH_OSTACK_SIZE 768 /* BVH intersection function variations */ -#define BVH_INSTANCING 1 -#define BVH_MOTION 2 -#define BVH_HAIR 4 -#define BVH_HAIR_MINIMUM_WIDTH 8 +#define BVH_INSTANCING 1 +#define BVH_MOTION 2 +#define BVH_HAIR 4 +#define BVH_HAIR_MINIMUM_WIDTH 8 -#define BVH_NAME_JOIN(x,y) x ## _ ## y -#define BVH_NAME_EVAL(x,y) BVH_NAME_JOIN(x,y) +#define BVH_NAME_JOIN(x, y) x##_##y +#define BVH_NAME_EVAL(x, y) BVH_NAME_JOIN(x, y) #define BVH_FUNCTION_FULL_NAME(prefix) BVH_NAME_EVAL(prefix, BVH_FUNCTION_NAME) #define BVH_FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) @@ -49,30 +49,30 @@ CCL_NAMESPACE_BEGIN /* Debugging heleprs */ #ifdef __KERNEL_DEBUG__ # define BVH_DEBUG_INIT() \ - do { \ - isect->num_traversed_nodes = 0; \ - isect->num_traversed_instances = 0; \ - isect->num_intersections = 0; \ - } while(0) + do { \ + isect->num_traversed_nodes = 0; \ + isect->num_traversed_instances = 0; \ + isect->num_intersections = 0; \ + } while (0) # define BVH_DEBUG_NEXT_NODE() \ - do { \ - ++isect->num_traversed_nodes; \ - } while(0) + do { \ + ++isect->num_traversed_nodes; \ + } while (0) # define BVH_DEBUG_NEXT_INTERSECTION() \ - do { \ - ++isect->num_intersections; \ - } while(0) + do { \ + ++isect->num_intersections; \ + } while (0) # define BVH_DEBUG_NEXT_INSTANCE() \ - do { \ - ++isect->num_traversed_instances; \ - } while(0) -#else /* __KERNEL_DEBUG__ */ + do { \ + ++isect->num_traversed_instances; \ + } while (0) +#else /* __KERNEL_DEBUG__ */ # define BVH_DEBUG_INIT() # define BVH_DEBUG_NEXT_NODE() # define 
BVH_DEBUG_NEXT_INTERSECTION() # define BVH_DEBUG_NEXT_INSTANCE() -#endif /* __KERNEL_DEBUG__ */ +#endif /* __KERNEL_DEBUG__ */ CCL_NAMESPACE_END -#endif /* __BVH_TYPES__ */ +#endif /* __BVH_TYPES__ */ diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h index b8257e3493e..c83b0d783f4 100644 --- a/intern/cycles/kernel/bvh/bvh_volume.h +++ b/intern/cycles/kernel/bvh/bvh_volume.h @@ -19,9 +19,9 @@ #ifdef __QBVH__ # include "kernel/bvh/qbvh_volume.h" -#ifdef __KERNEL_AVX2__ -# include "kernel/bvh/obvh_volume.h" -#endif +# ifdef __KERNEL_AVX2__ +# include "kernel/bvh/obvh_volume.h" +# endif #endif #if BVH_FEATURE(BVH_HAIR) @@ -43,267 +43,260 @@ ccl_device #else ccl_device_inline #endif -bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect, - const uint visibility) + bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect, + const uint visibility) { - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* ray parameters in registers */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + 
int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* ray parameters in registers */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; #if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; # if BVH_FEATURE(BVH_HAIR) - ssef tnear(0.0f), tfar(isect->t); + ssef tnear(0.0f), tfar(isect->t); # endif - shuffle_swap_t shufflexyz[3]; + shuffle_swap_t shufflexyz[3]; - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); + ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, 
traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #if !defined(__KERNEL_SSE2__) - traverse_mask = NODE_INTERSECT(kg, - P, + traverse_mask = NODE_INTERSECT(kg, + P, # if BVH_FEATURE(BVH_HAIR) - dir, + dir, # endif - idir, - isect->t, - node_addr, - visibility, - dist); + idir, + isect->t, + node_addr, + visibility, + dist); #else // __KERNEL_SSE2__ - traverse_mask = NODE_INTERSECT(kg, - P, - dir, + traverse_mask = NODE_INTERSECT(kg, + P, + dir, # if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, + tnear, + tfar, # endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - node_addr, - visibility, - dist); + tsplat, + Psplat, + idirsplat, + shufflexyz, + node_addr, + visibility, + dist); #endif // __KERNEL_SSE2__ - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. */ - bool is_closest_child1 = (dist[1] < dist[0]); - if(is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - int prim_addr = __float_as_int(leaf.x); + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. 
*/ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - triangle_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr); - } - break; - } + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr); + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - motion_triangle_intersect(kg, - isect, - P, - dir, - ray->time, - visibility, - object, - prim_addr); - } - break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE) ? 
+ kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + motion_triangle_intersect( + kg, isect, P, dir, ray->time, visibility, object, prim_addr); + } + break; + } #endif - default: { - break; - } - } - } + default: { + break; + } + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_HAS_VOLUME) { + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); # endif # if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); + tfar = ssef(isect->t); # endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } 
- } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* instance pop */ + /* instance pop */ # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif # if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); + tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect->t); + tfar = ssef(isect->t); # endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } -#endif /* FEATURE(BVH_MOTION) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } +#endif /* FEATURE(BVH_MOTION) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return (isect->prim != PRIM_NONE); + return (isect->prim != PRIM_NONE); } ccl_device_inline 
bool BVH_FUNCTION_NAME(KernelGlobals *kg, @@ -311,29 +304,20 @@ ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg, Intersection *isect, const uint visibility) { - switch(kernel_data.bvh.bvh_layout) { + switch (kernel_data.bvh.bvh_layout) { #ifdef __KERNEL_AVX2__ - case BVH_LAYOUT_BVH8: - return BVH_FUNCTION_FULL_NAME(OBVH)(kg, - ray, - isect, - visibility); + case BVH_LAYOUT_BVH8: + return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect, visibility); #endif #ifdef __QBVH__ - case BVH_LAYOUT_BVH4: - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect, - visibility); + case BVH_LAYOUT_BVH4: + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect, visibility); #endif - case BVH_LAYOUT_BVH2: - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect, - visibility); - } - kernel_assert(!"Should not happen"); - return false; + case BVH_LAYOUT_BVH2: + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect, visibility); + } + kernel_assert(!"Should not happen"); + return false; } #undef BVH_FUNCTION_NAME diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h index f3ca4058460..ae8c4d12e8a 100644 --- a/intern/cycles/kernel/bvh/bvh_volume_all.h +++ b/intern/cycles/kernel/bvh/bvh_volume_all.h @@ -19,9 +19,9 @@ #ifdef __QBVH__ # include "kernel/bvh/qbvh_volume_all.h" -#ifdef __KERNEL_AVX2__ -# include "kernel/bvh/obvh_volume_all.h" -#endif +# ifdef __KERNEL_AVX2__ +# include "kernel/bvh/obvh_volume_all.h" +# endif #endif #if BVH_FEATURE(BVH_HAIR) @@ -43,342 +43,337 @@ ccl_device #else ccl_device_inline #endif -uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, - const Ray *ray, - Intersection *isect_array, - const uint max_hits, - const uint visibility) + uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg, + const Ray *ray, + Intersection *isect_array, + const uint max_hits, + const uint visibility) { - /* todo: - * - test if pushing distance on the stack helps (for non shadow rays) - * - separate version for shadow rays - * - likely 
and unlikely for if() statements - * - test restrict attribute for pointers - */ - - /* traversal stack in CUDA thread-local memory */ - int traversal_stack[BVH_STACK_SIZE]; - traversal_stack[0] = ENTRYPOINT_SENTINEL; - - /* traversal variables in registers */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* ray parameters in registers */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; + /* todo: + * - test if pushing distance on the stack helps (for non shadow rays) + * - separate version for shadow rays + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversal_stack[BVH_STACK_SIZE]; + traversal_stack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif #if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; + int num_hits_in_instance = 0; #endif - uint num_hits = 0; - isect_array->t = tmax; + uint num_hits = 0; + isect_array->t = tmax; #if defined(__KERNEL_SSE2__) - const shuffle_swap_t shuf_identity = shuffle_swap_identity(); - const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); - const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); - ssef Psplat[3], idirsplat[3]; + const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000)); + ssef Psplat[3], idirsplat[3]; # if BVH_FEATURE(BVH_HAIR) - ssef 
tnear(0.0f), tfar(isect_t); + ssef tnear(0.0f), tfar(isect_t); # endif - shuffle_swap_t shufflexyz[3]; + shuffle_swap_t shufflexyz[3]; - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); + ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t); - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); -#endif /* __KERNEL_SSE2__ */ + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif /* __KERNEL_SSE2__ */ - /* traversal loop */ - do { - do { - /* traverse internal nodes */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - int node_addr_child1, traverse_mask; - float dist[2]; - float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + int node_addr_child1, traverse_mask; + float dist[2]; + float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #if !defined(__KERNEL_SSE2__) - traverse_mask = NODE_INTERSECT(kg, - P, + traverse_mask = NODE_INTERSECT(kg, + P, # if BVH_FEATURE(BVH_HAIR) - dir, + dir, # endif - idir, - isect_t, - node_addr, - visibility, - dist); + idir, + isect_t, + node_addr, + visibility, + dist); #else // __KERNEL_SSE2__ - traverse_mask = NODE_INTERSECT(kg, - P, - dir, + traverse_mask = NODE_INTERSECT(kg, + P, + dir, # if BVH_FEATURE(BVH_HAIR) - tnear, - tfar, + tnear, + tfar, # endif - tsplat, - Psplat, - idirsplat, - shufflexyz, - node_addr, - visibility, - dist); + tsplat, + Psplat, + idirsplat, + shufflexyz, + node_addr, + visibility, + dist); #endif // __KERNEL_SSE2__ - node_addr = __float_as_int(cnodes.z); - node_addr_child1 = __float_as_int(cnodes.w); - - if(traverse_mask == 3) { - /* Both children were intersected, push the farther one. 
*/ - bool is_closest_child1 = (dist[1] < dist[0]); - if(is_closest_child1) { - int tmp = node_addr; - node_addr = node_addr_child1; - node_addr_child1 = tmp; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = node_addr_child1; - } - else { - /* One child was intersected. */ - if(traverse_mask == 2) { - node_addr = node_addr_child1; - } - else if(traverse_mask == 0) { - /* Neither child was intersected. */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } - - /* if node is leaf, fetch triangle list */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - int prim_addr = __float_as_int(leaf.x); + node_addr = __float_as_int(cnodes.z); + node_addr_child1 = __float_as_int(cnodes.w); + + if (traverse_mask == 3) { + /* Both children were intersected, push the farther one. */ + bool is_closest_child1 = (dist[1] < dist[0]); + if (is_closest_child1) { + int tmp = node_addr; + node_addr = node_addr_child1; + node_addr_child1 = tmp; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = node_addr_child1; + } + else { + /* One child was intersected. */ + if (traverse_mask == 2) { + node_addr = node_addr_child1; + } + else if (traverse_mask == 0) { + /* Neither child was intersected. 
*/ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - const int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - bool hit; - - /* pop */ - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - - /* primitive intersection */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* intersect ray against primitive */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - hit = triangle_intersect(kg, - isect_array, - P, - dir, - visibility, - object, - prim_addr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; + const int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + bool hit; + + /* pop */ + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + + /* primitive intersection */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE) ? 
+ kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr); + if (hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; #if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; #endif - isect_array->t = isect_t; - if(num_hits == max_hits) { + isect_array->t = isect_t; + if (num_hits == max_hits) { #if BVH_FEATURE(BVH_INSTANCING) - if(object != OBJECT_NONE) { + if (object != OBJECT_NONE) { # if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); # else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); + Transform itfm = object_fetch_transform( + kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); # endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } -#endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* intersect ray against primitive */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* only primitives from volume object */ - uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - hit = motion_triangle_intersect(kg, - isect_array, - P, - dir, - ray->time, - visibility, - object, - prim_addr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; + case PRIMITIVE_MOTION_TRIANGLE: { + /* intersect ray against primitive */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* only primitives from volume object */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + hit = motion_triangle_intersect( + kg, isect_array, P, dir, ray->time, visibility, object, prim_addr); + if (hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; # if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; # endif - isect_array->t = isect_t; - if(num_hits == max_hits) { + isect_array->t = isect_t; + if (num_hits == max_hits) { # if BVH_FEATURE(BVH_INSTANCING) - if(object != OBJECT_NONE) { + if (object != OBJECT_NONE) { # if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); # else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); + Transform itfm = object_fetch_transform( + kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); # endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } -# endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } -#endif /* BVH_MOTION */ - default: { - break; - } - } - } + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } +# endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } +#endif /* BVH_MOTION */ + default: { + break; + } + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* instance push */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_HAS_VOLUME) { + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); + isect_t 
= bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif - num_hits_in_instance = 0; - isect_array->t = isect_t; + num_hits_in_instance = 0; + isect_array->t = isect_t; # if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect_t); + tfar = ssef(isect_t); # endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_STACK_SIZE); - traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - else { - /* pop */ - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + ++stack_ptr; + kernel_assert(stack_ptr < BVH_STACK_SIZE); + traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + else { + /* pop */ + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ - if(num_hits_in_instance) { - float t_fac; + /* Instance pop. 
*/ + if (num_hits_in_instance) { + float t_fac; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); # else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); # endif - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { + /* Scale isect->t to adjust for instancing. */ + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + else { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif - } + } - isect_t = tmax; - isect_array->t = isect_t; + isect_t = tmax; + isect_array->t = isect_t; # if defined(__KERNEL_SSE2__) - Psplat[0] = ssef(P.x); - Psplat[1] = ssef(P.y); - Psplat[2] = ssef(P.z); + Psplat[0] = ssef(P.x); + Psplat[1] = ssef(P.y); + Psplat[2] = ssef(P.z); - tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); + tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t); # if BVH_FEATURE(BVH_HAIR) - tfar = ssef(isect_t); + tfar = ssef(isect_t); # endif - gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr]; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr]; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != 
ENTRYPOINT_SENTINEL); - return num_hits; + return num_hits; } ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, @@ -387,32 +382,20 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg, const uint max_hits, const uint visibility) { - switch(kernel_data.bvh.bvh_layout) { + switch (kernel_data.bvh.bvh_layout) { #ifdef __KERNEL_AVX2__ - case BVH_LAYOUT_BVH8: - return BVH_FUNCTION_FULL_NAME(OBVH)(kg, - ray, - isect_array, - max_hits, - visibility); + case BVH_LAYOUT_BVH8: + return BVH_FUNCTION_FULL_NAME(OBVH)(kg, ray, isect_array, max_hits, visibility); #endif #ifdef __QBVH__ - case BVH_LAYOUT_BVH4: - return BVH_FUNCTION_FULL_NAME(QBVH)(kg, - ray, - isect_array, - max_hits, - visibility); + case BVH_LAYOUT_BVH4: + return BVH_FUNCTION_FULL_NAME(QBVH)(kg, ray, isect_array, max_hits, visibility); #endif - case BVH_LAYOUT_BVH2: - return BVH_FUNCTION_FULL_NAME(BVH)(kg, - ray, - isect_array, - max_hits, - visibility); - } - kernel_assert(!"Should not happen"); - return 0; + case BVH_LAYOUT_BVH2: + return BVH_FUNCTION_FULL_NAME(BVH)(kg, ray, isect_array, max_hits, visibility); + } + kernel_assert(!"Should not happen"); + return 0; } #undef BVH_FUNCTION_NAME diff --git a/intern/cycles/kernel/bvh/obvh_local.h b/intern/cycles/kernel/bvh/obvh_local.h index f449cefb335..e6bb548bc5b 100644 --- a/intern/cycles/kernel/bvh/obvh_local.h +++ b/intern/cycles/kernel/bvh/obvh_local.h @@ -34,372 +34,365 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, uint *lcg_state, int max_hits) { - /* Traversal stack in CUDA thread-local memory. */ - OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + /* Traversal stack in CUDA thread-local memory. */ + OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_tex_fetch(__object_node, local_object); + /* Traversal variables in registers. 
*/ + int stack_ptr = 0; + int node_addr = kernel_tex_fetch(__object_node, local_object); - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = ray->t; + /* Ray parameters in registers. */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = ray->t; - if(local_isect != NULL) { - local_isect->num_hits = 0; - } - kernel_assert((local_isect == NULL) == (max_hits == 0)); + if (local_isect != NULL) { + local_isect->num_hits = 0; + } + kernel_assert((local_isect == NULL) == (max_hits == 0)); - const int object_flag = kernel_tex_fetch(__object_flag, local_object); - if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const int object_flag = kernel_tex_fetch(__object_flag, local_object); + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - isect_t = bvh_instance_motion_push(kg, - local_object, - ray, - &P, - &dir, - &idir, - isect_t, - &ob_itfm); + Transform ob_itfm; + isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm); #else - isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t); + isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t); #endif - object = local_object; - } + object = local_object; + } - avxf tnear(0.0f), tfar(isect_t); + avxf tnear(0.0f), tfar(isect_t); #if BVH_FEATURE(BVH_HAIR) - avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); #endif - avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z); #endif #if 
BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z)); + avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - avxf dist; - int child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + avxf dist; + int child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); - if(child_mask != 0) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - avxf cnodes; + if (child_mask != 0) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + avxf cnodes; #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26); + } + else #endif - { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14); - } + { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14); + } - /* One child is 
hit, continue with that child. */ - int r = __bscf(child_mask); - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - continue; - } + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + continue; + } - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float *)&dist)[r]; + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. 
- */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; - /* Five children are hit, push all onto stack 
and sort 5 - * stack items, continue with closest child - */ - r = __bscf(child_mask); - int c4 = __float_as_int(cnodes[r]); - float d4 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - /* Six children are hit, push all onto stack and sort 6 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c5 = __float_as_int(cnodes[r]); - float d5 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + /* Five children are hit, push all onto stack and sort 5 + * stack items, continue with closest child + */ + r = __bscf(child_mask); + int c4 = __float_as_int(cnodes[r]); + float d4 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4]); + node_addr = 
traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + /* Six children are hit, push all onto stack and sort 6 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c5 = __float_as_int(cnodes[r]); + float d5 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; - /* Seven children are hit, push all onto stack and sort 7 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c6 = __float_as_int(cnodes[r]); - float d6 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - /* Eight children are hit, push all onto stack and sort 8 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c7 = __float_as_int(cnodes[r]); - float d7 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c7; - traversal_stack[stack_ptr].dist = d7; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6], - &traversal_stack[stack_ptr - 7]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + /* Seven children are hit, push all onto stack and sort 7 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c6 = __float_as_int(cnodes[r]); + float d6 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + /* Eight children are hit, push all onto stack and sort 8 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c7 = __float_as_int(cnodes[r]); + float d7 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c7; + traversal_stack[stack_ptr].dist = d7; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6], + &traversal_stack[stack_ptr - 7]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - int prim_addr = __float_as_int(leaf.x); + /* If node is leaf, fetch triangle list. 
*/ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; - /* Primitive intersection. */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* Intersect ray against primitive, */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(triangle_intersect_local(kg, - local_isect, - P, - dir, - object, - local_object, - prim_addr, - isect_t, - lcg_state, - max_hits)) - { - return true; - } - } - break; - } + /* Primitive intersection. */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* Intersect ray against primitive, */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect_local(kg, + local_isect, + P, + dir, + object, + local_object, + prim_addr, + isect_t, + lcg_state, + max_hits)) { + return true; + } + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* Intersect ray against primitive. */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(motion_triangle_intersect_local(kg, - local_isect, - P, - dir, - ray->time, - object, - local_object, - prim_addr, - isect_t, - lcg_state, - max_hits)) - { - return true; - } - } - break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + /* Intersect ray against primitive. 
*/ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (motion_triangle_intersect_local(kg, + local_isect, + P, + dir, + ray->time, + object, + local_object, + prim_addr, + isect_t, + lcg_state, + max_hits)) { + return true; + } + } + break; + } #endif - default: - break; - } - } - } while(node_addr != ENTRYPOINT_SENTINEL); - } while(node_addr != ENTRYPOINT_SENTINEL); - return false; + default: + break; + } + } + } while (node_addr != ENTRYPOINT_SENTINEL); + } while (node_addr != ENTRYPOINT_SENTINEL); + return false; } #undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/obvh_nodes.h b/intern/cycles/kernel/bvh/obvh_nodes.h index 93f35f6dffb..6831562cade 100644 --- a/intern/cycles/kernel/bvh/obvh_nodes.h +++ b/intern/cycles/kernel/bvh/obvh_nodes.h @@ -17,11 +17,11 @@ */ struct OBVHStackItem { - int addr; - float dist; + int addr; + float dist; }; -ccl_device_inline void obvh_near_far_idx_calc(const float3& idir, +ccl_device_inline void obvh_near_far_idx_calc(const float3 &idir, int *ccl_restrict near_x, int *ccl_restrict near_y, int *ccl_restrict near_z, @@ -31,41 +31,73 @@ ccl_device_inline void obvh_near_far_idx_calc(const float3& idir, { #ifdef __KERNEL_SSE__ - *near_x = 0; *far_x = 1; - *near_y = 2; *far_y = 3; - *near_z = 4; *far_z = 5; - - const size_t mask = movemask(ssef(idir.m128)); - - const int mask_x = mask & 1; - const int mask_y = (mask & 2) >> 1; - const int mask_z = (mask & 4) >> 2; - - *near_x += mask_x; *far_x -= mask_x; - *near_y += mask_y; *far_y -= mask_y; - *near_z += mask_z; *far_z -= mask_z; + *near_x = 0; + *far_x = 1; + *near_y = 2; + *far_y = 3; + *near_z = 4; + *far_z = 5; + + const size_t mask = movemask(ssef(idir.m128)); + + const int mask_x = mask & 1; + const int mask_y = (mask & 2) >> 1; + const int mask_z = (mask & 4) >> 2; + + *near_x += mask_x; + *far_x -= mask_x; + *near_y += mask_y; + *far_y -= mask_y; + *near_z += mask_z; + *far_z -= mask_z; #else - 
if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; } - if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; } - if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; } + if (idir.x >= 0.0f) { + *near_x = 0; + *far_x = 1; + } + else { + *near_x = 1; + *far_x = 0; + } + if (idir.y >= 0.0f) { + *near_y = 2; + *far_y = 3; + } + else { + *near_y = 3; + *far_y = 2; + } + if (idir.z >= 0.0f) { + *near_z = 4; + *far_z = 5; + } + else { + *near_z = 5; + *far_z = 4; + } #endif } -ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, - OBVHStackItem *ccl_restrict b) +ccl_device_inline void obvh_item_swap(OBVHStackItem *ccl_restrict a, OBVHStackItem *ccl_restrict b) { - OBVHStackItem tmp = *a; - *a = *b; - *b = tmp; + OBVHStackItem tmp = *a; + *a = *b; + *b = tmp; } ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, OBVHStackItem *ccl_restrict s2, OBVHStackItem *ccl_restrict s3) { - if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); } - if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); } - if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); } + if (s2->dist < s1->dist) { + obvh_item_swap(s2, s1); + } + if (s3->dist < s2->dist) { + obvh_item_swap(s3, s2); + } + if (s2->dist < s1->dist) { + obvh_item_swap(s2, s1); + } } ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, @@ -73,11 +105,21 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, OBVHStackItem *ccl_restrict s3, OBVHStackItem *ccl_restrict s4) { - if(s2->dist < s1->dist) { obvh_item_swap(s2, s1); } - if(s4->dist < s3->dist) { obvh_item_swap(s4, s3); } - if(s3->dist < s1->dist) { obvh_item_swap(s3, s1); } - if(s4->dist < s2->dist) { obvh_item_swap(s4, s2); } - if(s3->dist < s2->dist) { obvh_item_swap(s3, s2); } + if (s2->dist < s1->dist) { + obvh_item_swap(s2, s1); + } + if (s4->dist < s3->dist) { + obvh_item_swap(s4, s3); + } + if (s3->dist < s1->dist) { + 
obvh_item_swap(s3, s1); + } + if (s4->dist < s2->dist) { + obvh_item_swap(s4, s2); + } + if (s3->dist < s2->dist) { + obvh_item_swap(s3, s2); + } } ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, @@ -86,19 +128,19 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, OBVHStackItem *ccl_restrict s4, OBVHStackItem *ccl_restrict s5) { - obvh_stack_sort(s1, s2, s3, s4); - if(s5->dist < s4->dist) { - obvh_item_swap(s4, s5); - if(s4->dist < s3->dist) { - obvh_item_swap(s3, s4); - if(s3->dist < s2->dist) { - obvh_item_swap(s2, s3); - if(s2->dist < s1->dist) { - obvh_item_swap(s1, s2); - } - } - } - } + obvh_stack_sort(s1, s2, s3, s4); + if (s5->dist < s4->dist) { + obvh_item_swap(s4, s5); + if (s4->dist < s3->dist) { + obvh_item_swap(s3, s4); + if (s3->dist < s2->dist) { + obvh_item_swap(s2, s3); + if (s2->dist < s1->dist) { + obvh_item_swap(s1, s2); + } + } + } + } } ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, @@ -108,22 +150,22 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, OBVHStackItem *ccl_restrict s5, OBVHStackItem *ccl_restrict s6) { - obvh_stack_sort(s1, s2, s3, s4, s5); - if(s6->dist < s5->dist) { - obvh_item_swap(s5, s6); - if(s5->dist < s4->dist) { - obvh_item_swap(s4, s5); - if(s4->dist < s3->dist) { - obvh_item_swap(s3, s4); - if(s3->dist < s2->dist) { - obvh_item_swap(s2, s3); - if(s2->dist < s1->dist) { - obvh_item_swap(s1, s2); - } - } - } - } - } + obvh_stack_sort(s1, s2, s3, s4, s5); + if (s6->dist < s5->dist) { + obvh_item_swap(s5, s6); + if (s5->dist < s4->dist) { + obvh_item_swap(s4, s5); + if (s4->dist < s3->dist) { + obvh_item_swap(s3, s4); + if (s3->dist < s2->dist) { + obvh_item_swap(s2, s3); + if (s2->dist < s1->dist) { + obvh_item_swap(s1, s2); + } + } + } + } + } } ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, @@ -134,25 +176,25 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, OBVHStackItem 
*ccl_restrict s6, OBVHStackItem *ccl_restrict s7) { - obvh_stack_sort(s1, s2, s3, s4, s5, s6); - if(s7->dist < s6->dist) { - obvh_item_swap(s6, s7); - if(s6->dist < s5->dist) { - obvh_item_swap(s5, s6); - if(s5->dist < s4->dist) { - obvh_item_swap(s4, s5); - if(s4->dist < s3->dist) { - obvh_item_swap(s3, s4); - if(s3->dist < s2->dist) { - obvh_item_swap(s2, s3); - if(s2->dist < s1->dist) { - obvh_item_swap(s1, s2); - } - } - } - } - } - } + obvh_stack_sort(s1, s2, s3, s4, s5, s6); + if (s7->dist < s6->dist) { + obvh_item_swap(s6, s7); + if (s6->dist < s5->dist) { + obvh_item_swap(s5, s6); + if (s5->dist < s4->dist) { + obvh_item_swap(s4, s5); + if (s4->dist < s3->dist) { + obvh_item_swap(s3, s4); + if (s3->dist < s2->dist) { + obvh_item_swap(s2, s3); + if (s2->dist < s1->dist) { + obvh_item_swap(s1, s2); + } + } + } + } + } + } } ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, @@ -164,41 +206,41 @@ ccl_device_inline void obvh_stack_sort(OBVHStackItem *ccl_restrict s1, OBVHStackItem *ccl_restrict s7, OBVHStackItem *ccl_restrict s8) { - obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7); - if(s8->dist < s7->dist) { - obvh_item_swap(s7, s8); - if(s7->dist < s6->dist) { - obvh_item_swap(s6, s7); - if(s6->dist < s5->dist) { - obvh_item_swap(s5, s6); - if(s5->dist < s4->dist) { - obvh_item_swap(s4, s5); - if(s4->dist < s3->dist) { - obvh_item_swap(s3, s4); - if(s3->dist < s2->dist) { - obvh_item_swap(s2, s3); - if(s2->dist < s1->dist) { - obvh_item_swap(s1, s2); - } - } - } - } - } - } - } + obvh_stack_sort(s1, s2, s3, s4, s5, s6, s7); + if (s8->dist < s7->dist) { + obvh_item_swap(s7, s8); + if (s7->dist < s6->dist) { + obvh_item_swap(s6, s7); + if (s6->dist < s5->dist) { + obvh_item_swap(s5, s6); + if (s5->dist < s4->dist) { + obvh_item_swap(s4, s5); + if (s4->dist < s3->dist) { + obvh_item_swap(s3, s4); + if (s3->dist < s2->dist) { + obvh_item_swap(s2, s3); + if (s2->dist < s1->dist) { + obvh_item_swap(s1, s2); + } + } + } + } + } + } + } } /* 
Axis-aligned nodes intersection */ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg, - const avxf& isect_near, - const avxf& isect_far, + const avxf &isect_near, + const avxf &isect_far, #ifdef __KERNEL_AVX2__ - const avx3f& org_idir, + const avx3f &org_idir, #else - const avx3f& org, + const avx3f &org, #endif - const avx3f& idir, + const avx3f &idir, const int near_x, const int near_y, const int near_z, @@ -208,213 +250,216 @@ ccl_device_inline int obvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg const int node_addr, avxf *ccl_restrict dist) { - const int offset = node_addr + 2; + const int offset = node_addr + 2; #ifdef __KERNEL_AVX2__ - const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_x*2), idir.x, org_idir.x); - const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_y*2), idir.y, org_idir.y); - const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+near_z*2), idir.z, org_idir.z); - const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_x*2), idir.x, org_idir.x); - const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_y*2), idir.y, org_idir.y); - const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset+far_z*2), idir.z, org_idir.z); - - const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near); - const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far); - const avxb vmask = tnear <= tfar; - int mask = (int)movemask(vmask); - *dist = tnear; - return mask; + const avxf tnear_x = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, org_idir.x); + const avxf tnear_y = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, org_idir.y); + const avxf tnear_z = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, org_idir.z); + const avxf tfar_x = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, org_idir.x); + const avxf tfar_y = msub( + 
kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, org_idir.y); + const avxf tfar_z = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, org_idir.z); + + const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near); + const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far); + const avxb vmask = tnear <= tfar; + int mask = (int)movemask(vmask); + *dist = tnear; + return mask; #else - return 0; + return 0; #endif } -ccl_device_inline int obvh_aligned_node_intersect_robust( - KernelGlobals *ccl_restrict kg, - const avxf& isect_near, - const avxf& isect_far, +ccl_device_inline int obvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg, + const avxf &isect_near, + const avxf &isect_far, #ifdef __KERNEL_AVX2__ - const avx3f& P_idir, + const avx3f &P_idir, #else - const avx3f& P, + const avx3f &P, #endif - const avx3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - avxf *ccl_restrict dist) + const avx3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + const float difl, + avxf *ccl_restrict dist) { - const int offset = node_addr + 2; + const int offset = node_addr + 2; #ifdef __KERNEL_AVX2__ - const avxf tnear_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x); - const avxf tfar_x = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x); - const avxf tnear_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y); - const avxf tfar_y = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y); - const avxf tnear_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z); - const avxf tfar_z = msub(kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z); - - const 
float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near); - const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far); - const avxb vmask = round_down*tnear <= round_up*tfar; - int mask = (int)movemask(vmask); - *dist = tnear; - return mask; + const avxf tnear_x = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + near_x * 2), idir.x, P_idir.x); + const avxf tfar_x = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + far_x * 2), idir.x, P_idir.x); + const avxf tnear_y = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + near_y * 2), idir.y, P_idir.y); + const avxf tfar_y = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + far_y * 2), idir.y, P_idir.y); + const avxf tnear_z = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + near_z * 2), idir.z, P_idir.z); + const avxf tfar_z = msub( + kernel_tex_fetch_avxf(__bvh_nodes, offset + far_z * 2), idir.z, P_idir.z); + + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + const avxf tnear = max4(tnear_x, tnear_y, tnear_z, isect_near); + const avxf tfar = min4(tfar_x, tfar_y, tfar_z, isect_far); + const avxb vmask = round_down * tnear <= round_up * tfar; + int mask = (int)movemask(vmask); + *dist = tnear; + return mask; #else - return 0; + return 0; #endif } /* Unaligned nodes intersection */ -ccl_device_inline int obvh_unaligned_node_intersect( - KernelGlobals *ccl_restrict kg, - const avxf& isect_near, - const avxf& isect_far, +ccl_device_inline int obvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg, + const avxf &isect_near, + const avxf &isect_far, #ifdef __KERNEL_AVX2__ - const avx3f& org_idir, + const avx3f &org_idir, #endif - const avx3f& org, - const avx3f& dir, - const avx3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - avxf *ccl_restrict dist) + const avx3f &org, + const avx3f &dir, + const 
avx3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + avxf *ccl_restrict dist) { - const int offset = node_addr; - const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+2); - const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+4); - const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+6); - - const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+8); - const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+10); - const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+12); - - const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+14); - const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+16); - const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+18); - - const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+20); - const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+22); - const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+24); - - const avxf aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, - aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, - aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; - - const avxf aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x, - aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y, - aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z; - - const avxf neg_one(-1.0f); - const avxf nrdir_x = neg_one / aligned_dir_x, - nrdir_y = neg_one / aligned_dir_y, - nrdir_z = neg_one / aligned_dir_z; - - const avxf tlower_x = aligned_P_x * nrdir_x, - tlower_y = aligned_P_y * nrdir_y, - tlower_z = aligned_P_z * nrdir_z; - - const avxf tupper_x = tlower_x - nrdir_x, - tupper_y = tlower_y - nrdir_y, - tupper_z = tlower_z - nrdir_z; - - const avxf tnear_x = min(tlower_x, tupper_x); - const avxf tnear_y = min(tlower_y, tupper_y); - const avxf 
tnear_z = min(tlower_z, tupper_z); - const avxf tfar_x = max(tlower_x, tupper_x); - const avxf tfar_y = max(tlower_y, tupper_y); - const avxf tfar_z = max(tlower_z, tupper_z); - const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const avxb vmask = tnear <= tfar; - *dist = tnear; - return movemask(vmask); + const int offset = node_addr; + const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2); + const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4); + const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 6); + + const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8); + const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10); + const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12); + + const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14); + const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16); + const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18); + + const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20); + const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22); + const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24); + + const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z, + aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z, + aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z; + + const avxf aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x, + aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y, + aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z; + + const avxf neg_one(-1.0f); + const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y, + nrdir_z = neg_one / aligned_dir_z; + + const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y, + 
tlower_z = aligned_P_z * nrdir_z; + + const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y, + tupper_z = tlower_z - nrdir_z; + + const avxf tnear_x = min(tlower_x, tupper_x); + const avxf tnear_y = min(tlower_y, tupper_y); + const avxf tnear_z = min(tlower_z, tupper_z); + const avxf tfar_x = max(tlower_x, tupper_x); + const avxf tfar_y = max(tlower_y, tupper_y); + const avxf tfar_z = max(tlower_z, tupper_z); + const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); + const avxb vmask = tnear <= tfar; + *dist = tnear; + return movemask(vmask); } -ccl_device_inline int obvh_unaligned_node_intersect_robust( - KernelGlobals *ccl_restrict kg, - const avxf& isect_near, - const avxf& isect_far, +ccl_device_inline int obvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg, + const avxf &isect_near, + const avxf &isect_far, #ifdef __KERNEL_AVX2__ - const avx3f& P_idir, + const avx3f &P_idir, #endif - const avx3f& P, - const avx3f& dir, - const avx3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - avxf *ccl_restrict dist) + const avx3f &P, + const avx3f &dir, + const avx3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + const float difl, + avxf *ccl_restrict dist) { - const int offset = node_addr; - const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+2); - const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+4); - const avxf tfm_x_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+6); - - const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+8); - const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+10); - const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+12); - - const avxf tfm_z_x = 
kernel_tex_fetch_avxf(__bvh_nodes, offset+14); - const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+16); - const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+18); - - const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset+20); - const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset+22); - const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset+24); - - const avxf aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, - aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, - aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; - - const avxf aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x, - aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y, - aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z; - - const avxf neg_one(-1.0f); - const avxf nrdir_x = neg_one / aligned_dir_x, - nrdir_y = neg_one / aligned_dir_y, - nrdir_z = neg_one / aligned_dir_z; - - const avxf tlower_x = aligned_P_x * nrdir_x, - tlower_y = aligned_P_y * nrdir_y, - tlower_z = aligned_P_z * nrdir_z; - - const avxf tupper_x = tlower_x - nrdir_x, - tupper_y = tlower_y - nrdir_y, - tupper_z = tlower_z - nrdir_z; - - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - - const avxf tnear_x = min(tlower_x, tupper_x); - const avxf tnear_y = min(tlower_y, tupper_y); - const avxf tnear_z = min(tlower_z, tupper_z); - const avxf tfar_x = max(tlower_x, tupper_x); - const avxf tfar_y = max(tlower_y, tupper_y); - const avxf tfar_z = max(tlower_z, tupper_z); - - const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const avxf tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const avxb vmask = round_down*tnear <= round_up*tfar; - *dist = tnear; - return movemask(vmask); + const int offset = node_addr; + const avxf tfm_x_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 2); + const avxf tfm_x_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 4); + const avxf tfm_x_z = 
kernel_tex_fetch_avxf(__bvh_nodes, offset + 6); + + const avxf tfm_y_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 8); + const avxf tfm_y_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 10); + const avxf tfm_y_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 12); + + const avxf tfm_z_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 14); + const avxf tfm_z_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 16); + const avxf tfm_z_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 18); + + const avxf tfm_t_x = kernel_tex_fetch_avxf(__bvh_nodes, offset + 20); + const avxf tfm_t_y = kernel_tex_fetch_avxf(__bvh_nodes, offset + 22); + const avxf tfm_t_z = kernel_tex_fetch_avxf(__bvh_nodes, offset + 24); + + const avxf aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z, + aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z, + aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z; + + const avxf aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x, + aligned_P_y = P.x * tfm_y_x + P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y, + aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z; + + const avxf neg_one(-1.0f); + const avxf nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y, + nrdir_z = neg_one / aligned_dir_z; + + const avxf tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y, + tlower_z = aligned_P_z * nrdir_z; + + const avxf tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y, + tupper_z = tlower_z - nrdir_z; + + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + + const avxf tnear_x = min(tlower_x, tupper_x); + const avxf tnear_y = min(tlower_y, tupper_y); + const avxf tnear_z = min(tlower_z, tupper_z); + const avxf tfar_x = max(tlower_x, tupper_x); + const avxf tfar_y = max(tlower_y, tupper_y); + const avxf tfar_z = max(tlower_z, tupper_z); + + const avxf tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const avxf tfar = 
min4(isect_far, tfar_x, tfar_y, tfar_z); + const avxb vmask = round_down * tnear <= round_up * tfar; + *dist = tnear; + return movemask(vmask); } /* Intersectors wrappers. @@ -422,111 +467,125 @@ ccl_device_inline int obvh_unaligned_node_intersect_robust( * They'll check node type and call appropriate intersection code. */ -ccl_device_inline int obvh_node_intersect( - KernelGlobals *ccl_restrict kg, - const avxf& isect_near, - const avxf& isect_far, +ccl_device_inline int obvh_node_intersect(KernelGlobals *ccl_restrict kg, + const avxf &isect_near, + const avxf &isect_far, #ifdef __KERNEL_AVX2__ - const avx3f& org_idir, + const avx3f &org_idir, #endif - const avx3f& org, - const avx3f& dir, - const avx3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - avxf *ccl_restrict dist) + const avx3f &org, + const avx3f &dir, + const avx3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + avxf *ccl_restrict dist) { - const int offset = node_addr; - const float4 node = kernel_tex_fetch(__bvh_nodes, offset); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return obvh_unaligned_node_intersect(kg, - isect_near, - isect_far, + const int offset = node_addr; + const float4 node = kernel_tex_fetch(__bvh_nodes, offset); + if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return obvh_unaligned_node_intersect(kg, + isect_near, + isect_far, #ifdef __KERNEL_AVX2__ - org_idir, + org_idir, #endif - org, - dir, - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - dist); - } - else { - return obvh_aligned_node_intersect(kg, - isect_near, - isect_far, + org, + dir, + idir, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + dist); + } + else { + return obvh_aligned_node_intersect(kg, + isect_near, + isect_far, #ifdef __KERNEL_AVX2__ - 
org_idir, + org_idir, #else - org, + org, #endif - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - dist); - } + idir, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + dist); + } } -ccl_device_inline int obvh_node_intersect_robust( - KernelGlobals *ccl_restrict kg, - const avxf& isect_near, - const avxf& isect_far, +ccl_device_inline int obvh_node_intersect_robust(KernelGlobals *ccl_restrict kg, + const avxf &isect_near, + const avxf &isect_far, #ifdef __KERNEL_AVX2__ - const avx3f& P_idir, + const avx3f &P_idir, #endif - const avx3f& P, - const avx3f& dir, - const avx3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - avxf *ccl_restrict dist) + const avx3f &P, + const avx3f &dir, + const avx3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + const float difl, + avxf *ccl_restrict dist) { - const int offset = node_addr; - const float4 node = kernel_tex_fetch(__bvh_nodes, offset); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return obvh_unaligned_node_intersect_robust(kg, - isect_near, - isect_far, + const int offset = node_addr; + const float4 node = kernel_tex_fetch(__bvh_nodes, offset); + if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return obvh_unaligned_node_intersect_robust(kg, + isect_near, + isect_far, #ifdef __KERNEL_AVX2__ - P_idir, + P_idir, #endif - P, - dir, - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - difl, - dist); - } - else { - return obvh_aligned_node_intersect_robust(kg, - isect_near, - isect_far, + P, + dir, + idir, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + difl, + dist); + } + else { + return obvh_aligned_node_intersect_robust(kg, + isect_near, + isect_far, #ifdef __KERNEL_AVX2__ - P_idir, + P_idir, 
#else - P, + P, #endif - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - difl, - dist); - } + idir, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + difl, + dist); + } } diff --git a/intern/cycles/kernel/bvh/obvh_shadow_all.h b/intern/cycles/kernel/bvh/obvh_shadow_all.h index 10d5422c31c..98efb003788 100644 --- a/intern/cycles/kernel/bvh/obvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/obvh_shadow_all.h @@ -36,645 +36,635 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, const uint max_hits, uint *num_hits) { - /* TODO(sergey): - * - Test if pushing distance on the stack helps. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* Ray parameters in registers. 
*/ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - *num_hits = 0; - isect_array->t = tmax; + *num_hits = 0; + isect_array->t = tmax; #if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; + int num_hits_in_instance = 0; #endif - avxf tnear(0.0f), tfar(isect_t); + avxf tnear(0.0f), tfar(isect_t); #if BVH_FEATURE(BVH_HAIR) - avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); #endif - avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z)); + avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - (void) inodes; - - if(false + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. 
*/ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + (void)inodes; + + if (false #ifdef __VISIBILITY_FLAG__ - || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) + || ((__float_as_uint(inodes.x) & PATH_RAY_SHADOW) == 0) #endif #if BVH_FEATURE(BVH_MOTION) - || UNLIKELY(ray->time < inodes.y) - || UNLIKELY(ray->time > inodes.z) + || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z) #endif - ) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - avxf dist; - int child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + ) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + avxf dist; + int child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) -//#if !defined(__KERNEL_AVX2__) - org4, + //#if !defined(__KERNEL_AVX2__) + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); - - if(child_mask != 0) { - avxf cnodes; + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); + + if (child_mask != 0) { + avxf cnodes; #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26); + } + else #endif - { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(child_mask); - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. 
- */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - - /* Five children are hit, push all onto stack and sort 5 - * stack items, continue with closest child - */ - r = __bscf(child_mask); - int c4 = __float_as_int(cnodes[r]); - float d4 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Six children are hit, push all onto stack and sort 6 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c5 = __float_as_int(cnodes[r]); - float d5 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - - /* Seven children are hit, push all onto stack and sort 7 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c6 = __float_as_int(cnodes[r]); - float d6 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Eight children are hit, push all onto stack and sort 8 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c7 = __float_as_int(cnodes[r]); - float d7 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c7; - traversal_stack[stack_ptr].dist = d7; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6], - &traversal_stack[stack_ptr - 7]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); + { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float *)&dist)[r]; + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. 
+ */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + + /* Five children are hit, push all onto stack and sort 5 + * stack items, continue with closest child + 
*/ + r = __bscf(child_mask); + int c4 = __float_as_int(cnodes[r]); + float d4 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Six children are hit, push all onto stack and sort 6 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c5 = __float_as_int(cnodes[r]); + float d5 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + + /* Seven children are hit, push all onto stack and sort 7 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c6 = __float_as_int(cnodes[r]); + float d6 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Eight children are hit, push all onto stack and sort 8 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c7 = __float_as_int(cnodes[r]); + float d7 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c7; + traversal_stack[stack_ptr].dist = d7; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6], + &traversal_stack[stack_ptr - 7]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + + /* If node is leaf, fetch triangle list. */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); #ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + if ((__float_as_uint(leaf.z) & PATH_RAY_SHADOW) == 0) { + /* Pop. 
*/ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } #endif - int prim_addr = __float_as_int(leaf.x); + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - - /* Primitive intersection. */ - if(p_type == PRIMITIVE_TRIANGLE) { - int prim_count = prim_addr2 - prim_addr; - if(prim_count < 3) { - while(prim_addr < prim_addr2) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); - int hit = triangle_intersect(kg, - isect_array, - P, - dir, - PATH_RAY_SHADOW, - object, - prim_addr); - /* Shadow ray early termination. */ - if(hit) { - /* detect if this surface has a shader with transparent shadows */ - - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a transparent shadow shader? */ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + + /* Primitive intersection. */ + if (p_type == PRIMITIVE_TRIANGLE) { + int prim_count = prim_addr2 - prim_addr; + if (prim_count < 3) { + while (prim_addr < prim_addr2) { + kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == + p_type); + int hit = triangle_intersect( + kg, isect_array, P, dir, PATH_RAY_SHADOW, object, prim_addr); + /* Shadow ray early termination. */ + if (hit) { + /* detect if this surface has a shader with transparent shadows */ + + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? 
*/ + int prim = kernel_tex_fetch(__prim_index, isect_array->prim); + int shader = 0; #ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) + if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) #endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } + { + shader = kernel_tex_fetch(__tri_shader, prim); + } #ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } #endif - int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; - - /* if no transparent shadows, all light is blocked */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return true; - } - /* if maximum number of hits reached, block all light */ - else if(*num_hits == max_hits) { - return true; - } - - /* move on to next entry in intersections array */ - isect_array++; - (*num_hits)++; + int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + + /* if no transparent shadows, all light is blocked */ + if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return true; + } + /* if maximum number of hits reached, block all light */ + else if (*num_hits == max_hits) { + return true; + } + + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; #if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; #endif - isect_array->t = isect_t; - } + isect_array->t = isect_t; + } - prim_addr++; - } //while - } else { - kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) == p_type); + prim_addr++; + } //while + } + else { + kernel_assert((kernel_tex_fetch(__prim_type, (prim_addr)) & PRIMITIVE_ALL) == + p_type); #if BVH_FEATURE(BVH_INSTANCING) - int* nhiptr = &num_hits_in_instance; + int *nhiptr = &num_hits_in_instance; #else - int nhi= 0; - int *nhiptr = &nhi; + int nhi = 0; + int *nhiptr = &nhi; #endif 
- int result = triangle_intersect8(kg, - &isect_array, - P, - dir, - PATH_RAY_SHADOW, - object, - prim_addr, - prim_count, - num_hits, - max_hits, - nhiptr, - isect_t); - if(result == 2) { - return true; - } - } // prim_count - } // PRIMITIVE_TRIANGLE - else { - while(prim_addr < prim_addr2) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); + int result = triangle_intersect8(kg, + &isect_array, + P, + dir, + PATH_RAY_SHADOW, + object, + prim_addr, + prim_count, + num_hits, + max_hits, + nhiptr, + isect_t); + if (result == 2) { + return true; + } + } // prim_count + } // PRIMITIVE_TRIANGLE + else { + while (prim_addr < prim_addr2) { + kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); #ifdef __SHADOW_TRICKS__ - uint tri_object = (object == OBJECT_NONE) - ? kernel_tex_fetch(__prim_object, prim_addr) - : object; - if(tri_object == skip_object) { - ++prim_addr; - continue; - } + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + if (tri_object == skip_object) { + ++prim_addr; + continue; + } #endif - bool hit; + bool hit; - /* todo: specialized intersect functions which don't fill in - * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? - * might give a few % performance improvement */ + /* todo: specialized intersect functions which don't fill in + * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? 
+ * might give a few % performance improvement */ - switch(p_type) { + switch (p_type) { #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect(kg, - isect_array, - P, - dir, - ray->time, - PATH_RAY_SHADOW, - object, - prim_addr); - break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect( + kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, prim_addr); + break; + } #endif #if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect_array, - P, - dir, - PATH_RAY_SHADOW, - object, - prim_addr, - ray->time, - curve_type, - NULL, - 0, 0); - } - else { - hit = curve_intersect(kg, - isect_array, - P, - dir, - PATH_RAY_SHADOW, - object, - prim_addr, - ray->time, - curve_type, - NULL, - 0, 0); - } - break; - } + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); + if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { + hit = cardinal_curve_intersect(kg, + isect_array, + P, + dir, + PATH_RAY_SHADOW, + object, + prim_addr, + ray->time, + curve_type, + NULL, + 0, + 0); + } + else { + hit = curve_intersect(kg, + isect_array, + P, + dir, + PATH_RAY_SHADOW, + object, + prim_addr, + ray->time, + curve_type, + NULL, + 0, + 0); + } + break; + } #endif - default: { - hit = false; - break; - } - } + default: { + hit = false; + break; + } + } - /* Shadow ray early termination. */ - if(hit) { - /* detect if this surface has a shader with transparent shadows */ + /* Shadow ray early termination. */ + if (hit) { + /* detect if this surface has a shader with transparent shadows */ - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a transparent shadow shader? 
*/ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? */ + int prim = kernel_tex_fetch(__prim_index, isect_array->prim); + int shader = 0; #ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) + if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) #endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } + { + shader = kernel_tex_fetch(__tri_shader, prim); + } #ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } #endif - int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; - - /* if no transparent shadows, all light is blocked */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return true; - } - /* if maximum number of hits reached, block all light */ - else if(*num_hits == max_hits) { - return true; - } - - /* move on to next entry in intersections array */ - isect_array++; - (*num_hits)++; + int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + + /* if no transparent shadows, all light is blocked */ + if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return true; + } + /* if maximum number of hits reached, block all light */ + else if (*num_hits == max_hits) { + return true; + } + + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; #if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; #endif - isect_array->t = isect_t; - } + isect_array->t = isect_t; + } - prim_addr++; - }//while prim - } - } + prim_addr++; + } //while prim + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); + else { + /* Instance push. 
*/ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); # if BVH_FEATURE(BVH_MOTION) - isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif - num_hits_in_instance = 0; - isect_array->t = isect_t; + num_hits_in_instance = 0; + isect_array->t = isect_t; - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = avxf(isect_t); + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = avxf(isect_t); # if BVH_FEATURE(BVH_HAIR) - dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); # endif - idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + node_addr = kernel_tex_fetch(__object_node, object); + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if 
(stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ - if(num_hits_in_instance) { - float t_fac; + /* Instance pop. */ + if (num_hits_in_instance) { + float t_fac; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); # else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); # endif - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { + /* Scale isect->t to adjust for instancing. */ + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + else { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif - } + } - isect_t = tmax; - isect_array->t = isect_t; + isect_t = tmax; + isect_array->t = isect_t; - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = avxf(isect_t); + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = avxf(isect_t); # if BVH_FEATURE(BVH_HAIR) - dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); # endif - idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = 
avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return false; + return false; } #undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/obvh_traversal.h b/intern/cycles/kernel/bvh/obvh_traversal.h index 5df7a3be515..86b1de48aaa 100644 --- a/intern/cycles/kernel/bvh/obvh_traversal.h +++ b/intern/cycles/kernel/bvh/obvh_traversal.h @@ -37,598 +37,583 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, Intersection *isect, const uint visibility #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - ,uint *lcg_state, + , + uint *lcg_state, float difl, float extmax #endif - ) +) { - /* Traversal stack in CUDA thread-local memory. */ - OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - traversal_stack[0].dist = -FLT_MAX; - - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - float node_dist = -FLT_MAX; - - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; + /* Traversal stack in CUDA thread-local memory. */ + OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + traversal_stack[0].dist = -FLT_MAX; + + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + float node_dist = -FLT_MAX; + + /* Ray parameters in registers. 
*/ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; - BVH_DEBUG_INIT(); - avxf tnear(0.0f), tfar(ray->t); + BVH_DEBUG_INIT(); + avxf tnear(0.0f), tfar(ray->t); #if BVH_FEATURE(BVH_HAIR) - avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); #endif - avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - (void) inodes; - - if(UNLIKELY(node_dist > isect->t) + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. 
*/ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + (void)inodes; + + if (UNLIKELY(node_dist > isect->t) #if BVH_FEATURE(BVH_MOTION) - || UNLIKELY(ray->time < inodes.y) - || UNLIKELY(ray->time > inodes.z) + || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z) #endif #ifdef __VISIBILITY_FLAG__ - || (__float_as_uint(inodes.x) & visibility) == 0 + || (__float_as_uint(inodes.x) & visibility) == 0 #endif - ) - { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } + ) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } - int child_mask; - avxf dist; + int child_mask; + avxf dist; - BVH_DEBUG_NEXT_NODE(); + BVH_DEBUG_NEXT_NODE(); #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - /* NOTE: We extend all the child BB instead of fetching - * and checking visibility flags for each of the, - * - * Need to test if doing opposite would be any faster. - */ - child_mask = NODE_INTERSECT_ROBUST(kg, - tnear, - tfar, + if (difl != 0.0f) { + /* NOTE: We extend all the child BB instead of fetching + * and checking visibility flags for each of the, + * + * Need to test if doing opposite would be any faster. 
+ */ + child_mask = NODE_INTERSECT_ROBUST(kg, + tnear, + tfar, # ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, # endif # if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, # endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - difl, - &dist); - } - else -#endif /* BVH_HAIR_MINIMUM_WIDTH */ - { - child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + difl, + &dist); + } + else +#endif /* BVH_HAIR_MINIMUM_WIDTH */ + { + child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); - } - - if(child_mask != 0) { - avxf cnodes; - /* TODO(sergey): Investigate whether moving cnodes upwards - * gives a speedup (will be different cache pattern but will - * avoid extra check here). - */ + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); + } + + if (child_mask != 0) { + avxf cnodes; + /* TODO(sergey): Investigate whether moving cnodes upwards + * gives a speedup (will be different cache pattern but will + * avoid extra check here). + */ #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26); + } + else #endif - { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14); - } - - /* One child is hit, continue with that child. 
*/ - int r = __bscf(child_mask); - float d0 = ((float*)&dist)[r]; - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - node_dist = d0; - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - node_dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - node_dist = d0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - - /* Five children are hit, push all onto stack and sort 5 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c4 = __float_as_int(cnodes[r]); - float d4 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4]); - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } - - /* Six children are hit, push all onto stack and sort 6 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c5 = __float_as_int(cnodes[r]); - float d5 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5]); - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - - /* Seven children are hit, push all onto stack and sort 7 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c6 = __float_as_int(cnodes[r]); - float d6 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6]); - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } - - /* Eight children are hit, push all onto stack and sort 8 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c7 = __float_as_int(cnodes[r]); - float d7 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c7; - traversal_stack[stack_ptr].dist = d7; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6], - &traversal_stack[stack_ptr - 7]); - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } - - - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - } - - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); + { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + float d0 = ((float *)&dist)[r]; + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + node_dist = d0; + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. 
+ */ + int c0 = __float_as_int(cnodes[r]); + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + node_dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + node_dist = d0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + + /* Five children are hit, push all onto stack and sort 5 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c4 = __float_as_int(cnodes[r]); + float d4 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4]); + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } + + /* Six children are hit, push all onto stack and sort 6 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c5 = __float_as_int(cnodes[r]); + float d5 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5]); + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + + /* Seven children are hit, push all onto stack and sort 7 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c6 = __float_as_int(cnodes[r]); + float d6 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6]); + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } + + /* Eight children are hit, push all onto stack and sort 8 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c7 = __float_as_int(cnodes[r]); + float d7 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c7; + traversal_stack[stack_ptr].dist = d7; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6], + &traversal_stack[stack_ptr - 7]); + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } + + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + } + + /* If node is leaf, fetch triangle list. */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); #ifdef __VISIBILITY_FLAG__ - if(UNLIKELY((node_dist > isect->t) || - ((__float_as_uint(leaf.z) & visibility) == 0))) + if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0))) #else - if(UNLIKELY((node_dist > isect->t))) + if (UNLIKELY((node_dist > isect->t))) #endif - { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } - int prim_addr = __float_as_int(leaf.x); + { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* Pop. 
*/ - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - - /* Primitive intersection. */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - int prim_count = prim_addr2 - prim_addr; - if(prim_count < 3) { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(triangle_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr)) - { - tfar = avxf(isect->t); - /* Shadow ray early termination. */ - if(visibility == PATH_RAY_SHADOW_OPAQUE) { - return true; - } - } - }//for - } - else { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(triangle_intersect8(kg, - &isect, - P, - dir, - visibility, - object, - prim_addr, - prim_count, - 0, - 0, - NULL, - 0.0f)) - { - tfar = avxf(isect->t); - if(visibility == PATH_RAY_SHADOW_OPAQUE) { - return true; - } - } - }//prim count - break; - } + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + + /* Primitive intersection. */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + int prim_count = prim_addr2 - prim_addr; + if (prim_count < 3) { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) { + tfar = avxf(isect->t); + /* Shadow ray early termination. 
*/ + if (visibility == PATH_RAY_SHADOW_OPAQUE) { + return true; + } + } + } //for + } + else { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect8(kg, + &isect, + P, + dir, + visibility, + object, + prim_addr, + prim_count, + 0, + 0, + NULL, + 0.0f)) { + tfar = avxf(isect->t); + if (visibility == PATH_RAY_SHADOW_OPAQUE) { + return true; + } + } + } //prim count + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(motion_triangle_intersect(kg, - isect, - P, - dir, - ray->time, - visibility, - object, - prim_addr)) - { - tfar = avxf(isect->t); - /* Shadow ray early termination. */ - if(visibility == PATH_RAY_SHADOW_OPAQUE) { - return true; - } - } - } - break; - } -#endif /* BVH_FEATURE(BVH_MOTION) */ + case PRIMITIVE_MOTION_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (motion_triangle_intersect( + kg, isect, P, dir, ray->time, visibility, object, prim_addr)) { + tfar = avxf(isect->t); + /* Shadow ray early termination. 
*/ + if (visibility == PATH_RAY_SHADOW_OPAQUE) { + return true; + } + } + } + break; + } +#endif /* BVH_FEATURE(BVH_MOTION) */ #if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); - bool hit; - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); - } - else { - hit = curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); - } - if(hit) { - tfar = avxf(isect->t); - /* Shadow ray early termination. */ - if(visibility == PATH_RAY_SHADOW_OPAQUE) { - return true; - } - } - } - break; - } -#endif /* BVH_FEATURE(BVH_HAIR) */ - } - } + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); + kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); + bool hit; + if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { + hit = cardinal_curve_intersect(kg, + isect, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + lcg_state, + difl, + extmax); + } + else { + hit = curve_intersect(kg, + isect, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + lcg_state, + difl, + extmax); + } + if (hit) { + tfar = avxf(isect->t); + /* Shadow ray early termination. */ + if (visibility == PATH_RAY_SHADOW_OPAQUE) { + return true; + } + } + } + break; + } +#endif /* BVH_FEATURE(BVH_HAIR) */ + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. 
*/ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); # if BVH_FEATURE(BVH_MOTION) - qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm); + qbvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm); # else - qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist); + qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist); # endif - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = avxf(isect->t); + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = avxf(isect->t); # if BVH_FEATURE(BVH_HAIR) - dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); # endif - idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - traversal_stack[stack_ptr].dist = -FLT_MAX; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; + traversal_stack[stack_ptr].dist = -FLT_MAX; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_tex_fetch(__object_node, object); - BVH_DEBUG_NEXT_INSTANCE(); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + BVH_DEBUG_NEXT_INSTANCE(); + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + 
} while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ + /* Instance pop. */ # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = avxf(isect->t); + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = avxf(isect->t); # if BVH_FEATURE(BVH_HAIR) - dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); # endif - idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return (isect->prim != PRIM_NONE); + return (isect->prim != PRIM_NONE); } #undef NODE_INTERSECT diff --git 
a/intern/cycles/kernel/bvh/obvh_volume.h b/intern/cycles/kernel/bvh/obvh_volume.h index e66d499dccc..fb41ae783ab 100644 --- a/intern/cycles/kernel/bvh/obvh_volume.h +++ b/intern/cycles/kernel/bvh/obvh_volume.h @@ -33,444 +33,448 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, Intersection *isect, const uint visibility) { - /* Traversal stack in CUDA thread-local memory. */ - OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + /* Traversal stack in CUDA thread-local memory. */ + OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; + /* Ray parameters in registers. 
*/ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; - avxf tnear(0.0f), tfar(ray->t); + avxf tnear(0.0f), tfar(ray->t); #if BVH_FEATURE(BVH_HAIR) - avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); #endif - avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z)); + avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. 
*/ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(inodes.x) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + if ((__float_as_uint(inodes.x) & visibility) == 0) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } #endif - avxf dist; - int child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + avxf dist; + int child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); - - if(child_mask != 0) { - avxf cnodes; + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); + + if (child_mask != 0) { + avxf cnodes; #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26); + } + else #endif - { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(child_mask); - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. 
- */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - - /* Five children are hit, push all onto stack and sort 5 - * stack items, continue with closest child - */ - r = __bscf(child_mask); - int c4 = __float_as_int(cnodes[r]); - float d4 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Six children are hit, push all onto stack and sort 6 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c5 = __float_as_int(cnodes[r]); - float d5 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - - /* Seven children are hit, push all onto stack and sort 7 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c6 = __float_as_int(cnodes[r]); - float d6 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Eight children are hit, push all onto stack and sort 8 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c7 = __float_as_int(cnodes[r]); - float d7 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c7; - traversal_stack[stack_ptr].dist = d7; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6], - &traversal_stack[stack_ptr - 7]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - - if((__float_as_uint(leaf.z) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - int prim_addr = __float_as_int(leaf.x); + { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. 
+ */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float *)&dist)[r]; + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + + /* Five children are hit, push all onto stack and sort 5 + * stack items, continue with closest child + */ + r = __bscf(child_mask); + int c4 = __float_as_int(cnodes[r]); + float d4 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Six children are hit, push all onto stack and sort 6 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c5 = __float_as_int(cnodes[r]); + float d5 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + + /* Seven children are hit, push all onto stack and sort 7 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c6 = __float_as_int(cnodes[r]); + float d6 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Eight children are hit, push all onto stack and sort 8 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c7 = __float_as_int(cnodes[r]); + float d7 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c7; + traversal_stack[stack_ptr].dist = d7; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6], + &traversal_stack[stack_ptr - 7]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + + /* If node is leaf, fetch triangle list. */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + + if ((__float_as_uint(leaf.z) & visibility) == 0) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - - /* Primitive intersection. */ - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. 
*/ - triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr); - } - break; - } + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + + /* Primitive intersection. */ + switch (p_type) { + case PRIMITIVE_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr); + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr); - } - break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. 
*/ + motion_triangle_intersect( + kg, isect, P, dir, ray->time, visibility, object, prim_addr); + } + break; + } #endif - } - } + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_HAS_VOLUME) { + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); # endif - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = avxf(isect->t); + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = avxf(isect->t); # if BVH_FEATURE(BVH_HAIR) - dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); # endif - idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. 
*/ - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. */ + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ + /* Instance pop. */ # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = avxf(isect->t); + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = avxf(isect->t); # if BVH_FEATURE(BVH_HAIR) - dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); # endif - idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); # endif - object = OBJECT_NONE; - node_addr 
= traversal_stack[stack_ptr].addr; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return (isect->prim != PRIM_NONE); + return (isect->prim != PRIM_NONE); } #undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/obvh_volume_all.h b/intern/cycles/kernel/bvh/obvh_volume_all.h index 5476f79712a..56e2afd4a11 100644 --- a/intern/cycles/kernel/bvh/obvh_volume_all.h +++ b/intern/cycles/kernel/bvh/obvh_volume_all.h @@ -34,514 +34,518 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg, const uint max_hits, const uint visibility) { - /* Traversal stack in CUDA thread-local memory. */ - OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; + /* Traversal stack in CUDA thread-local memory. */ + OBVHStackItem traversal_stack[BVH_OSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* Ray parameters in registers. 
*/ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - uint num_hits = 0; - isect_array->t = tmax; + uint num_hits = 0; + isect_array->t = tmax; #if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; + int num_hits_in_instance = 0; #endif - avxf tnear(0.0f), tfar(isect_t); + avxf tnear(0.0f), tfar(isect_t); #if BVH_FEATURE(BVH_HAIR) - avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z)); #endif - avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z)); + avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. 
*/ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(inodes.x) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + if ((__float_as_uint(inodes.x) & visibility) == 0) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } #endif - avxf dist; - int child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + avxf dist; + int child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); - - if(child_mask != 0) { - avxf cnodes; + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); + + if (child_mask != 0) { + avxf cnodes; #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26); + } + else #endif - { - cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(child_mask); - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. 
- */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - - /* Five children are hit, push all onto stack and sort 5 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c4 = __float_as_int(cnodes[r]); - float d4 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Six children are hit, push all onto stack and sort 6 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c5 = __float_as_int(cnodes[r]); - float d5 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c5; - traversal_stack[stack_ptr].dist = d5; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c4; - traversal_stack[stack_ptr].dist = d4; - - /* Seven children are hit, push all onto stack and sort 7 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c6 = __float_as_int(cnodes[r]); - float d6 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Eight children are hit, push all onto stack and sort 8 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c7 = __float_as_int(cnodes[r]); - float d7 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c7; - traversal_stack[stack_ptr].dist = d7; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = c6; - traversal_stack[stack_ptr].dist = d6; - obvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3], - &traversal_stack[stack_ptr - 4], - &traversal_stack[stack_ptr - 5], - &traversal_stack[stack_ptr - 6], - &traversal_stack[stack_ptr - 7]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - - if((__float_as_uint(leaf.z) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - int prim_addr = __float_as_int(leaf.x); + { + cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. 
+ */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float *)&dist)[r]; + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. + */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + + /* Five children are hit, push all onto stack and sort 5 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c4 = __float_as_int(cnodes[r]); + float d4 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Six children are hit, push all onto stack and sort 6 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c5 = __float_as_int(cnodes[r]); + float d5 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c5; + traversal_stack[stack_ptr].dist = d5; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c4; + traversal_stack[stack_ptr].dist = d4; + + /* Seven children are hit, push all onto stack and sort 7 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c6 = __float_as_int(cnodes[r]); + float d6 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Eight children are hit, push all onto stack and sort 8 + * stack items, continue with closest child. 
+ */ + r = __bscf(child_mask); + int c7 = __float_as_int(cnodes[r]); + float d7 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c7; + traversal_stack[stack_ptr].dist = d7; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = c6; + traversal_stack[stack_ptr].dist = d6; + obvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3], + &traversal_stack[stack_ptr - 4], + &traversal_stack[stack_ptr - 5], + &traversal_stack[stack_ptr - 6], + &traversal_stack[stack_ptr - 7]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + + /* If node is leaf, fetch triangle list. */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + + if ((__float_as_uint(leaf.z) & visibility) == 0) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - bool hit; - - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - - /* Primitive intersection. */ - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. 
*/ - hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + bool hit; + + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + + /* Primitive intersection. */ + switch (p_type) { + case PRIMITIVE_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr); + if (hit) { + /* Move on to next entry in intersections array. 
*/ + isect_array++; + num_hits++; #if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; #endif - isect_array->t = isect_t; - if(num_hits == max_hits) { + isect_array->t = isect_t; + if (num_hits == max_hits) { #if BVH_FEATURE(BVH_INSTANCING) # if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); # else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); + Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); # endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -#endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; + case PRIMITIVE_MOTION_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* Only primitives from volume object. 
*/ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = motion_triangle_intersect( + kg, isect_array, P, dir, ray->time, visibility, object, prim_addr); + if (hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; # if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; # endif - isect_array->t = isect_t; - if(num_hits == max_hits) { + isect_array->t = isect_t; + if (num_hits == max_hits) { # if BVH_FEATURE(BVH_INSTANCING) # if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); # else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); + Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); # endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } -# endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } +# endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } #endif - } - } + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_HAS_VOLUME) { + else { + /* Instance push. 
*/ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = avxf(isect_t); - idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = avxf(isect_t); + idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); # if BVH_FEATURE(BVH_HAIR) - dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); # endif # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); # endif - num_hits_in_instance = 0; - isect_array->t = isect_t; - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_OSTACK_SIZE); - traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. 
*/ - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + num_hits_in_instance = 0; + isect_array->t = isect_t; + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_OSTACK_SIZE); + traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. */ + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ - if(num_hits_in_instance) { - float t_fac; + /* Instance pop. */ + if (num_hits_in_instance) { + float t_fac; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); # else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); # endif - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { + /* Scale isect->t to adjust for instancing. 
*/ + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + else { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif - } + } - isect_t = tmax; - isect_array->t = isect_t; + isect_t = tmax; + isect_array->t = isect_t; - obvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = avxf(isect_t); + obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = avxf(isect_t); # if BVH_FEATURE(BVH_HAIR) - dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); + dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z)); # endif - idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); + idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); + org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z)); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return num_hits; + return num_hits; } #undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/qbvh_local.h b/intern/cycles/kernel/bvh/qbvh_local.h index 661182e31b3..b21f79bd3a0 100644 --- a/intern/cycles/kernel/bvh/qbvh_local.h +++ b/intern/cycles/kernel/bvh/qbvh_local.h @@ -35,262 +35,257 @@ 
ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, uint *lcg_state, int max_hits) { - /* TODO(sergey): - * - Test if pushing distance on the stack helps (for non shadow rays). - * - Separate version for shadow rays. - * - Likely and unlikely for if() statements. - * - SSE for hair. - * - Test restrict attribute for pointers. - */ + /* TODO(sergey): + * - Test if pushing distance on the stack helps (for non shadow rays). + * - Separate version for shadow rays. + * - Likely and unlikely for if() statements. + * - SSE for hair. + * - Test restrict attribute for pointers. + */ - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_tex_fetch(__object_node, local_object); + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_tex_fetch(__object_node, local_object); - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = ray->t; + /* Ray parameters in registers. 
*/ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = ray->t; - if(local_isect != NULL) { - local_isect->num_hits = 0; - } - kernel_assert((local_isect == NULL) == (max_hits == 0)); + if (local_isect != NULL) { + local_isect->num_hits = 0; + } + kernel_assert((local_isect == NULL) == (max_hits == 0)); - const int object_flag = kernel_tex_fetch(__object_flag, local_object); - if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + const int object_flag = kernel_tex_fetch(__object_flag, local_object); + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; - isect_t = bvh_instance_motion_push(kg, - local_object, - ray, - &P, - &dir, - &idir, - isect_t, - &ob_itfm); + Transform ob_itfm; + isect_t = bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, isect_t, &ob_itfm); #else - isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t); + isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t); #endif - object = local_object; - } + object = local_object; + } - ssef tnear(0.0f), tfar(isect_t); + ssef tnear(0.0f), tfar(isect_t); #if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); #endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); + sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. 
*/ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - ssef dist; - int child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + ssef dist; + int child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); - if(child_mask != 0) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - float4 cnodes; + if (child_mask != 0) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + float4 cnodes; #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13); + } + else #endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7); - } + { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7); + } - /* One child is hit, continue with that child. */ - int r = __bscf(child_mask); - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - continue; - } + /* One child is hit, continue with that child. 
*/ + int r = __bscf(child_mask); + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + continue; + } - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float *)&dist)[r]; + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. 
+ */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - } + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + } - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - int prim_addr = __float_as_int(leaf.x); + /* If node is leaf, fetch triangle list. */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + int prim_addr = __float_as_int(leaf.x); - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; + /* Pop. 
*/ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; - /* Primitive intersection. */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - /* Intersect ray against primitive, */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(triangle_intersect_local(kg, - local_isect, - P, - dir, - object, - local_object, - prim_addr, - isect_t, - lcg_state, - max_hits)) { - return true; - } - } - break; - } + /* Primitive intersection. */ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + /* Intersect ray against primitive, */ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect_local(kg, + local_isect, + P, + dir, + object, + local_object, + prim_addr, + isect_t, + lcg_state, + max_hits)) { + return true; + } + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - /* Intersect ray against primitive. */ - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(motion_triangle_intersect_local(kg, - local_isect, - P, - dir, - ray->time, - object, - local_object, - prim_addr, - isect_t, - lcg_state, - max_hits)) { - return true; - } - } - break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + /* Intersect ray against primitive. 
*/ + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (motion_triangle_intersect_local(kg, + local_isect, + P, + dir, + ray->time, + object, + local_object, + prim_addr, + isect_t, + lcg_state, + max_hits)) { + return true; + } + } + break; + } #endif - default: - break; - } - } - } while(node_addr != ENTRYPOINT_SENTINEL); - } while(node_addr != ENTRYPOINT_SENTINEL); + default: + break; + } + } + } while (node_addr != ENTRYPOINT_SENTINEL); + } while (node_addr != ENTRYPOINT_SENTINEL); - return false; + return false; } #undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h index 2e622af1758..7c1d8c8c72e 100644 --- a/intern/cycles/kernel/bvh/qbvh_nodes.h +++ b/intern/cycles/kernel/bvh/qbvh_nodes.h @@ -17,11 +17,11 @@ */ struct QBVHStackItem { - int addr; - float dist; + int addr; + float dist; }; -ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir, +ccl_device_inline void qbvh_near_far_idx_calc(const float3 &idir, int *ccl_restrict near_x, int *ccl_restrict near_y, int *ccl_restrict near_z, @@ -31,44 +31,76 @@ ccl_device_inline void qbvh_near_far_idx_calc(const float3& idir, { #ifdef __KERNEL_SSE__ - *near_x = 0; *far_x = 1; - *near_y = 2; *far_y = 3; - *near_z = 4; *far_z = 5; - - const size_t mask = movemask(ssef(idir.m128)); - - const int mask_x = mask & 1; - const int mask_y = (mask & 2) >> 1; - const int mask_z = (mask & 4) >> 2; - - *near_x += mask_x; *far_x -= mask_x; - *near_y += mask_y; *far_y -= mask_y; - *near_z += mask_z; *far_z -= mask_z; + *near_x = 0; + *far_x = 1; + *near_y = 2; + *far_y = 3; + *near_z = 4; + *far_z = 5; + + const size_t mask = movemask(ssef(idir.m128)); + + const int mask_x = mask & 1; + const int mask_y = (mask & 2) >> 1; + const int mask_z = (mask & 4) >> 2; + + *near_x += mask_x; + *far_x -= mask_x; + *near_y += mask_y; + *far_y -= mask_y; + *near_z += mask_z; + *far_z -= mask_z; #else - 
if(idir.x >= 0.0f) { *near_x = 0; *far_x = 1; } else { *near_x = 1; *far_x = 0; } - if(idir.y >= 0.0f) { *near_y = 2; *far_y = 3; } else { *near_y = 3; *far_y = 2; } - if(idir.z >= 0.0f) { *near_z = 4; *far_z = 5; } else { *near_z = 5; *far_z = 4; } + if (idir.x >= 0.0f) { + *near_x = 0; + *far_x = 1; + } + else { + *near_x = 1; + *far_x = 0; + } + if (idir.y >= 0.0f) { + *near_y = 2; + *far_y = 3; + } + else { + *near_y = 3; + *far_y = 2; + } + if (idir.z >= 0.0f) { + *near_z = 4; + *far_z = 5; + } + else { + *near_z = 5; + *far_z = 4; + } #endif } /* TOOD(sergey): Investigate if using intrinsics helps for both * stack item swap and float comparison. */ -ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, - QBVHStackItem *ccl_restrict b) +ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a, QBVHStackItem *ccl_restrict b) { - QBVHStackItem tmp = *a; - *a = *b; - *b = tmp; + QBVHStackItem tmp = *a; + *a = *b; + *b = tmp; } ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1, QBVHStackItem *ccl_restrict s2, QBVHStackItem *ccl_restrict s3) { - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } - if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } + if (s2->dist < s1->dist) { + qbvh_item_swap(s2, s1); + } + if (s3->dist < s2->dist) { + qbvh_item_swap(s3, s2); + } + if (s2->dist < s1->dist) { + qbvh_item_swap(s2, s1); + } } ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1, @@ -76,279 +108,283 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1, QBVHStackItem *ccl_restrict s3, QBVHStackItem *ccl_restrict s4) { - if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); } - if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); } - if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); } - if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); } - if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); } + if (s2->dist < s1->dist) { + qbvh_item_swap(s2, 
s1); + } + if (s4->dist < s3->dist) { + qbvh_item_swap(s4, s3); + } + if (s3->dist < s1->dist) { + qbvh_item_swap(s3, s1); + } + if (s4->dist < s2->dist) { + qbvh_item_swap(s4, s2); + } + if (s3->dist < s2->dist) { + qbvh_item_swap(s3, s2); + } } /* Axis-aligned nodes intersection */ //ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg, static int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg, - const ssef& isect_near, - const ssef& isect_far, + const ssef &isect_near, + const ssef &isect_far, #ifdef __KERNEL_AVX2__ - const sse3f& org_idir, + const sse3f &org_idir, #else - const sse3f& org, + const sse3f &org, #endif - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - ssef *ccl_restrict dist) + const sse3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + ssef *ccl_restrict dist) { - const int offset = node_addr + 1; + const int offset = node_addr + 1; #ifdef __KERNEL_AVX2__ - const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x); - const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y); - const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z); - const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x); - const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y); - const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z); + const ssef tnear_x = msub( + kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, org_idir.x); + const ssef tnear_y = msub( + kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, org_idir.y); + const ssef tnear_z = msub( + 
kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, org_idir.z); + const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, org_idir.x); + const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, org_idir.y); + const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, org_idir.z); #else - const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x; - const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y; - const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z; - const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x; - const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y; - const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z; + const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - org.x) * idir.x; + const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - org.y) * idir.y; + const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - org.z) * idir.z; + const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - org.x) * idir.x; + const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - org.y) * idir.y; + const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - org.z) * idir.z; #endif #ifdef __KERNEL_SSE41__ - const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near)); - const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far)); - const sseb vmask = cast(tnear) > cast(tfar); - int mask = (int)movemask(vmask)^0xf; + const ssef tnear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, isect_near)); + const ssef tfar = mini(mini(tfar_x, tfar_y), mini(tfar_z, isect_far)); + const sseb vmask = cast(tnear) > cast(tfar); + int mask = (int)movemask(vmask) ^ 
0xf; #else - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const sseb vmask = tnear <= tfar; - int mask = (int)movemask(vmask); + const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); + const sseb vmask = tnear <= tfar; + int mask = (int)movemask(vmask); #endif - *dist = tnear; - return mask; + *dist = tnear; + return mask; } -ccl_device_inline int qbvh_aligned_node_intersect_robust( - KernelGlobals *ccl_restrict kg, - const ssef& isect_near, - const ssef& isect_far, +ccl_device_inline int qbvh_aligned_node_intersect_robust(KernelGlobals *ccl_restrict kg, + const ssef &isect_near, + const ssef &isect_far, #ifdef __KERNEL_AVX2__ - const sse3f& P_idir, + const sse3f &P_idir, #else - const sse3f& P, + const sse3f &P, #endif - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - ssef *ccl_restrict dist) + const sse3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + const float difl, + ssef *ccl_restrict dist) { - const int offset = node_addr + 1; + const int offset = node_addr + 1; #ifdef __KERNEL_AVX2__ - const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x); - const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y); - const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z); - const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x); - const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y); - const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z); + const ssef tnear_x 
= msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x), idir.x, P_idir.x); + const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y), idir.y, P_idir.y); + const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z), idir.z, P_idir.z); + const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x), idir.x, P_idir.x); + const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y), idir.y, P_idir.y); + const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z), idir.z, P_idir.z); #else - const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x; - const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y; - const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z; - const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x; - const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y; - const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z; + const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_x) - P.x) * idir.x; + const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_y) - P.y) * idir.y; + const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + near_z) - P.z) * idir.z; + const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_x) - P.x) * idir.x; + const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_y) - P.y) * idir.y; + const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset + far_z) - P.z) * idir.z; #endif - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const sseb vmask = round_down*tnear <= round_up*tfar; - *dist = tnear; - return (int)movemask(vmask); + const 
float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; + const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); + const sseb vmask = round_down * tnear <= round_up * tfar; + *dist = tnear; + return (int)movemask(vmask); } /* Unaligned nodes intersection */ -ccl_device_inline int qbvh_unaligned_node_intersect( - KernelGlobals *ccl_restrict kg, - const ssef& isect_near, - const ssef& isect_far, +ccl_device_inline int qbvh_unaligned_node_intersect(KernelGlobals *ccl_restrict kg, + const ssef &isect_near, + const ssef &isect_far, #ifdef __KERNEL_AVX2__ - const sse3f& org_idir, + const sse3f &org_idir, #endif - const sse3f& org, - const sse3f& dir, - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - ssef *ccl_restrict dist) + const sse3f &org, + const sse3f &dir, + const sse3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + ssef *ccl_restrict dist) { - const int offset = node_addr; - const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1); - const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2); - const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3); + const int offset = node_addr; + const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1); + const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2); + const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3); - const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4); - const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5); - const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6); + const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4); + const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5); + const 
ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6); - const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7); - const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8); - const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9); + const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7); + const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8); + const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9); - const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10); - const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11); - const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12); + const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10); + const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11); + const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12); - const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, - aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, - aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; + const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z, + aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z, + aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z; - const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x, - aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y, - aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z; + const ssef aligned_P_x = org.x * tfm_x_x + org.y * tfm_x_y + org.z * tfm_x_z + tfm_t_x, + aligned_P_y = org.x * tfm_y_x + org.y * tfm_y_y + org.z * tfm_y_z + tfm_t_y, + aligned_P_z = org.x * tfm_z_x + org.y * tfm_z_y + org.z * tfm_z_z + tfm_t_z; - const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); - const ssef nrdir_x = neg_one / aligned_dir_x, - nrdir_y = neg_one / aligned_dir_y, - nrdir_z = neg_one / aligned_dir_z; + const 
ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); + const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y, + nrdir_z = neg_one / aligned_dir_z; - const ssef tlower_x = aligned_P_x * nrdir_x, - tlower_y = aligned_P_y * nrdir_y, - tlower_z = aligned_P_z * nrdir_z; + const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y, + tlower_z = aligned_P_z * nrdir_z; - const ssef tupper_x = tlower_x - nrdir_x, - tupper_y = tlower_y - nrdir_y, - tupper_z = tlower_z - nrdir_z; + const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y, + tupper_z = tlower_z - nrdir_z; #ifdef __KERNEL_SSE41__ - const ssef tnear_x = mini(tlower_x, tupper_x); - const ssef tnear_y = mini(tlower_y, tupper_y); - const ssef tnear_z = mini(tlower_z, tupper_z); - const ssef tfar_x = maxi(tlower_x, tupper_x); - const ssef tfar_y = maxi(tlower_y, tupper_y); - const ssef tfar_z = maxi(tlower_z, tupper_z); - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const sseb vmask = tnear <= tfar; - *dist = tnear; - return movemask(vmask); + const ssef tnear_x = mini(tlower_x, tupper_x); + const ssef tnear_y = mini(tlower_y, tupper_y); + const ssef tnear_z = mini(tlower_z, tupper_z); + const ssef tfar_x = maxi(tlower_x, tupper_x); + const ssef tfar_y = maxi(tlower_y, tupper_y); + const ssef tfar_z = maxi(tlower_z, tupper_z); + const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); + const sseb vmask = tnear <= tfar; + *dist = tnear; + return movemask(vmask); #else - const ssef tnear_x = min(tlower_x, tupper_x); - const ssef tnear_y = min(tlower_y, tupper_y); - const ssef tnear_z = min(tlower_z, tupper_z); - const ssef tfar_x = max(tlower_x, tupper_x); - const ssef tfar_y = max(tlower_y, tupper_y); - const ssef tfar_z = max(tlower_z, tupper_z); - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const 
ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const sseb vmask = tnear <= tfar; - *dist = tnear; - return movemask(vmask); + const ssef tnear_x = min(tlower_x, tupper_x); + const ssef tnear_y = min(tlower_y, tupper_y); + const ssef tnear_z = min(tlower_z, tupper_z); + const ssef tfar_x = max(tlower_x, tupper_x); + const ssef tfar_y = max(tlower_y, tupper_y); + const ssef tfar_z = max(tlower_z, tupper_z); + const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); + const sseb vmask = tnear <= tfar; + *dist = tnear; + return movemask(vmask); #endif } -ccl_device_inline int qbvh_unaligned_node_intersect_robust( - KernelGlobals *ccl_restrict kg, - const ssef& isect_near, - const ssef& isect_far, +ccl_device_inline int qbvh_unaligned_node_intersect_robust(KernelGlobals *ccl_restrict kg, + const ssef &isect_near, + const ssef &isect_far, #ifdef __KERNEL_AVX2__ - const sse3f& P_idir, + const sse3f &P_idir, #endif - const sse3f& P, - const sse3f& dir, - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - ssef *ccl_restrict dist) + const sse3f &P, + const sse3f &dir, + const sse3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + const float difl, + ssef *ccl_restrict dist) { - const int offset = node_addr; - const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1); - const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2); - const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3); + const int offset = node_addr; + const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 1); + const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 2); + const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 3); - const ssef tfm_y_x = 
kernel_tex_fetch_ssef(__bvh_nodes, offset+4); - const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5); - const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6); + const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 4); + const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 5); + const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 6); - const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7); - const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8); - const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9); + const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 7); + const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 8); + const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 9); - const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10); - const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11); - const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12); + const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset + 10); + const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset + 11); + const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset + 12); - const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, - aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z, - aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z; + const ssef aligned_dir_x = dir.x * tfm_x_x + dir.y * tfm_x_y + dir.z * tfm_x_z, + aligned_dir_y = dir.x * tfm_y_x + dir.y * tfm_y_y + dir.z * tfm_y_z, + aligned_dir_z = dir.x * tfm_z_x + dir.y * tfm_z_y + dir.z * tfm_z_z; - const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x, - aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tfm_y_z + tfm_t_y, - aligned_P_z = P.x*tfm_z_x + P.y*tfm_z_y + P.z*tfm_z_z + tfm_t_z; + const ssef aligned_P_x = P.x * tfm_x_x + P.y * tfm_x_y + P.z * tfm_x_z + tfm_t_x, + aligned_P_y = P.x * tfm_y_x 
+ P.y * tfm_y_y + P.z * tfm_y_z + tfm_t_y, + aligned_P_z = P.x * tfm_z_x + P.y * tfm_z_y + P.z * tfm_z_z + tfm_t_z; - const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); - const ssef nrdir_x = neg_one / aligned_dir_x, - nrdir_y = neg_one / aligned_dir_y, - nrdir_z = neg_one / aligned_dir_z; + const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); + const ssef nrdir_x = neg_one / aligned_dir_x, nrdir_y = neg_one / aligned_dir_y, + nrdir_z = neg_one / aligned_dir_z; - const ssef tlower_x = aligned_P_x * nrdir_x, - tlower_y = aligned_P_y * nrdir_y, - tlower_z = aligned_P_z * nrdir_z; + const ssef tlower_x = aligned_P_x * nrdir_x, tlower_y = aligned_P_y * nrdir_y, + tlower_z = aligned_P_z * nrdir_z; - const ssef tupper_x = tlower_x - nrdir_x, - tupper_y = tlower_y - nrdir_y, - tupper_z = tlower_z - nrdir_z; + const ssef tupper_x = tlower_x - nrdir_x, tupper_y = tlower_y - nrdir_y, + tupper_z = tlower_z - nrdir_z; - const float round_down = 1.0f - difl; - const float round_up = 1.0f + difl; + const float round_down = 1.0f - difl; + const float round_up = 1.0f + difl; #ifdef __KERNEL_SSE41__ - const ssef tnear_x = mini(tlower_x, tupper_x); - const ssef tnear_y = mini(tlower_y, tupper_y); - const ssef tnear_z = mini(tlower_z, tupper_z); - const ssef tfar_x = maxi(tlower_x, tupper_x); - const ssef tfar_y = maxi(tlower_y, tupper_y); - const ssef tfar_z = maxi(tlower_z, tupper_z); + const ssef tnear_x = mini(tlower_x, tupper_x); + const ssef tnear_y = mini(tlower_y, tupper_y); + const ssef tnear_z = mini(tlower_z, tupper_z); + const ssef tfar_x = maxi(tlower_x, tupper_x); + const ssef tfar_y = maxi(tlower_y, tupper_y); + const ssef tfar_z = maxi(tlower_z, tupper_z); #else - const ssef tnear_x = min(tlower_x, tupper_x); - const ssef tnear_y = min(tlower_y, tupper_y); - const ssef tnear_z = min(tlower_z, tupper_z); - const ssef tfar_x = max(tlower_x, tupper_x); - const ssef tfar_y = max(tlower_y, tupper_y); - const ssef tfar_z = max(tlower_z, tupper_z); + const ssef tnear_x = 
min(tlower_x, tupper_x); + const ssef tnear_y = min(tlower_y, tupper_y); + const ssef tnear_z = min(tlower_z, tupper_z); + const ssef tfar_x = max(tlower_x, tupper_x); + const ssef tfar_y = max(tlower_y, tupper_y); + const ssef tfar_z = max(tlower_z, tupper_z); #endif - const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); - const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); - const sseb vmask = round_down*tnear <= round_up*tfar; - *dist = tnear; - return movemask(vmask); + const ssef tnear = max4(isect_near, tnear_x, tnear_y, tnear_z); + const ssef tfar = min4(isect_far, tfar_x, tfar_y, tfar_z); + const sseb vmask = round_down * tnear <= round_up * tfar; + *dist = tnear; + return movemask(vmask); } /* Intersectors wrappers. @@ -356,111 +392,125 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust( * They'll check node type and call appropriate intersection code. */ -ccl_device_inline int qbvh_node_intersect( - KernelGlobals *ccl_restrict kg, - const ssef& isect_near, - const ssef& isect_far, +ccl_device_inline int qbvh_node_intersect(KernelGlobals *ccl_restrict kg, + const ssef &isect_near, + const ssef &isect_far, #ifdef __KERNEL_AVX2__ - const sse3f& org_idir, + const sse3f &org_idir, #endif - const sse3f& org, - const sse3f& dir, - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - ssef *ccl_restrict dist) + const sse3f &org, + const sse3f &dir, + const sse3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + ssef *ccl_restrict dist) { - const int offset = node_addr; - const float4 node = kernel_tex_fetch(__bvh_nodes, offset); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return qbvh_unaligned_node_intersect(kg, - isect_near, - isect_far, + const int offset = node_addr; + const float4 node = 
kernel_tex_fetch(__bvh_nodes, offset); + if (__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return qbvh_unaligned_node_intersect(kg, + isect_near, + isect_far, #ifdef __KERNEL_AVX2__ - org_idir, + org_idir, #endif - org, - dir, - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - dist); - } - else { - return qbvh_aligned_node_intersect(kg, - isect_near, - isect_far, + org, + dir, + idir, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + dist); + } + else { + return qbvh_aligned_node_intersect(kg, + isect_near, + isect_far, #ifdef __KERNEL_AVX2__ - org_idir, + org_idir, #else - org, + org, #endif - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - dist); - } + idir, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + dist); + } } -ccl_device_inline int qbvh_node_intersect_robust( - KernelGlobals *ccl_restrict kg, - const ssef& isect_near, - const ssef& isect_far, +ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *ccl_restrict kg, + const ssef &isect_near, + const ssef &isect_far, #ifdef __KERNEL_AVX2__ - const sse3f& P_idir, + const sse3f &P_idir, #endif - const sse3f& P, - const sse3f& dir, - const sse3f& idir, - const int near_x, - const int near_y, - const int near_z, - const int far_x, - const int far_y, - const int far_z, - const int node_addr, - const float difl, - ssef *ccl_restrict dist) + const sse3f &P, + const sse3f &dir, + const sse3f &idir, + const int near_x, + const int near_y, + const int near_z, + const int far_x, + const int far_y, + const int far_z, + const int node_addr, + const float difl, + ssef *ccl_restrict dist) { - const int offset = node_addr; - const float4 node = kernel_tex_fetch(__bvh_nodes, offset); - if(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { - return qbvh_unaligned_node_intersect_robust(kg, - isect_near, - isect_far, + const int offset = node_addr; + const float4 node = kernel_tex_fetch(__bvh_nodes, offset); + if 
(__float_as_uint(node.x) & PATH_RAY_NODE_UNALIGNED) { + return qbvh_unaligned_node_intersect_robust(kg, + isect_near, + isect_far, #ifdef __KERNEL_AVX2__ - P_idir, + P_idir, #endif - P, - dir, - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - difl, - dist); - } - else { - return qbvh_aligned_node_intersect_robust(kg, - isect_near, - isect_far, + P, + dir, + idir, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + difl, + dist); + } + else { + return qbvh_aligned_node_intersect_robust(kg, + isect_near, + isect_far, #ifdef __KERNEL_AVX2__ - P_idir, + P_idir, #else - P, + P, #endif - idir, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - difl, - dist); - } + idir, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + difl, + dist); + } } diff --git a/intern/cycles/kernel/bvh/qbvh_shadow_all.h b/intern/cycles/kernel/bvh/qbvh_shadow_all.h index dd977fb9e74..49e607bfbd0 100644 --- a/intern/cycles/kernel/bvh/qbvh_shadow_all.h +++ b/intern/cycles/kernel/bvh/qbvh_shadow_all.h @@ -36,439 +36,424 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, const uint max_hits, uint *num_hits) { - /* TODO(sergey): - * - Test if pushing distance on the stack helps. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. 
+ */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* Ray parameters in registers. */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - *num_hits = 0; - isect_array->t = tmax; - + *num_hits = 0; + isect_array->t = tmax; #if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; + int num_hits_in_instance = 0; #endif - ssef tnear(0.0f), tfar(isect_t); + ssef tnear(0.0f), tfar(isect_t); #if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); #endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); + sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - (void) inodes; - - if(false + /* Offsets to select the side that becomes the lower or upper bound. 
*/ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + (void)inodes; + + if (false #ifdef __VISIBILITY_FLAG__ - || ((__float_as_uint(inodes.x) & visibility) == 0) + || ((__float_as_uint(inodes.x) & visibility) == 0) #endif #if BVH_FEATURE(BVH_MOTION) - || UNLIKELY(ray->time < inodes.y) - || UNLIKELY(ray->time > inodes.z) + || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z) #endif - ) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - ssef dist; - int child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + ) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + ssef dist; + int child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); - - if(child_mask != 0) { - float4 cnodes; + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); + + if (child_mask != 0) { + float4 cnodes; #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13); + } + else #endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7); - } - - /* One child is hit, continue with that child. 
*/ - int r = __bscf(child_mask); - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - } - - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); + { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float *)&dist)[r]; + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. 
+ */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + } + + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + + /* If node is leaf, fetch triangle list. */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); #ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(leaf.z) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + if ((__float_as_uint(leaf.z) & visibility) == 0) { + /* Pop. 
*/ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } #endif - int prim_addr = __float_as_int(leaf.x); + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - - /* Primitive intersection. */ - while(prim_addr < prim_addr2) { - kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); - bool hit; - - /* todo: specialized intersect functions which don't fill in - * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? - * might give a few % performance improvement */ - - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - hit = triangle_intersect(kg, - isect_array, - P, - dir, - visibility, - object, - prim_addr); - break; - } + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + + /* Primitive intersection. */ + while (prim_addr < prim_addr2) { + kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type); + bool hit; + + /* todo: specialized intersect functions which don't fill in + * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? 
+ * might give a few % performance improvement */ + + switch (p_type) { + case PRIMITIVE_TRIANGLE: { + hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr); + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - hit = motion_triangle_intersect(kg, - isect_array, - P, - dir, - ray->time, - visibility, - object, - prim_addr); - break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect( + kg, isect_array, P, dir, ray->time, visibility, object, prim_addr); + break; + } #endif #if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect_array, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - NULL, - 0, 0); - } - else { - hit = curve_intersect(kg, - isect_array, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - NULL, - 0, 0); - } - break; - } + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); + if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { + hit = cardinal_curve_intersect(kg, + isect_array, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + NULL, + 0, + 0); + } + else { + hit = curve_intersect(kg, + isect_array, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + NULL, + 0, + 0); + } + break; + } #endif - default: { - hit = false; - break; - } - } + default: { + hit = false; + break; + } + } - /* Shadow ray early termination. */ - if(hit) { - /* detect if this surface has a shader with transparent shadows */ + /* Shadow ray early termination. 
*/ + if (hit) { + /* detect if this surface has a shader with transparent shadows */ - /* todo: optimize so primitive visibility flag indicates if - * the primitive has a transparent shadow shader? */ - int prim = kernel_tex_fetch(__prim_index, isect_array->prim); - int shader = 0; + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? */ + int prim = kernel_tex_fetch(__prim_index, isect_array->prim); + int shader = 0; #ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) + if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) #endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } + { + shader = kernel_tex_fetch(__tri_shader, prim); + } #ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } #endif - int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; - - /* if no transparent shadows, all light is blocked */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return true; - } - /* if maximum number of hits reached, block all light */ - else if(*num_hits == max_hits) { - return true; - } - - /* move on to next entry in intersections array */ - isect_array++; - (*num_hits)++; + int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + + /* if no transparent shadows, all light is blocked */ + if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return true; + } + /* if maximum number of hits reached, block all light */ + else if (*num_hits == max_hits) { + return true; + } + + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; #if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; #endif - isect_array->t = isect_t; - } + isect_array->t = isect_t; + } - prim_addr++; - } - } + prim_addr++; + } + } #if 
BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); # if BVH_FEATURE(BVH_MOTION) - isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif - num_hits_in_instance = 0; - isect_array->t = isect_t; + num_hits_in_instance = 0; + isect_array->t = isect_t; - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = ssef(isect_t); + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + node_addr = kernel_tex_fetch(__object_node, object); + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } 
while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ - if(num_hits_in_instance) { - float t_fac; + /* Instance pop. */ + if (num_hits_in_instance) { + float t_fac; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); # else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); # endif - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { + /* Scale isect->t to adjust for instancing. */ + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + else { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif - } + } - isect_t = tmax; - isect_array->t = isect_t; + isect_t = tmax; + isect_array->t = isect_t; - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = ssef(isect_t); + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + 
P_idir = P * idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return false; + return false; } #undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/qbvh_traversal.h b/intern/cycles/kernel/bvh/qbvh_traversal.h index 40cd57aad34..9ee0f7b5933 100644 --- a/intern/cycles/kernel/bvh/qbvh_traversal.h +++ b/intern/cycles/kernel/bvh/qbvh_traversal.h @@ -37,457 +37,446 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, Intersection *isect, const uint visibility #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - ,uint *lcg_state, + , + uint *lcg_state, float difl, float extmax #endif - ) +) { - /* TODO(sergey): - * - Test if pushing distance on the stack helps (for non shadow rays). - * - Separate version for shadow rays. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - traversal_stack[0].dist = -FLT_MAX; - - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - float node_dist = -FLT_MAX; - - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; + /* TODO(sergey): + * - Test if pushing distance on the stack helps (for non shadow rays). + * - Separate version for shadow rays. 
+ * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + traversal_stack[0].dist = -FLT_MAX; + + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + float node_dist = -FLT_MAX; + + /* Ray parameters in registers. */ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; - BVH_DEBUG_INIT(); + BVH_DEBUG_INIT(); - ssef tnear(0.0f), tfar(ray->t); + ssef tnear(0.0f), tfar(ray->t); #if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); #endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. 
*/ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); - (void) inodes; - - if(UNLIKELY(node_dist > isect->t) + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. */ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); + (void)inodes; + + if (UNLIKELY(node_dist > isect->t) #if BVH_FEATURE(BVH_MOTION) - || UNLIKELY(ray->time < inodes.y) - || UNLIKELY(ray->time > inodes.z) + || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z) #endif #ifdef __VISIBILITY_FLAG__ - || (__float_as_uint(inodes.x) & visibility) == 0 + || (__float_as_uint(inodes.x) & visibility) == 0 #endif - ) - { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } + ) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } - int child_mask; - ssef dist; + int child_mask; + ssef dist; - BVH_DEBUG_NEXT_NODE(); + BVH_DEBUG_NEXT_NODE(); #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH) - if(difl != 0.0f) { - /* NOTE: We extend all the child BB instead of fetching - * and checking visibility flags for each of the, - * - * Need to test if doing opposite would be any faster. - */ - child_mask = NODE_INTERSECT_ROBUST(kg, - tnear, - tfar, + if (difl != 0.0f) { + /* NOTE: We extend all the child BB instead of fetching + * and checking visibility flags for each of the, + * + * Need to test if doing opposite would be any faster. 
+ */ + child_mask = NODE_INTERSECT_ROBUST(kg, + tnear, + tfar, # ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, # endif # if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, # endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - difl, - &dist); - } - else -#endif /* BVH_HAIR_MINIMUM_WIDTH */ - { - child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + difl, + &dist); + } + else +#endif /* BVH_HAIR_MINIMUM_WIDTH */ + { + child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); - } - - if(child_mask != 0) { - float4 cnodes; - /* TODO(sergey): Investigate whether moving cnodes upwards - * gives a speedup (will be different cache pattern but will - * avoid extra check here). - */ + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); + } + + if (child_mask != 0) { + float4 cnodes; + /* TODO(sergey): Investigate whether moving cnodes upwards + * gives a speedup (will be different cache pattern but will + * avoid extra check here). + */ #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13); + } + else #endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7); - } - - /* One child is hit, continue with that child. 
*/ - int r = __bscf(child_mask); - float d0 = ((float*)&dist)[r]; - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - node_dist = d0; - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. - */ - int c0 = __float_as_int(cnodes[r]); - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - node_dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - node_dist = d0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - } - - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - } - - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); + { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + float d0 = ((float *)&dist)[r]; + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + node_dist = d0; + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + node_dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + node_dist = d0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. 
+ */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + } + + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + } + + /* If node is leaf, fetch triangle list. 
*/ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); #ifdef __VISIBILITY_FLAG__ - if(UNLIKELY((node_dist > isect->t) || - ((__float_as_uint(leaf.z) & visibility) == 0))) + if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0))) #else - if(UNLIKELY((node_dist > isect->t))) + if (UNLIKELY((node_dist > isect->t))) #endif - { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - continue; - } + { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + continue; + } - int prim_addr = __float_as_int(leaf.x); + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - - /* Primitive intersection. */ - switch(type & PRIMITIVE_ALL) { - case PRIMITIVE_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(triangle_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr)) { - tfar = ssef(isect->t); - /* Shadow ray early termination. */ - if(visibility & PATH_RAY_SHADOW_OPAQUE) { - return true; - } - } - } - break; - } + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + + /* Primitive intersection. 
*/ + switch (type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) { + tfar = ssef(isect->t); + /* Shadow ray early termination. */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + return true; + } + } + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - if(motion_triangle_intersect(kg, - isect, - P, - dir, - ray->time, - visibility, - object, - prim_addr)) { - tfar = ssef(isect->t); - /* Shadow ray early termination. */ - if(visibility & PATH_RAY_SHADOW_OPAQUE) { - return true; - } - } - } - break; - } -#endif /* BVH_FEATURE(BVH_MOTION) */ + case PRIMITIVE_MOTION_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + if (motion_triangle_intersect( + kg, isect, P, dir, ray->time, visibility, object, prim_addr)) { + tfar = ssef(isect->t); + /* Shadow ray early termination. 
*/ + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + return true; + } + } + } + break; + } +#endif /* BVH_FEATURE(BVH_MOTION) */ #if BVH_FEATURE(BVH_HAIR) - case PRIMITIVE_CURVE: - case PRIMITIVE_MOTION_CURVE: { - for(; prim_addr < prim_addr2; prim_addr++) { - BVH_DEBUG_NEXT_INTERSECTION(); - const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); - kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); - bool hit; - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { - hit = cardinal_curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); - } - else { - hit = curve_intersect(kg, - isect, - P, - dir, - visibility, - object, - prim_addr, - ray->time, - curve_type, - lcg_state, - difl, - extmax); - } - if(hit) { - tfar = ssef(isect->t); - /* Shadow ray early termination. */ - if(visibility & PATH_RAY_SHADOW_OPAQUE) { - return true; - } - } - } - break; - } -#endif /* BVH_FEATURE(BVH_HAIR) */ - } - } + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + for (; prim_addr < prim_addr2; prim_addr++) { + BVH_DEBUG_NEXT_INTERSECTION(); + const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr); + kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL)); + bool hit; + if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) { + hit = cardinal_curve_intersect(kg, + isect, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + lcg_state, + difl, + extmax); + } + else { + hit = curve_intersect(kg, + isect, + P, + dir, + visibility, + object, + prim_addr, + ray->time, + curve_type, + lcg_state, + difl, + extmax); + } + if (hit) { + tfar = ssef(isect->t); + /* Shadow ray early termination. */ + if (visibility & PATH_RAY_SHADOW_OPAQUE) { + return true; + } + } + } + break; + } +#endif /* BVH_FEATURE(BVH_HAIR) */ + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. 
*/ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); # if BVH_FEATURE(BVH_MOTION) - qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm); + qbvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm); # else - qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist); + qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist); # endif - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = ssef(isect->t); + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - traversal_stack[stack_ptr].dist = -FLT_MAX; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; + traversal_stack[stack_ptr].dist = -FLT_MAX; - node_addr = kernel_tex_fetch(__object_node, object); + node_addr = kernel_tex_fetch(__object_node, object); - BVH_DEBUG_NEXT_INSTANCE(); - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + BVH_DEBUG_NEXT_INSTANCE(); + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + 
} while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ + /* Instance pop. */ # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = ssef(isect->t); + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - node_dist = traversal_stack[stack_ptr].dist; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + node_dist = traversal_stack[stack_ptr].dist; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return (isect->prim != PRIM_NONE); + return (isect->prim != PRIM_NONE); } #undef NODE_INTERSECT diff --git 
a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h index 6790bfa6c83..e4eaed04467 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume.h +++ b/intern/cycles/kernel/bvh/qbvh_volume.h @@ -33,331 +33,335 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, Intersection *isect, const uint visibility) { - /* TODO(sergey): - * - Test if pushing distance on the stack helps. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* Ray parameters in registers. 
*/ + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - isect->t = ray->t; - isect->u = 0.0f; - isect->v = 0.0f; - isect->prim = PRIM_NONE; - isect->object = OBJECT_NONE; + isect->t = ray->t; + isect->u = 0.0f; + isect->v = 0.0f; + isect->prim = PRIM_NONE; + isect->object = OBJECT_NONE; - ssef tnear(0.0f), tfar(ray->t); + ssef tnear(0.0f), tfar(ray->t); #if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); #endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); + sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. 
*/ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(inodes.x) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + if ((__float_as_uint(inodes.x) & visibility) == 0) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } #endif - ssef dist; - int child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + ssef dist; + int child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); - - if(child_mask != 0) { - float4 cnodes; + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); + + if (child_mask != 0) { + float4 cnodes; #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13); + } + else #endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(child_mask); - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. 
- */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - } - - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - - if((__float_as_uint(leaf.z) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - int prim_addr = __float_as_int(leaf.x); + { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float *)&dist)[r]; + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. 
+ */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + } + + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + + /* If node is leaf, fetch triangle list. */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + + if ((__float_as_uint(leaf.z) & visibility) == 0) { + /* Pop. 
*/ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - - /* Primitive intersection. */ - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr); - } - break; - } + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + + /* Primitive intersection. */ + switch (p_type) { + case PRIMITIVE_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. 
*/ + triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr); + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, prim_addr); - } - break; - } + case PRIMITIVE_MOTION_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + motion_triangle_intersect( + kg, isect, P, dir, ray->time, visibility, object, prim_addr); + } + break; + } #endif - } - } + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. */ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_HAS_VOLUME) { + else { + /* Instance push. 
*/ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t); # endif - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = ssef(isect->t); + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); # endif - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. */ - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. 
*/ + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ + /* Instance pop. */ # if BVH_FEATURE(BVH_MOTION) - isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); + isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm); # else - isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); + isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t); # endif - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = ssef(isect->t); + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = ssef(isect->t); # if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return (isect->prim != PRIM_NONE); + return (isect->prim != 
PRIM_NONE); } #undef NODE_INTERSECT diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h index 63d79b6fe34..eddc48c487e 100644 --- a/intern/cycles/kernel/bvh/qbvh_volume_all.h +++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h @@ -34,405 +34,411 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, const uint max_hits, const uint visibility) { - /* TODO(sergey): - * - Test if pushing distance on the stack helps. - * - Likely and unlikely for if() statements. - * - Test restrict attribute for pointers. - */ - - /* Traversal stack in CUDA thread-local memory. */ - QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; - traversal_stack[0].addr = ENTRYPOINT_SENTINEL; - - /* Traversal variables in registers. */ - int stack_ptr = 0; - int node_addr = kernel_data.bvh.root; - - /* Ray parameters in registers. */ - const float tmax = ray->t; - float3 P = ray->P; - float3 dir = bvh_clamp_direction(ray->D); - float3 idir = bvh_inverse_direction(dir); - int object = OBJECT_NONE; - float isect_t = tmax; + /* TODO(sergey): + * - Test if pushing distance on the stack helps. + * - Likely and unlikely for if() statements. + * - Test restrict attribute for pointers. + */ + + /* Traversal stack in CUDA thread-local memory. */ + QBVHStackItem traversal_stack[BVH_QSTACK_SIZE]; + traversal_stack[0].addr = ENTRYPOINT_SENTINEL; + + /* Traversal variables in registers. */ + int stack_ptr = 0; + int node_addr = kernel_data.bvh.root; + + /* Ray parameters in registers. 
*/ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; #if BVH_FEATURE(BVH_MOTION) - Transform ob_itfm; + Transform ob_itfm; #endif - uint num_hits = 0; - isect_array->t = tmax; + uint num_hits = 0; + isect_array->t = tmax; #if BVH_FEATURE(BVH_INSTANCING) - int num_hits_in_instance = 0; + int num_hits_in_instance = 0; #endif - ssef tnear(0.0f), tfar(isect_t); + ssef tnear(0.0f), tfar(isect_t); #if BVH_FEATURE(BVH_HAIR) - sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z)); #endif - sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z)); #ifdef __KERNEL_AVX2__ - float3 P_idir = P*idir; - sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); + float3 P_idir = P * idir; + sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z); #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); + sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z)); #endif - /* Offsets to select the side that becomes the lower or upper bound. */ - int near_x, near_y, near_z; - int far_x, far_y, far_z; - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); + /* Offsets to select the side that becomes the lower or upper bound. */ + int near_x, near_y, near_z; + int far_x, far_y, far_z; + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); - /* Traversal loop. */ - do { - do { - /* Traverse internal nodes. */ - while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { - float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0); + /* Traversal loop. */ + do { + do { + /* Traverse internal nodes. 
*/ + while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) { + float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); #ifdef __VISIBILITY_FLAG__ - if((__float_as_uint(inodes.x) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } + if ((__float_as_uint(inodes.x) & visibility) == 0) { + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } #endif - ssef dist; - int child_mask = NODE_INTERSECT(kg, - tnear, - tfar, + ssef dist; + int child_mask = NODE_INTERSECT(kg, + tnear, + tfar, #ifdef __KERNEL_AVX2__ - P_idir4, + P_idir4, #endif #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4, + org4, #endif #if BVH_FEATURE(BVH_HAIR) - dir4, + dir4, #endif - idir4, - near_x, near_y, near_z, - far_x, far_y, far_z, - node_addr, - &dist); - - if(child_mask != 0) { - float4 cnodes; + idir4, + near_x, + near_y, + near_z, + far_x, + far_y, + far_z, + node_addr, + &dist); + + if (child_mask != 0) { + float4 cnodes; #if BVH_FEATURE(BVH_HAIR) - if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13); - } - else + if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13); + } + else #endif - { - cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7); - } - - /* One child is hit, continue with that child. */ - int r = __bscf(child_mask); - if(child_mask == 0) { - node_addr = __float_as_int(cnodes[r]); - continue; - } - - /* Two children are hit, push far child, and continue with - * closer child. 
- */ - int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; - r = __bscf(child_mask); - int c1 = __float_as_int(cnodes[r]); - float d1 = ((float*)&dist)[r]; - if(child_mask == 0) { - if(d1 < d0) { - node_addr = c1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - continue; - } - else { - node_addr = c0; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - continue; - } - } - - /* Here starts the slow path for 3 or 4 hit children. We push - * all nodes onto the stack to sort them there. - */ - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c1; - traversal_stack[stack_ptr].dist = d1; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c0; - traversal_stack[stack_ptr].dist = d0; - - /* Three children are hit, push all onto stack and sort 3 - * stack items, continue with closest child. - */ - r = __bscf(child_mask); - int c2 = __float_as_int(cnodes[r]); - float d2 = ((float*)&dist)[r]; - if(child_mask == 0) { - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2]); - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - /* Four children are hit, push all onto stack and sort 4 - * stack items, continue with closest child. 
- */ - r = __bscf(child_mask); - int c3 = __float_as_int(cnodes[r]); - float d3 = ((float*)&dist)[r]; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c3; - traversal_stack[stack_ptr].dist = d3; - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = c2; - traversal_stack[stack_ptr].dist = d2; - qbvh_stack_sort(&traversal_stack[stack_ptr], - &traversal_stack[stack_ptr - 1], - &traversal_stack[stack_ptr - 2], - &traversal_stack[stack_ptr - 3]); - } - - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - - /* If node is leaf, fetch triangle list. */ - if(node_addr < 0) { - float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1)); - - if((__float_as_uint(leaf.z) & visibility) == 0) { - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - continue; - } - - int prim_addr = __float_as_int(leaf.x); + { + cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7); + } + + /* One child is hit, continue with that child. */ + int r = __bscf(child_mask); + if (child_mask == 0) { + node_addr = __float_as_int(cnodes[r]); + continue; + } + + /* Two children are hit, push far child, and continue with + * closer child. + */ + int c0 = __float_as_int(cnodes[r]); + float d0 = ((float *)&dist)[r]; + r = __bscf(child_mask); + int c1 = __float_as_int(cnodes[r]); + float d1 = ((float *)&dist)[r]; + if (child_mask == 0) { + if (d1 < d0) { + node_addr = c1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + continue; + } + else { + node_addr = c0; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + continue; + } + } + + /* Here starts the slow path for 3 or 4 hit children. We push + * all nodes onto the stack to sort them there. 
+ */ + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c1; + traversal_stack[stack_ptr].dist = d1; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c0; + traversal_stack[stack_ptr].dist = d0; + + /* Three children are hit, push all onto stack and sort 3 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c2 = __float_as_int(cnodes[r]); + float d2 = ((float *)&dist)[r]; + if (child_mask == 0) { + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2]); + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + /* Four children are hit, push all onto stack and sort 4 + * stack items, continue with closest child. + */ + r = __bscf(child_mask); + int c3 = __float_as_int(cnodes[r]); + float d3 = ((float *)&dist)[r]; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c3; + traversal_stack[stack_ptr].dist = d3; + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = c2; + traversal_stack[stack_ptr].dist = d2; + qbvh_stack_sort(&traversal_stack[stack_ptr], + &traversal_stack[stack_ptr - 1], + &traversal_stack[stack_ptr - 2], + &traversal_stack[stack_ptr - 3]); + } + + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + + /* If node is leaf, fetch triangle list. */ + if (node_addr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1)); + + if ((__float_as_uint(leaf.z) & visibility) == 0) { + /* Pop. 
*/ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + continue; + } + + int prim_addr = __float_as_int(leaf.x); #if BVH_FEATURE(BVH_INSTANCING) - if(prim_addr >= 0) { + if (prim_addr >= 0) { #endif - int prim_addr2 = __float_as_int(leaf.y); - const uint type = __float_as_int(leaf.w); - const uint p_type = type & PRIMITIVE_ALL; - bool hit; - - /* Pop. */ - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - - /* Primitive intersection. */ - switch(p_type) { - case PRIMITIVE_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr); - if(hit) { - /* Move on to next entry in intersections array. */ - isect_array++; - num_hits++; + int prim_addr2 = __float_as_int(leaf.y); + const uint type = __float_as_int(leaf.w); + const uint p_type = type & PRIMITIVE_ALL; + bool hit; + + /* Pop. */ + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + + /* Primitive intersection. */ + switch (p_type) { + case PRIMITIVE_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. 
*/ + hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr); + if (hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; #if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; #endif - isect_array->t = isect_t; - if(num_hits == max_hits) { + isect_array->t = isect_t; + if (num_hits == max_hits) { #if BVH_FEATURE(BVH_INSTANCING) - if(object != OBJECT_NONE) { + if (object != OBJECT_NONE) { # if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); # else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); + Transform itfm = object_fetch_transform( + kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); # endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } -#endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } +#endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } #if BVH_FEATURE(BVH_MOTION) - case PRIMITIVE_MOTION_TRIANGLE: { - for(; prim_addr < prim_addr2; prim_addr++) { - kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); - /* Only primitives from volume object. */ - uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; - int object_flag = kernel_tex_fetch(__object_flag, tri_object); - if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { - continue; - } - /* Intersect ray against primitive. */ - hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr); - if(hit) { - /* Move on to next entry in intersections array. 
*/ - isect_array++; - num_hits++; + case PRIMITIVE_MOTION_TRIANGLE: { + for (; prim_addr < prim_addr2; prim_addr++) { + kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type); + /* Only primitives from volume object. */ + uint tri_object = (object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, prim_addr) : + object; + int object_flag = kernel_tex_fetch(__object_flag, tri_object); + if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) { + continue; + } + /* Intersect ray against primitive. */ + hit = motion_triangle_intersect( + kg, isect_array, P, dir, ray->time, visibility, object, prim_addr); + if (hit) { + /* Move on to next entry in intersections array. */ + isect_array++; + num_hits++; # if BVH_FEATURE(BVH_INSTANCING) - num_hits_in_instance++; + num_hits_in_instance++; # endif - isect_array->t = isect_t; - if(num_hits == max_hits) { + isect_array->t = isect_t; + if (num_hits == max_hits) { # if BVH_FEATURE(BVH_INSTANCING) - if(object != OBJECT_NONE) { + if (object != OBJECT_NONE) { # if BVH_FEATURE(BVH_MOTION) - float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); + float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir)); # else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - float t_fac = 1.0f / len(transform_direction(&itfm, dir)); + Transform itfm = object_fetch_transform( + kg, object, OBJECT_INVERSE_TRANSFORM); + float t_fac = 1.0f / len(transform_direction(&itfm, dir)); # endif - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } -# endif /* BVH_FEATURE(BVH_INSTANCING) */ - return num_hits; - } - } - } - break; - } + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } +# endif /* BVH_FEATURE(BVH_INSTANCING) */ + return num_hits; + } + } + } + break; + } #endif - } - } + } + } #if BVH_FEATURE(BVH_INSTANCING) - else { - /* Instance push. 
*/ - object = kernel_tex_fetch(__prim_object, -prim_addr-1); - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_HAS_VOLUME) { + else { + /* Instance push. */ + object = kernel_tex_fetch(__prim_object, -prim_addr - 1); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME) { # if BVH_FEATURE(BVH_MOTION) - isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); + isect_t = bvh_instance_motion_push( + kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm); # else - isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); + isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t); # endif - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = ssef(isect_t); - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = ssef(isect_t); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); # endif - num_hits_in_instance = 0; - isect_array->t = isect_t; - - ++stack_ptr; - kernel_assert(stack_ptr < BVH_QSTACK_SIZE); - traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; - - node_addr = kernel_tex_fetch(__object_node, object); - } - else { - /* Pop. 
*/ - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } - } - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + num_hits_in_instance = 0; + isect_array->t = isect_t; + + ++stack_ptr; + kernel_assert(stack_ptr < BVH_QSTACK_SIZE); + traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL; + + node_addr = kernel_tex_fetch(__object_node, object); + } + else { + /* Pop. */ + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } + } + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); #if BVH_FEATURE(BVH_INSTANCING) - if(stack_ptr >= 0) { - kernel_assert(object != OBJECT_NONE); + if (stack_ptr >= 0) { + kernel_assert(object != OBJECT_NONE); - /* Instance pop. */ - if(num_hits_in_instance) { - float t_fac; + /* Instance pop. */ + if (num_hits_in_instance) { + float t_fac; # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm); # else - bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); # endif - /* Scale isect->t to adjust for instancing. */ - for(int i = 0; i < num_hits_in_instance; i++) { - (isect_array-i-1)->t *= t_fac; - } - } - else { + /* Scale isect->t to adjust for instancing. 
*/ + for (int i = 0; i < num_hits_in_instance; i++) { + (isect_array - i - 1)->t *= t_fac; + } + } + else { # if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm); # else - bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX); # endif - } + } - isect_t = tmax; - isect_array->t = isect_t; + isect_t = tmax; + isect_array->t = isect_t; - qbvh_near_far_idx_calc(idir, - &near_x, &near_y, &near_z, - &far_x, &far_y, &far_z); - tfar = ssef(isect_t); + qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z); + tfar = ssef(isect_t); # if BVH_FEATURE(BVH_HAIR) - dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); + dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z)); # endif - idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); + idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z)); # ifdef __KERNEL_AVX2__ - P_idir = P*idir; - P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); + P_idir = P * idir; + P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z); # endif # if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__) - org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); + org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z)); # endif - object = OBJECT_NONE; - node_addr = traversal_stack[stack_ptr].addr; - --stack_ptr; - } -#endif /* FEATURE(BVH_INSTANCING) */ - } while(node_addr != ENTRYPOINT_SENTINEL); + object = OBJECT_NONE; + node_addr = traversal_stack[stack_ptr].addr; + --stack_ptr; + } +#endif /* FEATURE(BVH_INSTANCING) */ + } while (node_addr != ENTRYPOINT_SENTINEL); - return num_hits; + return num_hits; } #undef NODE_INTERSECT diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h index acccba9ecec..341d1e16eb1 100644 --- a/intern/cycles/kernel/closure/alloc.h +++ b/intern/cycles/kernel/closure/alloc.h @@ -18,69 +18,72 @@ 
CCL_NAMESPACE_BEGIN ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType type, float3 weight) { - kernel_assert(size <= sizeof(ShaderClosure)); + kernel_assert(size <= sizeof(ShaderClosure)); - if(sd->num_closure_left == 0) - return NULL; + if (sd->num_closure_left == 0) + return NULL; - ShaderClosure *sc = &sd->closure[sd->num_closure]; + ShaderClosure *sc = &sd->closure[sd->num_closure]; - sc->type = type; - sc->weight = weight; + sc->type = type; + sc->weight = weight; - sd->num_closure++; - sd->num_closure_left--; + sd->num_closure++; + sd->num_closure_left--; - return sc; + return sc; } ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size) { - /* Allocate extra space for closure that need more parameters. We allocate - * in chunks of sizeof(ShaderClosure) starting from the end of the closure - * array. - * - * This lets us keep the same fast array iteration over closures, as we - * found linked list iteration and iteration with skipping to be slower. */ - int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure)); - - if(num_extra > sd->num_closure_left) { - /* Remove previous closure if it was allocated. */ - sd->num_closure--; - sd->num_closure_left++; - return NULL; - } - - sd->num_closure_left -= num_extra; - return (ccl_addr_space void*)(sd->closure + sd->num_closure + sd->num_closure_left); + /* Allocate extra space for closure that need more parameters. We allocate + * in chunks of sizeof(ShaderClosure) starting from the end of the closure + * array. + * + * This lets us keep the same fast array iteration over closures, as we + * found linked list iteration and iteration with skipping to be slower. */ + int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure)); + + if (num_extra > sd->num_closure_left) { + /* Remove previous closure if it was allocated. 
*/ + sd->num_closure--; + sd->num_closure_left++; + return NULL; + } + + sd->num_closure_left -= num_extra; + return (ccl_addr_space void *)(sd->closure + sd->num_closure + sd->num_closure_left); } ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight) { - ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); + ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); - if(sc == NULL) - return NULL; + if (sc == NULL) + return NULL; - float sample_weight = fabsf(average(weight)); - sc->sample_weight = sample_weight; - return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL; + float sample_weight = fabsf(average(weight)); + sc->sample_weight = sample_weight; + return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL; } #ifdef __OSL__ -ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, int size, float3 weight, void *data) +ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, + int size, + float3 weight, + void *data) { - ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); + ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight); - if(!sc) - return NULL; + if (!sc) + return NULL; - memcpy((void *)sc, data, size); + memcpy((void *)sc, data, size); - float sample_weight = fabsf(average(weight)); - sc->weight = weight; - sc->sample_weight = sample_weight; - return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL; + float sample_weight = fabsf(average(weight)); + sc->weight = weight; + sc->sample_weight = sample_weight; + return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? sc : NULL; } #endif diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 3a9629ea9d7..5e26f90a878 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -39,38 +39,38 @@ CCL_NAMESPACE_BEGIN * 0 for singular closures and 1 otherwise. 
*/ ccl_device_inline float bsdf_get_specular_roughness_squared(const ShaderClosure *sc) { - if(CLOSURE_IS_BSDF_SINGULAR(sc->type)) { - return 0.0f; - } + if (CLOSURE_IS_BSDF_SINGULAR(sc->type)) { + return 0.0f; + } - if(CLOSURE_IS_BSDF_MICROFACET(sc->type)) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; - return bsdf->alpha_x*bsdf->alpha_y; - } + if (CLOSURE_IS_BSDF_MICROFACET(sc->type)) { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc; + return bsdf->alpha_x * bsdf->alpha_y; + } - return 1.0f; + return 1.0f; } ccl_device_inline float bsdf_get_roughness_squared(const ShaderClosure *sc) { - /* This version includes diffuse, mainly for baking Principled BSDF - * where specular and metallic zero otherwise does not bake the - * specified roughness parameter. */ - if(sc->type == CLOSURE_BSDF_OREN_NAYAR_ID) { - OrenNayarBsdf *bsdf = (OrenNayarBsdf*)sc; - return sqr(sqr(bsdf->roughness)); - } + /* This version includes diffuse, mainly for baking Principled BSDF + * where specular and metallic zero otherwise does not bake the + * specified roughness parameter. 
*/ + if (sc->type == CLOSURE_BSDF_OREN_NAYAR_ID) { + OrenNayarBsdf *bsdf = (OrenNayarBsdf *)sc; + return sqr(sqr(bsdf->roughness)); + } - if(sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) { - PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)sc; - return sqr(sqr(bsdf->roughness)); - } + if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) { + PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)sc; + return sqr(sqr(bsdf->roughness)); + } - if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { - return 0.0f; - } + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { + return 0.0f; + } - return bsdf_get_specular_roughness_squared(sc); + return bsdf_get_specular_roughness_squared(sc); } ccl_device_inline int bsdf_sample(KernelGlobals *kg, @@ -83,133 +83,349 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg, differential3 *domega_in, float *pdf) { - int label; + int label; - switch(sc->type) { - case CLOSURE_BSDF_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_ID: - label = bsdf_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; + switch (sc->type) { + case CLOSURE_BSDF_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_ID: + label = bsdf_diffuse_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; #ifdef __SVM__ - case CLOSURE_BSDF_OREN_NAYAR_ID: - label = bsdf_oren_nayar_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; -#ifdef __OSL__ - case CLOSURE_BSDF_PHONG_RAMP_ID: - label = bsdf_phong_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_DIFFUSE_RAMP_ID: - label = bsdf_diffuse_ramp_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; -#endif - case CLOSURE_BSDF_TRANSLUCENT_ID: 
- label = bsdf_translucent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_REFLECTION_ID: - label = bsdf_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_REFRACTION_ID: - label = bsdf_refraction_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_TRANSPARENT_ID: - label = bsdf_transparent_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_MICROFACET_GGX_ID: - case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - label = bsdf_microfacet_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: - label = bsdf_microfacet_multi_ggx_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state); - break; - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: - label = bsdf_microfacet_multi_ggx_glass_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf, &sd->lcg_state); - break; - case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - label = bsdf_microfacet_beckmann_sample(kg, sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - 
eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - label = bsdf_ashikhmin_shirley_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - label = bsdf_ashikhmin_velvet_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_DIFFUSE_TOON_ID: - label = bsdf_diffuse_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_GLOSSY_TOON_ID: - label = bsdf_glossy_toon_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_HAIR_REFLECTION_ID: - label = bsdf_hair_reflection_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: - label = bsdf_principled_hair_sample(kg, sc, sd, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; -#ifdef __PRINCIPLED__ - case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: - label = bsdf_principled_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: - label = bsdf_principled_sheen_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, - eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; -#endif /* __PRINCIPLED__ */ + case CLOSURE_BSDF_OREN_NAYAR_ID: + 
label = bsdf_oren_nayar_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; +# ifdef __OSL__ + case CLOSURE_BSDF_PHONG_RAMP_ID: + label = bsdf_phong_ramp_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_DIFFUSE_RAMP_ID: + label = bsdf_diffuse_ramp_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; +# endif + case CLOSURE_BSDF_TRANSLUCENT_ID: + label = bsdf_translucent_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_REFLECTION_ID: + label = bsdf_reflection_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_REFRACTION_ID: + label = bsdf_refraction_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_TRANSPARENT_ID: + label = bsdf_transparent_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_MICROFACET_GGX_ID: + case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: + label = bsdf_microfacet_ggx_sample(kg, + sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: + case 
CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: + label = bsdf_microfacet_multi_ggx_sample(kg, + sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf, + &sd->lcg_state); + break; + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: + label = bsdf_microfacet_multi_ggx_glass_sample(kg, + sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf, + &sd->lcg_state); + break; + case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: + label = bsdf_microfacet_beckmann_sample(kg, + sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: + label = bsdf_ashikhmin_shirley_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: + label = bsdf_ashikhmin_velvet_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_DIFFUSE_TOON_ID: + label = bsdf_diffuse_toon_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_GLOSSY_TOON_ID: + label = bsdf_glossy_toon_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_HAIR_REFLECTION_ID: + label = bsdf_hair_reflection_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + 
&domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: + label = bsdf_hair_transmission_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: + label = bsdf_principled_hair_sample( + kg, sc, sd, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); + break; +# ifdef __PRINCIPLED__ + case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: + label = bsdf_principled_diffuse_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: + label = bsdf_principled_sheen_sample(sc, + sd->Ng, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; +# endif /* __PRINCIPLED__ */ #endif #ifdef __VOLUME__ - case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; + case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: + label = volume_henyey_greenstein_sample(sc, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; #endif - default: - label = LABEL_NONE; - break; - } + default: + label = LABEL_NONE; + break; + } - /* Test if BSDF sample should be treated as transparent for background. */ - if(label & LABEL_TRANSMIT) { - float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold; + /* Test if BSDF sample should be treated as transparent for background. 
*/ + if (label & LABEL_TRANSMIT) { + float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold; - if(threshold_squared >= 0.0f) { - if(bsdf_get_specular_roughness_squared(sc) <= threshold_squared) { - label |= LABEL_TRANSMIT_TRANSPARENT; - } - } - } + if (threshold_squared >= 0.0f) { + if (bsdf_get_specular_roughness_squared(sc) <= threshold_squared) { + label |= LABEL_TRANSMIT_TRANSPARENT; + } + } + } - return label; + return label; } #ifndef __KERNEL_CUDA__ @@ -217,285 +433,288 @@ ccl_device #else ccl_device_inline #endif -float3 bsdf_eval(KernelGlobals *kg, - ShaderData *sd, - const ShaderClosure *sc, - const float3 omega_in, - float *pdf) + float3 + bsdf_eval(KernelGlobals *kg, + ShaderData *sd, + const ShaderClosure *sc, + const float3 omega_in, + float *pdf) { - float3 eval; + float3 eval; - if(dot(sd->Ng, omega_in) >= 0.0f) { - switch(sc->type) { - case CLOSURE_BSDF_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_ID: - eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf); - break; + if (dot(sd->Ng, omega_in) >= 0.0f) { + switch (sc->type) { + case CLOSURE_BSDF_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_ID: + eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf); + break; #ifdef __SVM__ - case CLOSURE_BSDF_OREN_NAYAR_ID: - eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf); - break; -#ifdef __OSL__ - case CLOSURE_BSDF_PHONG_RAMP_ID: - eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_DIFFUSE_RAMP_ID: - eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf); - break; -#endif - case CLOSURE_BSDF_TRANSLUCENT_ID: - eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_REFLECTION_ID: - eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_REFRACTION_ID: - eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_TRANSPARENT_ID: - eval = 
bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_MICROFACET_GGX_ID: - case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: - eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state); - break; - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: - eval = bsdf_microfacet_multi_ggx_glass_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state); - break; - case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_DIFFUSE_TOON_ID: - eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_GLOSSY_TOON_ID: - eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: - eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf); - break; - case CLOSURE_BSDF_HAIR_REFLECTION_ID: - eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf); - break; -#ifdef __PRINCIPLED__ - case 
CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: - eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: - eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf); - break; -#endif /* __PRINCIPLED__ */ + case CLOSURE_BSDF_OREN_NAYAR_ID: + eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf); + break; +# ifdef __OSL__ + case CLOSURE_BSDF_PHONG_RAMP_ID: + eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_DIFFUSE_RAMP_ID: + eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf); + break; +# endif + case CLOSURE_BSDF_TRANSLUCENT_ID: + eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_REFLECTION_ID: + eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_REFRACTION_ID: + eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_TRANSPARENT_ID: + eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_MICROFACET_GGX_ID: + case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: + eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: + eval = bsdf_microfacet_multi_ggx_eval_reflect(sc, sd->I, omega_in, pdf, &sd->lcg_state); + break; + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: + eval = bsdf_microfacet_multi_ggx_glass_eval_reflect( + sc, sd->I, omega_in, pdf, &sd->lcg_state); + break; + case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: + case 
CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: + eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: + eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: + eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_DIFFUSE_TOON_ID: + eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_GLOSSY_TOON_ID: + eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: + eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf); + break; + case CLOSURE_BSDF_HAIR_REFLECTION_ID: + eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: + eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf); + break; +# ifdef __PRINCIPLED__ + case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: + eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: + eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf); + break; +# endif /* __PRINCIPLED__ */ #endif #ifdef __VOLUME__ - case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); - break; + case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: + eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); + break; #endif - default: - eval = make_float3(0.0f, 0.0f, 0.0f); - break; - } - } - else { - switch(sc->type) { - case CLOSURE_BSDF_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_ID: - eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf); - break; + default: + eval = make_float3(0.0f, 0.0f, 0.0f); + break; + } + } + else { + switch (sc->type) { + case CLOSURE_BSDF_DIFFUSE_ID: + case 
CLOSURE_BSDF_BSSRDF_ID: + eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf); + break; #ifdef __SVM__ - case CLOSURE_BSDF_OREN_NAYAR_ID: - eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_TRANSLUCENT_ID: - eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_REFLECTION_ID: - eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_REFRACTION_ID: - eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_TRANSPARENT_ID: - eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_MICROFACET_GGX_ID: - case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: - eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state); - break; - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: - eval = bsdf_microfacet_multi_ggx_glass_eval_transmit(sc, sd->I, omega_in, pdf, &sd->lcg_state); - break; - case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case 
CLOSURE_BSDF_DIFFUSE_TOON_ID: - eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_GLOSSY_TOON_ID: - eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: - eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf); - break; - case CLOSURE_BSDF_HAIR_REFLECTION_ID: - eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf); - break; -#ifdef __PRINCIPLED__ - case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: - eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf); - break; - case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: - eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf); - break; -#endif /* __PRINCIPLED__ */ + case CLOSURE_BSDF_OREN_NAYAR_ID: + eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_TRANSLUCENT_ID: + eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_REFLECTION_ID: + eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_REFRACTION_ID: + eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_TRANSPARENT_ID: + eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_MICROFACET_GGX_ID: + case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: + eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: + eval = bsdf_microfacet_multi_ggx_eval_transmit(sc, sd->I, 
omega_in, pdf, &sd->lcg_state); + break; + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: + eval = bsdf_microfacet_multi_ggx_glass_eval_transmit( + sc, sd->I, omega_in, pdf, &sd->lcg_state); + break; + case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: + eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: + eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: + eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_DIFFUSE_TOON_ID: + eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_GLOSSY_TOON_ID: + eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: + eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf); + break; + case CLOSURE_BSDF_HAIR_REFLECTION_ID: + eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: + eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf); + break; +# ifdef __PRINCIPLED__ + case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: + eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf); + break; + case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID: + eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf); + break; +# endif /* __PRINCIPLED__ */ #endif #ifdef __VOLUME__ - case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); - break; + case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: + eval = volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); + break; #endif - 
default: - eval = make_float3(0.0f, 0.0f, 0.0f); - break; - } - } + default: + eval = make_float3(0.0f, 0.0f, 0.0f); + break; + } + } - return eval; + return eval; } ccl_device void bsdf_blur(KernelGlobals *kg, ShaderClosure *sc, float roughness) { - /* ToDo: do we want to blur volume closures? */ + /* ToDo: do we want to blur volume closures? */ #ifdef __SVM__ - switch(sc->type) { - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: - bsdf_microfacet_multi_ggx_blur(sc, roughness); - break; - case CLOSURE_BSDF_MICROFACET_GGX_ID: - case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - bsdf_microfacet_ggx_blur(sc, roughness); - break; - case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - bsdf_microfacet_beckmann_blur(sc, roughness); - break; - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - bsdf_ashikhmin_shirley_blur(sc, roughness); - break; - case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: - bsdf_principled_hair_blur(sc, roughness); - break; - default: - break; - } + switch (sc->type) { + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: + bsdf_microfacet_multi_ggx_blur(sc, roughness); + break; + case CLOSURE_BSDF_MICROFACET_GGX_ID: + case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: + case 
CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: + bsdf_microfacet_ggx_blur(sc, roughness); + break; + case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: + bsdf_microfacet_beckmann_blur(sc, roughness); + break; + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: + bsdf_ashikhmin_shirley_blur(sc, roughness); + break; + case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: + bsdf_principled_hair_blur(sc, roughness); + break; + default: + break; + } #endif } ccl_device bool bsdf_merge(ShaderClosure *a, ShaderClosure *b) { #ifdef __SVM__ - switch(a->type) { - case CLOSURE_BSDF_TRANSPARENT_ID: - return true; - case CLOSURE_BSDF_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_ID: - case CLOSURE_BSDF_TRANSLUCENT_ID: - return bsdf_diffuse_merge(a, b); - case CLOSURE_BSDF_OREN_NAYAR_ID: - return bsdf_oren_nayar_merge(a, b); - case CLOSURE_BSDF_REFLECTION_ID: - case CLOSURE_BSDF_REFRACTION_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ID: - case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: - return bsdf_microfacet_merge(a, b); - case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: - return bsdf_ashikhmin_velvet_merge(a, b); - case CLOSURE_BSDF_DIFFUSE_TOON_ID: - case CLOSURE_BSDF_GLOSSY_TOON_ID: - return bsdf_toon_merge(a, b); - case CLOSURE_BSDF_HAIR_REFLECTION_ID: - 
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: - return bsdf_hair_merge(a, b); -#ifdef __PRINCIPLED__ - case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: - case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: - return bsdf_principled_diffuse_merge(a, b); -#endif -#ifdef __VOLUME__ - case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - return volume_henyey_greenstein_merge(a, b); -#endif - default: - return false; - } + switch (a->type) { + case CLOSURE_BSDF_TRANSPARENT_ID: + return true; + case CLOSURE_BSDF_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_ID: + case CLOSURE_BSDF_TRANSLUCENT_ID: + return bsdf_diffuse_merge(a, b); + case CLOSURE_BSDF_OREN_NAYAR_ID: + return bsdf_oren_nayar_merge(a, b); + case CLOSURE_BSDF_REFLECTION_ID: + case CLOSURE_BSDF_REFRACTION_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ID: + case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: + return bsdf_microfacet_merge(a, b); + case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: + return bsdf_ashikhmin_velvet_merge(a, b); + case CLOSURE_BSDF_DIFFUSE_TOON_ID: + case CLOSURE_BSDF_GLOSSY_TOON_ID: + return bsdf_toon_merge(a, b); + case CLOSURE_BSDF_HAIR_REFLECTION_ID: + case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: + return bsdf_hair_merge(a, b); +# ifdef __PRINCIPLED__ + case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID: + case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID: + return bsdf_principled_diffuse_merge(a, b); +# endif +# ifdef __VOLUME__ + case 
CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: + return volume_henyey_greenstein_merge(a, b); +# endif + default: + return false; + } #else - return false; + return false; #endif } diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index 4e7425bd800..b3b1c37748d 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -33,203 +33,226 @@ CCL_NAMESPACE_BEGIN ccl_device int bsdf_ashikhmin_shirley_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); - bsdf->alpha_y = bsdf->alpha_x; + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device int bsdf_ashikhmin_shirley_aniso_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); - bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f); + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f); - bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc; - bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); - bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); + bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); + bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float roughness) { - return 2.0f / (roughness*roughness) - 2.0f; + return 2.0f / (roughness * roughness) - 2.0f; } -ccl_device_forceinline float3 
bsdf_ashikhmin_shirley_eval_reflect( - const ShaderClosure *sc, - const float3 I, - const float3 omega_in, - float *pdf) +ccl_device_forceinline float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float3 N = bsdf->N; - - float NdotI = dot(N, I); /* in Cycles/OSL convention I is omega_out */ - float NdotO = dot(N, omega_in); /* and consequently we use for O omaga_in ;) */ - - float out = 0.0f; - - if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) - return make_float3(0.0f, 0.0f, 0.0f); - - if(NdotI > 0.0f && NdotO > 0.0f) { - NdotI = fmaxf(NdotI, 1e-6f); - NdotO = fmaxf(NdotO, 1e-6f); - float3 H = normalize(omega_in + I); - float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f); - float HdotN = fmaxf(dot(H, N), 1e-6f); - - float pump = 1.0f / fmaxf(1e-6f, (HdotI*fmaxf(NdotO, NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */ - /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */ - - float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x); - float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y); - - if(n_x == n_y) { - /* isotropic */ - float e = n_x; - float lobe = powf(HdotN, e); - float norm = (n_x + 1.0f) / (8.0f * M_PI_F); - - out = NdotO * norm * lobe * pump; - *pdf = norm * lobe / HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 
8 in paper) */ - } - else { - /* anisotropic */ - float3 X, Y; - make_orthonormals_tangent(N, bsdf->T, &X, &Y); - - float HdotX = dot(H, X); - float HdotY = dot(H, Y); - float lobe; - if(HdotN < 1.0f) { - float e = (n_x * HdotX*HdotX + n_y * HdotY*HdotY) / (1.0f - HdotN*HdotN); - lobe = powf(HdotN, e); - } - else { - lobe = 1.0f; - } - float norm = sqrtf((n_x + 1.0f)*(n_y + 1.0f)) / (8.0f * M_PI_F); - - out = NdotO * norm * lobe * pump; - *pdf = norm * lobe / HdotI; - } - } - - return make_float3(out, out, out); + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float3 N = bsdf->N; + + float NdotI = dot(N, I); /* in Cycles/OSL convention I is omega_out */ + float NdotO = dot(N, omega_in); /* and consequently we use for O omaga_in ;) */ + + float out = 0.0f; + + if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) + return make_float3(0.0f, 0.0f, 0.0f); + + if (NdotI > 0.0f && NdotO > 0.0f) { + NdotI = fmaxf(NdotI, 1e-6f); + NdotO = fmaxf(NdotO, 1e-6f); + float3 H = normalize(omega_in + I); + float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f); + float HdotN = fmaxf(dot(H, N), 1e-6f); + + float pump = + 1.0f / + fmaxf( + 1e-6f, + (HdotI * + fmaxf( + NdotO, + NdotI))); /* pump from original paper (first derivative disc., but cancels the HdotI in the pdf nicely) */ + /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */ /* pump from d-brdf paper */ + + float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x); + float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y); + + if (n_x == n_y) { + /* isotropic */ + float e = n_x; + float lobe = powf(HdotN, e); + float norm = (n_x + 1.0f) / (8.0f * M_PI_F); + + out = NdotO * norm * lobe * pump; + *pdf = + norm * lobe / + HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 
8 in paper) */ + } + else { + /* anisotropic */ + float3 X, Y; + make_orthonormals_tangent(N, bsdf->T, &X, &Y); + + float HdotX = dot(H, X); + float HdotY = dot(H, Y); + float lobe; + if (HdotN < 1.0f) { + float e = (n_x * HdotX * HdotX + n_y * HdotY * HdotY) / (1.0f - HdotN * HdotN); + lobe = powf(HdotN, e); + } + else { + lobe = 1.0f; + } + float norm = sqrtf((n_x + 1.0f) * (n_y + 1.0f)) / (8.0f * M_PI_F); + + out = NdotO * norm * lobe * pump; + *pdf = norm * lobe / HdotI; + } + } + + return make_float3(out, out, out); } -ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta) +ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant( + float n_x, float n_y, float randu, float randv, float *phi, float *cos_theta) { - *phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu)); - float cos_phi = cosf(*phi); - float sin_phi = sinf(*phi); - *cos_theta = powf(randv, 1.0f / (n_x * cos_phi*cos_phi + n_y * sin_phi*sin_phi + 1.0f)); + *phi = atanf(sqrtf((n_x + 1.0f) / (n_y + 1.0f)) * tanf(M_PI_2_F * randu)); + float cos_phi = cosf(*phi); + float sin_phi = sinf(*phi); + *cos_theta = powf(randv, 1.0f / (n_x * cos_phi * cos_phi + n_y * sin_phi * sin_phi + 1.0f)); } -ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 
dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float3 N = bsdf->N; - int label = LABEL_REFLECT | LABEL_GLOSSY; - - float NdotI = dot(N, I); - if(NdotI > 0.0f) { - - float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x); - float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y); - - /* get x,y basis on the surface for anisotropy */ - float3 X, Y; - - if(n_x == n_y) - make_orthonormals(N, &X, &Y); - else - make_orthonormals_tangent(N, bsdf->T, &X, &Y); - - /* sample spherical coords for h in tangent space */ - float phi; - float cos_theta; - if(n_x == n_y) { - /* isotropic sampling */ - phi = M_2PI_F * randu; - cos_theta = powf(randv, 1.0f / (n_x + 1.0f)); - } - else { - /* anisotropic sampling */ - if(randu < 0.25f) { /* first quadrant */ - float remapped_randu = 4.0f * randu; - bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); - } - else if(randu < 0.5f) { /* second quadrant */ - float remapped_randu = 4.0f * (.5f - randu); - bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); - phi = M_PI_F - phi; - } - else if(randu < 0.75f) { /* third quadrant */ - float remapped_randu = 4.0f * (randu - 0.5f); - bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); - phi = M_PI_F + phi; - } - else { /* fourth quadrant */ - float remapped_randu = 4.0f * (1.0f - randu); - bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); - phi = 2.0f * M_PI_F - phi; - } - } - - /* get half vector in tangent space */ - float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta*cos_theta)); - float cos_phi = cosf(phi); - float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */ - float3 h = make_float3( - sin_theta * cos_phi, 
- sin_theta * sin_phi, - cos_theta - ); - - /* half vector to world space */ - float3 H = h.x*X + h.y*Y + h.z*N; - float HdotI = dot(H, I); - if(HdotI < 0.0f) H = -H; - - /* reflect I on H to get omega_in */ - *omega_in = -I + (2.0f * HdotI) * H; - - if(fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) { - /* Some high number for MIS. */ - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); - label = LABEL_REFLECT | LABEL_SINGULAR; - } - else { - /* leave the rest to eval_reflect */ - *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf); - } + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float3 N = bsdf->N; + int label = LABEL_REFLECT | LABEL_GLOSSY; + + float NdotI = dot(N, I); + if (NdotI > 0.0f) { + + float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x); + float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y); + + /* get x,y basis on the surface for anisotropy */ + float3 X, Y; + + if (n_x == n_y) + make_orthonormals(N, &X, &Y); + else + make_orthonormals_tangent(N, bsdf->T, &X, &Y); + + /* sample spherical coords for h in tangent space */ + float phi; + float cos_theta; + if (n_x == n_y) { + /* isotropic sampling */ + phi = M_2PI_F * randu; + cos_theta = powf(randv, 1.0f / (n_x + 1.0f)); + } + else { + /* anisotropic sampling */ + if (randu < 0.25f) { /* first quadrant */ + float remapped_randu = 4.0f * randu; + bsdf_ashikhmin_shirley_sample_first_quadrant( + n_x, n_y, remapped_randu, randv, &phi, &cos_theta); + } + else if (randu < 0.5f) { /* second quadrant */ + float remapped_randu = 4.0f * (.5f - randu); + bsdf_ashikhmin_shirley_sample_first_quadrant( + n_x, n_y, remapped_randu, randv, &phi, &cos_theta); + phi = M_PI_F - phi; + } + else if (randu < 0.75f) { /* third quadrant */ + float remapped_randu = 4.0f * (randu - 0.5f); + bsdf_ashikhmin_shirley_sample_first_quadrant( + n_x, n_y, remapped_randu, randv, &phi, &cos_theta); + phi = M_PI_F + phi; + } + else { /* fourth quadrant */ + float 
remapped_randu = 4.0f * (1.0f - randu); + bsdf_ashikhmin_shirley_sample_first_quadrant( + n_x, n_y, remapped_randu, randv, &phi, &cos_theta); + phi = 2.0f * M_PI_F - phi; + } + } + + /* get half vector in tangent space */ + float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta * cos_theta)); + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */ + float3 h = make_float3(sin_theta * cos_phi, sin_theta * sin_phi, cos_theta); + + /* half vector to world space */ + float3 H = h.x * X + h.y * Y + h.z * N; + float HdotI = dot(H, I); + if (HdotI < 0.0f) + H = -H; + + /* reflect I on H to get omega_in */ + *omega_in = -I + (2.0f * HdotI) * H; + + if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) { + /* Some high number for MIS. */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + label = LABEL_REFLECT | LABEL_SINGULAR; + } + else { + /* leave the rest to eval_reflect */ + *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf); + } #ifdef __RAY_DIFFERENTIALS__ - /* just do the reflection thing for now */ - *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy; + /* just do the reflection thing for now */ + *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy; #endif - } + } - return label; + return label; } - CCL_NAMESPACE_END -#endif /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */ +#endif /* __BSDF_ASHIKHMIN_SHIRLEY_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h index 80fd9ba2b37..8122bcc1424 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h @@ -36,126 +36,142 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct VelvetBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float sigma; - float invsigma2; + float sigma; + float invsigma2; } VelvetBsdf; ccl_device int 
bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf) { - float sigma = fmaxf(bsdf->sigma, 0.01f); - bsdf->invsigma2 = 1.0f/(sigma * sigma); + float sigma = fmaxf(bsdf->sigma, 0.01f); + bsdf->invsigma2 = 1.0f / (sigma * sigma); - bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID; + bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device bool bsdf_ashikhmin_velvet_merge(const ShaderClosure *a, const ShaderClosure *b) { - const VelvetBsdf *bsdf_a = (const VelvetBsdf*)a; - const VelvetBsdf *bsdf_b = (const VelvetBsdf*)b; + const VelvetBsdf *bsdf_a = (const VelvetBsdf *)a; + const VelvetBsdf *bsdf_b = (const VelvetBsdf *)b; - return (isequal_float3(bsdf_a->N, bsdf_b->N)) && - (bsdf_a->sigma == bsdf_b->sigma); + return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->sigma == bsdf_b->sigma); } -ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const VelvetBsdf *bsdf = (const VelvetBsdf*)sc; - float m_invsigma2 = bsdf->invsigma2; - float3 N = bsdf->N; + const VelvetBsdf *bsdf = (const VelvetBsdf *)sc; + float m_invsigma2 = bsdf->invsigma2; + float3 N = bsdf->N; - float cosNO = dot(N, I); - float cosNI = dot(N, omega_in); - if(cosNO > 0 && cosNI > 0) { - float3 H = normalize(omega_in + I); + float cosNO = dot(N, I); + float cosNI = dot(N, omega_in); + if (cosNO > 0 && cosNI > 0) { + float3 H = normalize(omega_in + I); - float cosNH = dot(N, H); - float cosHO = fabsf(dot(I, H)); + float cosNH = dot(N, H); + float cosHO = fabsf(dot(I, H)); - if(!(fabsf(cosNH) < 1.0f-1e-5f && cosHO > 1e-5f)) - return make_float3(0.0f, 0.0f, 0.0f); + if (!(fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f)) + return make_float3(0.0f, 0.0f, 0.0f); - float cosNHdivHO = cosNH / cosHO; - cosNHdivHO = fmaxf(cosNHdivHO, 
1e-5f); + float cosNHdivHO = cosNH / cosHO; + cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f); - float fac1 = 2 * fabsf(cosNHdivHO * cosNO); - float fac2 = 2 * fabsf(cosNHdivHO * cosNI); + float fac1 = 2 * fabsf(cosNHdivHO * cosNO); + float fac2 = 2 * fabsf(cosNHdivHO * cosNI); - float sinNH2 = 1 - cosNH * cosNH; - float sinNH4 = sinNH2 * sinNH2; - float cotangent2 = (cosNH * cosNH) / sinNH2; + float sinNH2 = 1 - cosNH * cosNH; + float sinNH4 = sinNH2 * sinNH2; + float cotangent2 = (cosNH * cosNH) / sinNH2; - float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4; - float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically + float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4; + float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically - float out = 0.25f * (D * G) / cosNO; + float out = 0.25f * (D * G) / cosNO; - *pdf = 0.5f * M_1_PI_F; - return make_float3(out, out, out); - } + *pdf = 0.5f * M_1_PI_F; + return make_float3(out, out, out); + } - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_ashikhmin_velvet_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const VelvetBsdf *bsdf = (const VelvetBsdf*)sc; 
- float m_invsigma2 = bsdf->invsigma2; - float3 N = bsdf->N; + const VelvetBsdf *bsdf = (const VelvetBsdf *)sc; + float m_invsigma2 = bsdf->invsigma2; + float3 N = bsdf->N; - // we are viewing the surface from above - send a ray out with uniform - // distribution over the hemisphere - sample_uniform_hemisphere(N, randu, randv, omega_in, pdf); + // we are viewing the surface from above - send a ray out with uniform + // distribution over the hemisphere + sample_uniform_hemisphere(N, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) > 0) { - float3 H = normalize(*omega_in + I); + if (dot(Ng, *omega_in) > 0) { + float3 H = normalize(*omega_in + I); - float cosNI = dot(N, *omega_in); - float cosNO = dot(N, I); - float cosNH = dot(N, H); - float cosHO = fabsf(dot(I, H)); + float cosNI = dot(N, *omega_in); + float cosNO = dot(N, I); + float cosNH = dot(N, H); + float cosHO = fabsf(dot(I, H)); - if(fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f-1e-5f && cosHO > 1e-5f) { - float cosNHdivHO = cosNH / cosHO; - cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f); + if (fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f) { + float cosNHdivHO = cosNH / cosHO; + cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f); - float fac1 = 2 * fabsf(cosNHdivHO * cosNO); - float fac2 = 2 * fabsf(cosNHdivHO * cosNI); + float fac1 = 2 * fabsf(cosNHdivHO * cosNO); + float fac2 = 2 * fabsf(cosNHdivHO * cosNI); - float sinNH2 = 1 - cosNH * cosNH; - float sinNH4 = sinNH2 * sinNH2; - float cotangent2 = (cosNH * cosNH) / sinNH2; + float sinNH2 = 1 - cosNH * cosNH; + float sinNH4 = sinNH2 * sinNH2; + float cotangent2 = (cosNH * cosNH) / sinNH2; - float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4; - float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically + float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4; + float G = min(1.0f, min(fac1, fac2)); // TODO: derive G from D analytically - float power = 0.25f * (D * G) / cosNO; + float power = 
0.25f * (D * G) / cosNO; - *eval = make_float3(power, power, power); + *eval = make_float3(power, power, power); #ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the retroreflective bounce - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; + // TODO: find a better approximation for the retroreflective bounce + *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; #endif - } - else - *pdf = 0.0f; - } - else - *pdf = 0.0f; - - return LABEL_REFLECT|LABEL_DIFFUSE; + } + else + *pdf = 0.0f; + } + else + *pdf = 0.0f; + + return LABEL_REFLECT | LABEL_DIFFUSE; } CCL_NAMESPACE_END -#endif /* __BSDF_ASHIKHMIN_VELVET_H__ */ +#endif /* __BSDF_ASHIKHMIN_VELVET_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h index 946c460a70e..76b50548455 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse.h @@ -36,107 +36,141 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct DiffuseBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; } DiffuseBsdf; /* DIFFUSE */ ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_DIFFUSE_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_DIFFUSE_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device bool bsdf_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b) { - const DiffuseBsdf *bsdf_a = (const DiffuseBsdf*)a; - const DiffuseBsdf *bsdf_b = (const DiffuseBsdf*)b; + const DiffuseBsdf *bsdf_a = (const DiffuseBsdf *)a; + const DiffuseBsdf *bsdf_b = (const DiffuseBsdf *)b; - return (isequal_float3(bsdf_a->N, bsdf_b->N)); + return (isequal_float3(bsdf_a->N, bsdf_b->N)); } -ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_diffuse_eval_reflect(const ShaderClosure *sc, + const float3 I, + 
const float3 omega_in, + float *pdf) { - const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; - float3 N = bsdf->N; + const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc; + float3 N = bsdf->N; - float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F; - *pdf = cos_pi; - return make_float3(cos_pi, cos_pi, cos_pi); + float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F; + *pdf = cos_pi; + return make_float3(cos_pi, cos_pi, cos_pi); } -ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_diffuse_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_diffuse_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; - float3 N = bsdf->N; + const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc; + float3 N = bsdf->N; - // distribution over the hemisphere - sample_cos_hemisphere(N, randu, randv, omega_in, pdf); + // distribution over the hemisphere + sample_cos_hemisphere(N, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) > 0.0f) { - *eval = make_float3(*pdf, *pdf, *pdf); + if (dot(Ng, *omega_in) > 0.0f) { + *eval = make_float3(*pdf, *pdf, *pdf); #ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; + // TODO: find a better approximation for the diffuse bounce + *domega_in_dx = (2 * 
dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; #endif - } - else - *pdf = 0.0f; + } + else + *pdf = 0.0f; - return LABEL_REFLECT|LABEL_DIFFUSE; + return LABEL_REFLECT | LABEL_DIFFUSE; } /* TRANSLUCENT */ ccl_device int bsdf_translucent_setup(DiffuseBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_TRANSLUCENT_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_translucent_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; - float3 N = bsdf->N; + const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc; + float3 N = bsdf->N; - float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F; - *pdf = cos_pi; - return make_float3 (cos_pi, cos_pi, cos_pi); + float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F; + *pdf = cos_pi; + return make_float3(cos_pi, cos_pi, cos_pi); } -ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; - float3 N = 
bsdf->N; - - // we are viewing the surface from the right side - send a ray out with cosine - // distribution over the hemisphere - sample_cos_hemisphere (-N, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) < 0) { - *eval = make_float3(*pdf, *pdf, *pdf); + const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc; + float3 N = bsdf->N; + + // we are viewing the surface from the right side - send a ray out with cosine + // distribution over the hemisphere + sample_cos_hemisphere(-N, randu, randv, omega_in, pdf); + if (dot(Ng, *omega_in) < 0) { + *eval = make_float3(*pdf, *pdf, *pdf); #ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); + // TODO: find a better approximation for the diffuse bounce + *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); + *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); #endif - } - else { - *pdf = 0; - } - return LABEL_TRANSMIT|LABEL_DIFFUSE; + } + else { + *pdf = 0; + } + return LABEL_TRANSMIT | LABEL_DIFFUSE; } CCL_NAMESPACE_END -#endif /* __BSDF_DIFFUSE_H__ */ +#endif /* __BSDF_DIFFUSE_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h index ca33a5b275c..9d13eb8d4e0 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h @@ -38,73 +38,90 @@ CCL_NAMESPACE_BEGIN #ifdef __OSL__ typedef ccl_addr_space struct DiffuseRampBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float3 *colors; + float3 *colors; } DiffuseRampBsdf; ccl_device float3 bsdf_diffuse_ramp_get_color(const float3 colors[8], float pos) { - int MAXCOLORS = 8; - - float npos = pos * (float)(MAXCOLORS - 1); - int ipos = float_to_int(npos); - if(ipos < 0) - return colors[0]; - if(ipos >= (MAXCOLORS - 1)) - return colors[MAXCOLORS - 1]; - float offset = npos - (float)ipos; - return colors[ipos] * (1.0f - 
offset) + colors[ipos+1] * offset; + int MAXCOLORS = 8; + + float npos = pos * (float)(MAXCOLORS - 1); + int ipos = float_to_int(npos); + if (ipos < 0) + return colors[0]; + if (ipos >= (MAXCOLORS - 1)) + return colors[MAXCOLORS - 1]; + float offset = npos - (float)ipos; + return colors[ipos] * (1.0f - offset) + colors[ipos + 1] * offset; } ccl_device int bsdf_diffuse_ramp_setup(DiffuseRampBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness) { } -ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_diffuse_ramp_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc; - float3 N = bsdf->N; + const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc; + float3 N = bsdf->N; - float cos_pi = fmaxf(dot(N, omega_in), 0.0f); - *pdf = cos_pi * M_1_PI_F; - return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F; + float cos_pi = fmaxf(dot(N, omega_in), 0.0f); + *pdf = cos_pi * M_1_PI_F; + return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F; } -ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_diffuse_ramp_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_diffuse_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_diffuse_ramp_sample(const 
ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf*)sc; - float3 N = bsdf->N; - - // distribution over the hemisphere - sample_cos_hemisphere(N, randu, randv, omega_in, pdf); - - if(dot(Ng, *omega_in) > 0.0f) { - *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F; -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; - *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; -#endif - } - else - *pdf = 0.0f; - - return LABEL_REFLECT|LABEL_DIFFUSE; + const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc; + float3 N = bsdf->N; + + // distribution over the hemisphere + sample_cos_hemisphere(N, randu, randv, omega_in, pdf); + + if (dot(Ng, *omega_in) > 0.0f) { + *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F; +# ifdef __RAY_DIFFERENTIALS__ + *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx; + *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy; +# endif + } + else + *pdf = 0.0f; + + return LABEL_REFLECT | LABEL_DIFFUSE; } -#endif /* __OSL__ */ +#endif /* __OSL__ */ CCL_NAMESPACE_END -#endif /* __BSDF_DIFFUSE_RAMP_H__ */ +#endif /* __BSDF_DIFFUSE_RAMP_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index e1a0cfaa3f5..6b2a9a97d30 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -36,245 +36,276 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct HairBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float3 T; - float roughness1; - float roughness2; - float offset; + float3 T; + float roughness1; + float roughness2; + float offset; } HairBsdf; ccl_device int bsdf_hair_reflection_setup(HairBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID; - bsdf->roughness1 = 
clamp(bsdf->roughness1, 0.001f, 1.0f); - bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f); - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_HAIR_REFLECTION_ID; + bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f); + bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f); + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device int bsdf_hair_transmission_setup(HairBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID; - bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f); - bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f); - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID; + bsdf->roughness1 = clamp(bsdf->roughness1, 0.001f, 1.0f); + bsdf->roughness2 = clamp(bsdf->roughness2, 0.001f, 1.0f); + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device bool bsdf_hair_merge(const ShaderClosure *a, const ShaderClosure *b) { - const HairBsdf *bsdf_a = (const HairBsdf*)a; - const HairBsdf *bsdf_b = (const HairBsdf*)b; + const HairBsdf *bsdf_a = (const HairBsdf *)a; + const HairBsdf *bsdf_b = (const HairBsdf *)b; - return (isequal_float3(bsdf_a->T, bsdf_b->T)) && - (bsdf_a->roughness1 == bsdf_b->roughness1) && - (bsdf_a->roughness2 == bsdf_b->roughness2) && - (bsdf_a->offset == bsdf_b->offset); + return (isequal_float3(bsdf_a->T, bsdf_b->T)) && (bsdf_a->roughness1 == bsdf_b->roughness1) && + (bsdf_a->roughness2 == bsdf_b->roughness2) && (bsdf_a->offset == bsdf_b->offset); } -ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const HairBsdf *bsdf = (const HairBsdf*)sc; - float offset = bsdf->offset; - float3 Tg = bsdf->T; - float roughness1 = bsdf->roughness1; - float roughness2 = bsdf->roughness2; + const HairBsdf *bsdf = (const HairBsdf *)sc; + float offset = bsdf->offset; + float3 Tg = 
bsdf->T; + float roughness1 = bsdf->roughness1; + float roughness2 = bsdf->roughness2; - float Iz = dot(Tg, I); - float3 locy = normalize(I - Tg * Iz); + float Iz = dot(Tg, I); + float3 locy = normalize(I - Tg * Iz); - float theta_r = M_PI_2_F - fast_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); - float omega_in_z = dot(Tg, omega_in); - float3 omega_in_y = normalize(omega_in - Tg * omega_in_z); + float omega_in_z = dot(Tg, omega_in); + float3 omega_in_y = normalize(omega_in - Tg * omega_in_z); - float theta_i = M_PI_2_F - fast_acosf(omega_in_z); - float cosphi_i = dot(omega_in_y, locy); + float theta_i = M_PI_2_F - fast_acosf(omega_in_z); + float cosphi_i = dot(omega_in_y, locy); - if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) { - *pdf = 0.0f; - return make_float3(*pdf, *pdf, *pdf); - } + if (M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) { + *pdf = 0.0f; + return make_float3(*pdf, *pdf, *pdf); + } - float roughness1_inv = 1.0f / roughness1; - float roughness2_inv = 1.0f / roughness2; - float phi_i = fast_acosf(cosphi_i) * roughness2_inv; - phi_i = fabsf(phi_i) < M_PI_F ? phi_i : M_PI_F; - float costheta_i = fast_cosf(theta_i); + float roughness1_inv = 1.0f / roughness1; + float roughness2_inv = 1.0f / roughness2; + float phi_i = fast_acosf(cosphi_i) * roughness2_inv; + phi_i = fabsf(phi_i) < M_PI_F ? 
phi_i : M_PI_F; + float costheta_i = fast_cosf(theta_i); - float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); - float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); + float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); + float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); - float theta_h = (theta_i + theta_r) * 0.5f; - float t = theta_h - offset; + float theta_h = (theta_i + theta_r) * 0.5f; + float t = theta_h - offset; - float phi_pdf = fast_cosf(phi_i * 0.5f) * 0.25f * roughness2_inv; - float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)* costheta_i); - *pdf = phi_pdf * theta_pdf; + float phi_pdf = fast_cosf(phi_i * 0.5f) * 0.25f * roughness2_inv; + float theta_pdf = roughness1 / + (2 * (t * t + roughness1 * roughness1) * (a_R - b_R) * costheta_i); + *pdf = phi_pdf * theta_pdf; - return make_float3(*pdf, *pdf, *pdf); + return make_float3(*pdf, *pdf, *pdf); } -ccl_device float3 bsdf_hair_transmission_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_hair_transmission_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } - -ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_hair_reflection_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 
omega_in, + float *pdf) { - const HairBsdf *bsdf = (const HairBsdf*)sc; - float offset = bsdf->offset; - float3 Tg = bsdf->T; - float roughness1 = bsdf->roughness1; - float roughness2 = bsdf->roughness2; - float Iz = dot(Tg, I); - float3 locy = normalize(I - Tg * Iz); + const HairBsdf *bsdf = (const HairBsdf *)sc; + float offset = bsdf->offset; + float3 Tg = bsdf->T; + float roughness1 = bsdf->roughness1; + float roughness2 = bsdf->roughness2; + float Iz = dot(Tg, I); + float3 locy = normalize(I - Tg * Iz); - float theta_r = M_PI_2_F - fast_acosf(Iz); + float theta_r = M_PI_2_F - fast_acosf(Iz); - float omega_in_z = dot(Tg, omega_in); - float3 omega_in_y = normalize(omega_in - Tg * omega_in_z); + float omega_in_z = dot(Tg, omega_in); + float3 omega_in_y = normalize(omega_in - Tg * omega_in_z); - float theta_i = M_PI_2_F - fast_acosf(omega_in_z); - float phi_i = fast_acosf(dot(omega_in_y, locy)); + float theta_i = M_PI_2_F - fast_acosf(omega_in_z); + float phi_i = fast_acosf(dot(omega_in_y, locy)); - if(M_PI_2_F - fabsf(theta_i) < 0.001f) { - *pdf = 0.0f; - return make_float3(*pdf, *pdf, *pdf); - } + if (M_PI_2_F - fabsf(theta_i) < 0.001f) { + *pdf = 0.0f; + return make_float3(*pdf, *pdf, *pdf); + } - float costheta_i = fast_cosf(theta_i); + float costheta_i = fast_cosf(theta_i); - float roughness1_inv = 1.0f / roughness1; - float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); - float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); - float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f); + float roughness1_inv = 1.0f / roughness1; + float a_TT = fast_atan2f(((M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f); + float b_TT = fast_atan2f(((-M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f); + float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f); - float theta_h = (theta_i + theta_r) / 2; - float t = theta_h - offset; - float phi = fabsf(phi_i); + float theta_h = (theta_i + theta_r) 
/ 2; + float t = theta_h - offset; + float phi = fabsf(phi_i); - float p = M_PI_F - phi; - float theta_pdf = roughness1 / (2 * (t*t + roughness1 * roughness1) * (a_TT - b_TT)*costheta_i); - float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2)); + float p = M_PI_F - phi; + float theta_pdf = roughness1 / + (2 * (t * t + roughness1 * roughness1) * (a_TT - b_TT) * costheta_i); + float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2)); - *pdf = phi_pdf * theta_pdf; - return make_float3(*pdf, *pdf, *pdf); + *pdf = phi_pdf * theta_pdf; + return make_float3(*pdf, *pdf, *pdf); } -ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const HairBsdf *bsdf = (const HairBsdf*)sc; - float offset = bsdf->offset; - float3 Tg = bsdf->T; - float roughness1 = bsdf->roughness1; - float roughness2 = bsdf->roughness2; - float Iz = dot(Tg, I); - float3 locy = normalize(I - Tg * Iz); - float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - fast_acosf(Iz); + const HairBsdf *bsdf = (const HairBsdf *)sc; + float offset = bsdf->offset; + float3 Tg = bsdf->T; + float roughness1 = bsdf->roughness1; + float roughness2 = bsdf->roughness2; + float Iz = dot(Tg, I); + float3 locy = normalize(I - Tg * Iz); + float3 locx = cross(locy, Tg); + float theta_r = M_PI_2_F - fast_acosf(Iz); - float roughness1_inv = 1.0f / roughness1; - float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); - float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); + float roughness1_inv = 1.0f / 
roughness1; + float a_R = fast_atan2f(((M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); + float b_R = fast_atan2f(((-M_PI_2_F + theta_r) * 0.5f - offset) * roughness1_inv, 1.0f); - float t = roughness1 * tanf(randu * (a_R - b_R) + b_R); + float t = roughness1 * tanf(randu * (a_R - b_R) + b_R); - float theta_h = t + offset; - float theta_i = 2 * theta_h - theta_r; + float theta_h = t + offset; + float theta_i = 2 * theta_h - theta_r; - float costheta_i, sintheta_i; - fast_sincosf(theta_i, &sintheta_i, &costheta_i); + float costheta_i, sintheta_i; + fast_sincosf(theta_i, &sintheta_i, &costheta_i); - float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2; + float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2; - float phi_pdf = fast_cosf(phi * 0.5f) * 0.25f / roughness2; + float phi_pdf = fast_cosf(phi * 0.5f) * 0.25f / roughness2; - float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)*costheta_i); + float theta_pdf = roughness1 / + (2 * (t * t + roughness1 * roughness1) * (a_R - b_R) * costheta_i); - float sinphi, cosphi; - fast_sincosf(phi, &sinphi, &cosphi); - *omega_in =(cosphi * costheta_i) * locy - - (sinphi * costheta_i) * locx + - ( sintheta_i) * Tg; + float sinphi, cosphi; + fast_sincosf(phi, &sinphi, &cosphi); + *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg; - //differentials - TODO: find a better approximation for the reflective bounce + //differentials - TODO: find a better approximation for the reflective bounce #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; - *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; + *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; + *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; #endif - *pdf = fabsf(phi_pdf * theta_pdf); - if(M_PI_2_F - fabsf(theta_i) < 0.001f) - *pdf = 0.0f; + *pdf = fabsf(phi_pdf * theta_pdf); + if (M_PI_2_F - fabsf(theta_i) < 0.001f) + *pdf = 0.0f; - *eval = make_float3(*pdf, 
*pdf, *pdf); + *eval = make_float3(*pdf, *pdf, *pdf); - return LABEL_REFLECT|LABEL_GLOSSY; + return LABEL_REFLECT | LABEL_GLOSSY; } -ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const HairBsdf *bsdf = (const HairBsdf*)sc; - float offset = bsdf->offset; - float3 Tg = bsdf->T; - float roughness1 = bsdf->roughness1; - float roughness2 = bsdf->roughness2; - float Iz = dot(Tg, I); - float3 locy = normalize(I - Tg * Iz); - float3 locx = cross(locy, Tg); - float theta_r = M_PI_2_F - fast_acosf(Iz); - - float roughness1_inv = 1.0f / roughness1; - float a_TT = fast_atan2f(((M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); - float b_TT = fast_atan2f(((-M_PI_2_F + theta_r)/2 - offset) * roughness1_inv, 1.0f); - float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f); - - float t = roughness1 * tanf(randu * (a_TT - b_TT) + b_TT); - - float theta_h = t + offset; - float theta_i = 2 * theta_h - theta_r; - - float costheta_i, sintheta_i; - fast_sincosf(theta_i, &sintheta_i, &costheta_i); - - float p = roughness2 * tanf(c_TT * (randv - 0.5f)); - float phi = p + M_PI_F; - float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_TT - b_TT) * costheta_i); - float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2)); - - float sinphi, cosphi; - fast_sincosf(phi, &sinphi, &cosphi); - *omega_in =(cosphi * costheta_i) * locy - - (sinphi * costheta_i) * locx + - ( sintheta_i) * Tg; - - //differentials - TODO: find a better approximation for the transmission bounce + const HairBsdf *bsdf = (const HairBsdf *)sc; + 
float offset = bsdf->offset; + float3 Tg = bsdf->T; + float roughness1 = bsdf->roughness1; + float roughness2 = bsdf->roughness2; + float Iz = dot(Tg, I); + float3 locy = normalize(I - Tg * Iz); + float3 locx = cross(locy, Tg); + float theta_r = M_PI_2_F - fast_acosf(Iz); + + float roughness1_inv = 1.0f / roughness1; + float a_TT = fast_atan2f(((M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f); + float b_TT = fast_atan2f(((-M_PI_2_F + theta_r) / 2 - offset) * roughness1_inv, 1.0f); + float c_TT = 2 * fast_atan2f(M_PI_2_F / roughness2, 1.0f); + + float t = roughness1 * tanf(randu * (a_TT - b_TT) + b_TT); + + float theta_h = t + offset; + float theta_i = 2 * theta_h - theta_r; + + float costheta_i, sintheta_i; + fast_sincosf(theta_i, &sintheta_i, &costheta_i); + + float p = roughness2 * tanf(c_TT * (randv - 0.5f)); + float phi = p + M_PI_F; + float theta_pdf = roughness1 / + (2 * (t * t + roughness1 * roughness1) * (a_TT - b_TT) * costheta_i); + float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2)); + + float sinphi, cosphi; + fast_sincosf(phi, &sinphi, &cosphi); + *omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg; + + //differentials - TODO: find a better approximation for the transmission bounce #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; - *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; + *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx; + *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy; #endif - *pdf = fabsf(phi_pdf * theta_pdf); - if(M_PI_2_F - fabsf(theta_i) < 0.001f) { - *pdf = 0.0f; - } + *pdf = fabsf(phi_pdf * theta_pdf); + if (M_PI_2_F - fabsf(theta_i) < 0.001f) { + *pdf = 0.0f; + } - *eval = make_float3(*pdf, *pdf, *pdf); + *eval = make_float3(*pdf, *pdf, *pdf); - /* TODO(sergey): Should always be negative, but seems some precision issue - * is involved here. 
- */ - kernel_assert(dot(locy, *omega_in) < 1e-4f); + /* TODO(sergey): Should always be negative, but seems some precision issue + * is involved here. + */ + kernel_assert(dot(locy, *omega_in) < 1e-4f); - return LABEL_TRANSMIT|LABEL_GLOSSY; + return LABEL_TRANSMIT | LABEL_GLOSSY; } CCL_NAMESPACE_END -#endif /* __BSDF_HAIR_H__ */ +#endif /* __BSDF_HAIR_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h index 68335ee887a..a4bba2fbf6c 100644 --- a/intern/cycles/kernel/closure/bsdf_hair_principled.h +++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h @@ -15,251 +15,245 @@ */ #ifdef __KERNEL_CPU__ -#include <fenv.h> +# include <fenv.h> #endif #include "kernel/kernel_color.h" #ifndef __BSDF_HAIR_PRINCIPLED_H__ -#define __BSDF_HAIR_PRINCIPLED_H__ +# define __BSDF_HAIR_PRINCIPLED_H__ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct PrincipledHairExtra { - /* Geometry data. */ - float4 geom; + /* Geometry data. */ + float4 geom; } PrincipledHairExtra; typedef ccl_addr_space struct PrincipledHairBSDF { - SHADER_CLOSURE_BASE; - - /* Absorption coefficient. */ - float3 sigma; - /* Variance of the underlying logistic distribution. */ - float v; - /* Scale factor of the underlying logistic distribution. */ - float s; - /* Cuticle tilt angle. */ - float alpha; - /* IOR. */ - float eta; - /* Effective variance for the diffuse bounce only. */ - float m0_roughness; - - /* Extra closure. */ - PrincipledHairExtra *extra; + SHADER_CLOSURE_BASE; + + /* Absorption coefficient. */ + float3 sigma; + /* Variance of the underlying logistic distribution. */ + float v; + /* Scale factor of the underlying logistic distribution. */ + float s; + /* Cuticle tilt angle. */ + float alpha; + /* IOR. */ + float eta; + /* Effective variance for the diffuse bounce only. */ + float m0_roughness; + + /* Extra closure. 
*/ + PrincipledHairExtra *extra; } PrincipledHairBSDF; -static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF), "PrincipledHairBSDF is too large!"); -static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairExtra), "PrincipledHairExtra is too large!"); +static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairBSDF), + "PrincipledHairBSDF is too large!"); +static_assert(sizeof(ShaderClosure) >= sizeof(PrincipledHairExtra), + "PrincipledHairExtra is too large!"); ccl_device_inline float cos_from_sin(const float s) { - return safe_sqrtf(1.0f - s*s); + return safe_sqrtf(1.0f - s * s); } /* Gives the change in direction in the normal plane for the given angles and p-th-order scattering. */ ccl_device_inline float delta_phi(int p, float gamma_o, float gamma_t) { - return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F; + return 2.0f * p * gamma_t - 2.0f * gamma_o + p * M_PI_F; } /* Remaps the given angle to [-pi, pi]. */ ccl_device_inline float wrap_angle(float a) { - while(a > M_PI_F) { - a -= M_2PI_F; - } - while(a < -M_PI_F) { - a += M_2PI_F; - } - return a; + while (a > M_PI_F) { + a -= M_2PI_F; + } + while (a < -M_PI_F) { + a += M_2PI_F; + } + return a; } /* Logistic distribution function. */ ccl_device_inline float logistic(float x, float s) { - float v = expf(-fabsf(x)/s); - return v / (s * sqr(1.0f + v)); + float v = expf(-fabsf(x) / s); + return v / (s * sqr(1.0f + v)); } /* Logistic cumulative density function. */ ccl_device_inline float logistic_cdf(float x, float s) { - float arg = -x/s; - /* expf() overflows if arg >= 89.0. */ - if(arg > 88.0f) { - return 0.0f; - } - else { - return 1.0f / (1.0f + expf(arg)); - } + float arg = -x / s; + /* expf() overflows if arg >= 89.0. */ + if (arg > 88.0f) { + return 0.0f; + } + else { + return 1.0f / (1.0f + expf(arg)); + } } /* Numerical approximation to the Bessel function of the first kind. 
*/ ccl_device_inline float bessel_I0(float x) { - x = sqr(x); - float val = 1.0f + 0.25f*x; - float pow_x_2i = sqr(x); - uint64_t i_fac_2 = 1; - int pow_4_i = 16; - for(int i = 2; i < 10; i++) { - i_fac_2 *= i*i; - float newval = val + pow_x_2i / (pow_4_i * i_fac_2); - if(val == newval) { - return val; - } - val = newval; - pow_x_2i *= x; - pow_4_i *= 4; - } - return val; + x = sqr(x); + float val = 1.0f + 0.25f * x; + float pow_x_2i = sqr(x); + uint64_t i_fac_2 = 1; + int pow_4_i = 16; + for (int i = 2; i < 10; i++) { + i_fac_2 *= i * i; + float newval = val + pow_x_2i / (pow_4_i * i_fac_2); + if (val == newval) { + return val; + } + val = newval; + pow_x_2i *= x; + pow_4_i *= 4; + } + return val; } /* Logarithm of the Bessel function of the first kind. */ ccl_device_inline float log_bessel_I0(float x) { - if(x > 12.0f) { - /* log(1/x) == -log(x) iff x > 0. - * This is only used with positive cosines */ - return x + 0.5f * (1.f / (8.0f * x) - M_LN_2PI_F - logf(x)); - } - else { - return logf(bessel_I0(x)); - } + if (x > 12.0f) { + /* log(1/x) == -log(x) iff x > 0. + * This is only used with positive cosines */ + return x + 0.5f * (1.f / (8.0f * x) - M_LN_2PI_F - logf(x)); + } + else { + return logf(bessel_I0(x)); + } } /* Logistic distribution limited to the interval [-pi, pi]. */ ccl_device_inline float trimmed_logistic(float x, float s) { - /* The logistic distribution is symmetric and centered around zero, - * so logistic_cdf(x, s) = 1 - logistic_cdf(-x, s). - * Therefore, logistic_cdf(x, s)-logistic_cdf(-x, s) = 1 - 2*logistic_cdf(-x, s) */ - float scaling_fac = 1.0f - 2.0f*logistic_cdf(-M_PI_F, s); - float val = logistic(x, s); - return safe_divide(val, scaling_fac); + /* The logistic distribution is symmetric and centered around zero, + * so logistic_cdf(x, s) = 1 - logistic_cdf(-x, s). 
+ * Therefore, logistic_cdf(x, s)-logistic_cdf(-x, s) = 1 - 2*logistic_cdf(-x, s) */ + float scaling_fac = 1.0f - 2.0f * logistic_cdf(-M_PI_F, s); + float val = logistic(x, s); + return safe_divide(val, scaling_fac); } /* Sampling function for the trimmed logistic function. */ ccl_device_inline float sample_trimmed_logistic(float u, float s) { - float cdf_minuspi = logistic_cdf(-M_PI_F, s); - float x = -s*logf(1.0f / (u*(1.0f - 2.0f*cdf_minuspi) + cdf_minuspi) - 1.0f); - return clamp(x, -M_PI_F, M_PI_F); + float cdf_minuspi = logistic_cdf(-M_PI_F, s); + float x = -s * logf(1.0f / (u * (1.0f - 2.0f * cdf_minuspi) + cdf_minuspi) - 1.0f); + return clamp(x, -M_PI_F, M_PI_F); } /* Azimuthal scattering function Np. */ -ccl_device_inline float azimuthal_scattering(float phi, - int p, - float s, - float gamma_o, - float gamma_t) +ccl_device_inline float azimuthal_scattering( + float phi, int p, float s, float gamma_o, float gamma_t) { - float phi_o = wrap_angle(phi - delta_phi(p, gamma_o, gamma_t)); - float val = trimmed_logistic(phi_o, s); - return val; + float phi_o = wrap_angle(phi - delta_phi(p, gamma_o, gamma_t)); + float val = trimmed_logistic(phi_o, s); + return val; } /* Longitudinal scattering function Mp. 
*/ -ccl_device_inline float longitudinal_scattering(float sin_theta_i, - float cos_theta_i, - float sin_theta_o, - float cos_theta_o, - float v) +ccl_device_inline float longitudinal_scattering( + float sin_theta_i, float cos_theta_i, float sin_theta_o, float cos_theta_o, float v) { - float inv_v = 1.0f/v; - float cos_arg = cos_theta_i * cos_theta_o * inv_v; - float sin_arg = sin_theta_i * sin_theta_o * inv_v; - if(v <= 0.1f) { - float i0 = log_bessel_I0(cos_arg); - float val = expf(i0 - sin_arg - inv_v + 0.6931f + logf(0.5f*inv_v)); - return val; - } - else { - float i0 = bessel_I0(cos_arg); - float val = (expf(-sin_arg) * i0) / (sinhf(inv_v) * 2.0f * v); - return val; - } + float inv_v = 1.0f / v; + float cos_arg = cos_theta_i * cos_theta_o * inv_v; + float sin_arg = sin_theta_i * sin_theta_o * inv_v; + if (v <= 0.1f) { + float i0 = log_bessel_I0(cos_arg); + float val = expf(i0 - sin_arg - inv_v + 0.6931f + logf(0.5f * inv_v)); + return val; + } + else { + float i0 = bessel_I0(cos_arg); + float val = (expf(-sin_arg) * i0) / (sinhf(inv_v) * 2.0f * v); + return val; + } } /* Combine the three values using their luminances. */ ccl_device_inline float4 combine_with_energy(KernelGlobals *kg, float3 c) { - return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c)); + return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c)); } -#ifdef __HAIR__ +# ifdef __HAIR__ /* Set up the hair closure. */ ccl_device int bsdf_principled_hair_setup(ShaderData *sd, PrincipledHairBSDF *bsdf) { - bsdf->type = CLOSURE_BSDF_HAIR_PRINCIPLED_ID; - bsdf->v = clamp(bsdf->v, 0.001f, 1.0f); - bsdf->s = clamp(bsdf->s, 0.001f, 1.0f); - /* Apply Primary Reflection Roughness modifier. */ - bsdf->m0_roughness = clamp(bsdf->m0_roughness*bsdf->v, 0.001f, 1.0f); - - /* Map from roughness_u and roughness_v to variance and scale factor. 
*/ - bsdf->v = sqr(0.726f*bsdf->v + 0.812f*sqr(bsdf->v) + 3.700f*pow20(bsdf->v)); - bsdf->s = (0.265f*bsdf->s + 1.194f*sqr(bsdf->s) + 5.372f*pow22(bsdf->s))*M_SQRT_PI_8_F; - bsdf->m0_roughness = sqr(0.726f*bsdf->m0_roughness + 0.812f*sqr(bsdf->m0_roughness) + 3.700f*pow20(bsdf->m0_roughness)); - - /* Compute local frame, aligned to curve tangent and ray direction. */ - float3 X = safe_normalize(sd->dPdu); - float3 Y = safe_normalize(cross(X, sd->I)); - float3 Z = safe_normalize(cross(X, Y)); - /* TODO: the solution below works where sd->Ng is the normal - * pointing from the center of the curve to the shading point. - * It doesn't work for triangles, see https://developer.blender.org/T43625 */ - - /* h -1..0..1 means the rays goes from grazing the hair, to hitting it at - * the center, to grazing the other edge. This is the sine of the angle - * between sd->Ng and Z, as seen from the tangent X. */ - - /* TODO: we convert this value to a cosine later and discard the sign, so - * we could probably save some operations. */ - float h = dot(cross(sd->Ng, X), Z); - - kernel_assert(fabsf(h) < 1.0f + 1e-4f); - kernel_assert(isfinite3_safe(Y)); - kernel_assert(isfinite_safe(h)); - - bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h); - - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; + bsdf->type = CLOSURE_BSDF_HAIR_PRINCIPLED_ID; + bsdf->v = clamp(bsdf->v, 0.001f, 1.0f); + bsdf->s = clamp(bsdf->s, 0.001f, 1.0f); + /* Apply Primary Reflection Roughness modifier. */ + bsdf->m0_roughness = clamp(bsdf->m0_roughness * bsdf->v, 0.001f, 1.0f); + + /* Map from roughness_u and roughness_v to variance and scale factor. 
*/ + bsdf->v = sqr(0.726f * bsdf->v + 0.812f * sqr(bsdf->v) + 3.700f * pow20(bsdf->v)); + bsdf->s = (0.265f * bsdf->s + 1.194f * sqr(bsdf->s) + 5.372f * pow22(bsdf->s)) * M_SQRT_PI_8_F; + bsdf->m0_roughness = sqr(0.726f * bsdf->m0_roughness + 0.812f * sqr(bsdf->m0_roughness) + + 3.700f * pow20(bsdf->m0_roughness)); + + /* Compute local frame, aligned to curve tangent and ray direction. */ + float3 X = safe_normalize(sd->dPdu); + float3 Y = safe_normalize(cross(X, sd->I)); + float3 Z = safe_normalize(cross(X, Y)); + /* TODO: the solution below works where sd->Ng is the normal + * pointing from the center of the curve to the shading point. + * It doesn't work for triangles, see https://developer.blender.org/T43625 */ + + /* h -1..0..1 means the rays goes from grazing the hair, to hitting it at + * the center, to grazing the other edge. This is the sine of the angle + * between sd->Ng and Z, as seen from the tangent X. */ + + /* TODO: we convert this value to a cosine later and discard the sign, so + * we could probably save some operations. */ + float h = dot(cross(sd->Ng, X), Z); + + kernel_assert(fabsf(h) < 1.0f + 1e-4f); + kernel_assert(isfinite3_safe(Y)); + kernel_assert(isfinite_safe(h)); + + bsdf->extra->geom = make_float4(Y.x, Y.y, Y.z, h); + + return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG; } -#endif /* __HAIR__ */ +# endif /* __HAIR__ */ /* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */ -ccl_device_inline void hair_attenuation(KernelGlobals *kg, - float f, - float3 T, - float4 *Ap) +ccl_device_inline void hair_attenuation(KernelGlobals *kg, float f, float3 T, float4 *Ap) { - /* Primary specular (R). */ - Ap[0] = make_float4(f, f, f, f); + /* Primary specular (R). */ + Ap[0] = make_float4(f, f, f, f); - /* Transmission (TT). */ - float3 col = sqr(1.0f - f) * T; - Ap[1] = combine_with_energy(kg, col); + /* Transmission (TT). 
*/ + float3 col = sqr(1.0f - f) * T; + Ap[1] = combine_with_energy(kg, col); - /* Secondary specular (TRT). */ - col *= T*f; - Ap[2] = combine_with_energy(kg, col); + /* Secondary specular (TRT). */ + col *= T * f; + Ap[2] = combine_with_energy(kg, col); - /* Residual component (TRRT+). */ - col *= safe_divide_color(T*f, make_float3(1.0f, 1.0f, 1.0f) - T*f); - Ap[3] = combine_with_energy(kg, col); + /* Residual component (TRRT+). */ + col *= safe_divide_color(T * f, make_float3(1.0f, 1.0f, 1.0f) - T * f); + Ap[3] = combine_with_energy(kg, col); - /* Normalize sampling weights. */ - float totweight = Ap[0].w + Ap[1].w + Ap[2].w + Ap[3].w; - float fac = safe_divide(1.0f, totweight); + /* Normalize sampling weights. */ + float totweight = Ap[0].w + Ap[1].w + Ap[2].w + Ap[3].w; + float fac = safe_divide(1.0f, totweight); - Ap[0].w *= fac; - Ap[1].w *= fac; - Ap[2].w *= fac; - Ap[3].w *= fac; + Ap[0].w *= fac; + Ap[1].w *= fac; + Ap[2].w *= fac; + Ap[3].w *= fac; } /* Given the tilt angle, generate the rotated theta_i for the different bounces. 
*/ @@ -268,19 +262,19 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i, float alpha, float *angles) { - float sin_1alpha = sinf(alpha); - float cos_1alpha = cos_from_sin(sin_1alpha); - float sin_2alpha = 2.0f*sin_1alpha*cos_1alpha; - float cos_2alpha = sqr(cos_1alpha) - sqr(sin_1alpha); - float sin_4alpha = 2.0f*sin_2alpha*cos_2alpha; - float cos_4alpha = sqr(cos_2alpha) - sqr(sin_2alpha); - - angles[0] = sin_theta_i*cos_2alpha + cos_theta_i*sin_2alpha; - angles[1] = fabsf(cos_theta_i*cos_2alpha - sin_theta_i*sin_2alpha); - angles[2] = sin_theta_i*cos_1alpha - cos_theta_i*sin_1alpha; - angles[3] = fabsf(cos_theta_i*cos_1alpha + sin_theta_i*sin_1alpha); - angles[4] = sin_theta_i*cos_4alpha - cos_theta_i*sin_4alpha; - angles[5] = fabsf(cos_theta_i*cos_4alpha + sin_theta_i*sin_4alpha); + float sin_1alpha = sinf(alpha); + float cos_1alpha = cos_from_sin(sin_1alpha); + float sin_2alpha = 2.0f * sin_1alpha * cos_1alpha; + float cos_2alpha = sqr(cos_1alpha) - sqr(sin_1alpha); + float sin_4alpha = 2.0f * sin_2alpha * cos_2alpha; + float cos_4alpha = sqr(cos_2alpha) - sqr(sin_2alpha); + + angles[0] = sin_theta_i * cos_2alpha + cos_theta_i * sin_2alpha; + angles[1] = fabsf(cos_theta_i * cos_2alpha - sin_theta_i * sin_2alpha); + angles[2] = sin_theta_i * cos_1alpha - cos_theta_i * sin_1alpha; + angles[3] = fabsf(cos_theta_i * cos_1alpha + sin_theta_i * sin_1alpha); + angles[4] = sin_theta_i * cos_4alpha - cos_theta_i * sin_4alpha; + angles[5] = fabsf(cos_theta_i * cos_4alpha + sin_theta_i * sin_4alpha); } /* Evaluation function for our shader. 
*/ @@ -290,75 +284,75 @@ ccl_device float3 bsdf_principled_hair_eval(KernelGlobals *kg, const float3 omega_in, float *pdf) { - kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length)); + kernel_assert(isfinite3_safe(sd->P) && isfinite_safe(sd->ray_length)); - const PrincipledHairBSDF *bsdf = (const PrincipledHairBSDF*) sc; - float3 Y = float4_to_float3(bsdf->extra->geom); + const PrincipledHairBSDF *bsdf = (const PrincipledHairBSDF *)sc; + float3 Y = float4_to_float3(bsdf->extra->geom); - float3 X = safe_normalize(sd->dPdu); - kernel_assert(fabsf(dot(X, Y)) < 1e-3f); - float3 Z = safe_normalize(cross(X, Y)); + float3 X = safe_normalize(sd->dPdu); + kernel_assert(fabsf(dot(X, Y)) < 1e-3f); + float3 Z = safe_normalize(cross(X, Y)); - float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z)); - float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); + float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z)); + float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); - float sin_theta_o = wo.x; - float cos_theta_o = cos_from_sin(sin_theta_o); - float phi_o = atan2f(wo.z, wo.y); + float sin_theta_o = wo.x; + float cos_theta_o = cos_from_sin(sin_theta_o); + float phi_o = atan2f(wo.z, wo.y); - float sin_theta_t = sin_theta_o / bsdf->eta; - float cos_theta_t = cos_from_sin(sin_theta_t); + float sin_theta_t = sin_theta_o / bsdf->eta; + float cos_theta_t = cos_from_sin(sin_theta_t); - float sin_gamma_o = bsdf->extra->geom.w; - float cos_gamma_o = cos_from_sin(sin_gamma_o); - float gamma_o = safe_asinf(sin_gamma_o); + float sin_gamma_o = bsdf->extra->geom.w; + float cos_gamma_o = cos_from_sin(sin_gamma_o); + float gamma_o = safe_asinf(sin_gamma_o); - float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o)); - float cos_gamma_t = cos_from_sin(sin_gamma_t); - float gamma_t = safe_asinf(sin_gamma_t); + float sin_gamma_t = sin_gamma_o * cos_theta_o / 
sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o)); + float cos_gamma_t = cos_from_sin(sin_gamma_t); + float gamma_t = safe_asinf(sin_gamma_t); - float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t)); - float4 Ap[4]; - hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap); + float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t)); + float4 Ap[4]; + hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap); - float sin_theta_i = wi.x; - float cos_theta_i = cos_from_sin(sin_theta_i); - float phi_i = atan2f(wi.z, wi.y); + float sin_theta_i = wi.x; + float cos_theta_i = cos_from_sin(sin_theta_i); + float phi_i = atan2f(wi.z, wi.y); - float phi = phi_i - phi_o; + float phi = phi_i - phi_o; - float angles[6]; - hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles); + float angles[6]; + hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles); - float4 F; - float Mp, Np; + float4 F; + float Mp, Np; - /* Primary specular (R). */ - Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness); - Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t); - F = Ap[0] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + /* Primary specular (R). */ + Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness); + Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t); + F = Ap[0] * Mp * Np; + kernel_assert(isfinite3_safe(float4_to_float3(F))); - /* Transmission (TT). */ - Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f*bsdf->v); - Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t); - F += Ap[1] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + /* Transmission (TT). 
*/ + Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v); + Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t); + F += Ap[1] * Mp * Np; + kernel_assert(isfinite3_safe(float4_to_float3(F))); - /* Secondary specular (TRT). */ - Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f*bsdf->v); - Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t); - F += Ap[2] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + /* Secondary specular (TRT). */ + Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v); + Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t); + F += Ap[2] * Mp * Np; + kernel_assert(isfinite3_safe(float4_to_float3(F))); - /* Residual component (TRRT+). */ - Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f*bsdf->v); - Np = M_1_2PI_F; - F += Ap[3] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); + /* Residual component (TRRT+). */ + Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v); + Np = M_1_2PI_F; + F += Ap[3] * Mp * Np; + kernel_assert(isfinite3_safe(float4_to_float3(F))); - *pdf = F.w; - return float4_to_float3(F); + *pdf = F.w; + return float4_to_float3(F); } /* Sampling function for the hair shader. 
*/ @@ -373,130 +367,131 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals *kg, float3 *domega_in_dy, float *pdf) { - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*) sc; - - float3 Y = float4_to_float3(bsdf->extra->geom); - - float3 X = safe_normalize(sd->dPdu); - kernel_assert(fabsf(dot(X, Y)) < 1e-3f); - float3 Z = safe_normalize(cross(X, Y)); - - float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z)); - - float2 u[2]; - u[0] = make_float2(randu, randv); - u[1].x = lcg_step_float_addrspace(&sd->lcg_state); - u[1].y = lcg_step_float_addrspace(&sd->lcg_state); - - float sin_theta_o = wo.x; - float cos_theta_o = cos_from_sin(sin_theta_o); - float phi_o = atan2f(wo.z, wo.y); - - float sin_theta_t = sin_theta_o / bsdf->eta; - float cos_theta_t = cos_from_sin(sin_theta_t); - - float sin_gamma_o = bsdf->extra->geom.w; - float cos_gamma_o = cos_from_sin(sin_gamma_o); - float gamma_o = safe_asinf(sin_gamma_o); - - float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o)); - float cos_gamma_t = cos_from_sin(sin_gamma_t); - float gamma_t = safe_asinf(sin_gamma_t); - - float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t)); - float4 Ap[4]; - hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap); - - int p = 0; - for(; p < 3; p++) { - if(u[0].x < Ap[p].w) { - break; - } - u[0].x -= Ap[p].w; - } - - float v = bsdf->v; - if(p == 1) { - v *= 0.25f; - } - if(p >= 2) { - v *= 4.0f; - } - - u[1].x = max(u[1].x, 1e-5f); - float fac = 1.0f + v*logf(u[1].x + (1.0f - u[1].x)*expf(-2.0f/v)); - float sin_theta_i = -fac * sin_theta_o + cos_from_sin(fac) * cosf(M_2PI_F * u[1].y) * cos_theta_o; - float cos_theta_i = cos_from_sin(sin_theta_i); - - float angles[6]; - if(p < 3) { - hair_alpha_angles(sin_theta_i, cos_theta_i, -bsdf->alpha, angles); - sin_theta_i = angles[2*p]; - cos_theta_i = angles[2*p+1]; - } - - float phi; - if(p < 3) { - phi = delta_phi(p, gamma_o, gamma_t) + 
sample_trimmed_logistic(u[0].y, bsdf->s); - } - else { - phi = M_2PI_F*u[0].y; - } - float phi_i = phi_o + phi; - - hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles); - - float4 F; - float Mp, Np; - - /* Primary specular (R). */ - Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness); - Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t); - F = Ap[0] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); - - /* Transmission (TT). */ - Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f*bsdf->v); - Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t); - F += Ap[1] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); - - /* Secondary specular (TRT). */ - Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f*bsdf->v); - Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t); - F += Ap[2] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); - - /* Residual component (TRRT+). */ - Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f*bsdf->v); - Np = M_1_2PI_F; - F += Ap[3] * Mp * Np; - kernel_assert(isfinite3_safe(float4_to_float3(F))); - - *eval = float4_to_float3(F); - *pdf = F.w; - - *omega_in = X*sin_theta_i + Y*cos_theta_i*cosf(phi_i) + Z*cos_theta_i*sinf(phi_i); - -#ifdef __RAY_DIFFERENTIALS__ - float3 N = safe_normalize(sd->I + *omega_in); - *domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx; - *domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy; -#endif - - return LABEL_GLOSSY|((p == 0)? 
LABEL_REFLECT : LABEL_TRANSMIT); + PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc; + + float3 Y = float4_to_float3(bsdf->extra->geom); + + float3 X = safe_normalize(sd->dPdu); + kernel_assert(fabsf(dot(X, Y)) < 1e-3f); + float3 Z = safe_normalize(cross(X, Y)); + + float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z)); + + float2 u[2]; + u[0] = make_float2(randu, randv); + u[1].x = lcg_step_float_addrspace(&sd->lcg_state); + u[1].y = lcg_step_float_addrspace(&sd->lcg_state); + + float sin_theta_o = wo.x; + float cos_theta_o = cos_from_sin(sin_theta_o); + float phi_o = atan2f(wo.z, wo.y); + + float sin_theta_t = sin_theta_o / bsdf->eta; + float cos_theta_t = cos_from_sin(sin_theta_t); + + float sin_gamma_o = bsdf->extra->geom.w; + float cos_gamma_o = cos_from_sin(sin_gamma_o); + float gamma_o = safe_asinf(sin_gamma_o); + + float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o)); + float cos_gamma_t = cos_from_sin(sin_gamma_t); + float gamma_t = safe_asinf(sin_gamma_t); + + float3 T = exp3(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t)); + float4 Ap[4]; + hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap); + + int p = 0; + for (; p < 3; p++) { + if (u[0].x < Ap[p].w) { + break; + } + u[0].x -= Ap[p].w; + } + + float v = bsdf->v; + if (p == 1) { + v *= 0.25f; + } + if (p >= 2) { + v *= 4.0f; + } + + u[1].x = max(u[1].x, 1e-5f); + float fac = 1.0f + v * logf(u[1].x + (1.0f - u[1].x) * expf(-2.0f / v)); + float sin_theta_i = -fac * sin_theta_o + + cos_from_sin(fac) * cosf(M_2PI_F * u[1].y) * cos_theta_o; + float cos_theta_i = cos_from_sin(sin_theta_i); + + float angles[6]; + if (p < 3) { + hair_alpha_angles(sin_theta_i, cos_theta_i, -bsdf->alpha, angles); + sin_theta_i = angles[2 * p]; + cos_theta_i = angles[2 * p + 1]; + } + + float phi; + if (p < 3) { + phi = delta_phi(p, gamma_o, gamma_t) + sample_trimmed_logistic(u[0].y, bsdf->s); + } + else { + phi = M_2PI_F * u[0].y; + } 
+ float phi_i = phi_o + phi; + + hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles); + + float4 F; + float Mp, Np; + + /* Primary specular (R). */ + Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness); + Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t); + F = Ap[0] * Mp * Np; + kernel_assert(isfinite3_safe(float4_to_float3(F))); + + /* Transmission (TT). */ + Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v); + Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t); + F += Ap[1] * Mp * Np; + kernel_assert(isfinite3_safe(float4_to_float3(F))); + + /* Secondary specular (TRT). */ + Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v); + Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t); + F += Ap[2] * Mp * Np; + kernel_assert(isfinite3_safe(float4_to_float3(F))); + + /* Residual component (TRRT+). */ + Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v); + Np = M_1_2PI_F; + F += Ap[3] * Mp * Np; + kernel_assert(isfinite3_safe(float4_to_float3(F))); + + *eval = float4_to_float3(F); + *pdf = F.w; + + *omega_in = X * sin_theta_i + Y * cos_theta_i * cosf(phi_i) + Z * cos_theta_i * sinf(phi_i); + +# ifdef __RAY_DIFFERENTIALS__ + float3 N = safe_normalize(sd->I + *omega_in); + *domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx; + *domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy; +# endif + + return LABEL_GLOSSY | ((p == 0) ? LABEL_REFLECT : LABEL_TRANSMIT); } /* Implements Filter Glossy by capping the effective roughness. 
*/ ccl_device void bsdf_principled_hair_blur(ShaderClosure *sc, float roughness) { - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)sc; + PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)sc; - bsdf->v = fmaxf(roughness, bsdf->v); - bsdf->s = fmaxf(roughness, bsdf->s); - bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness); + bsdf->v = fmaxf(roughness, bsdf->v); + bsdf->s = fmaxf(roughness, bsdf->s); + bsdf->m0_roughness = fmaxf(roughness, bsdf->m0_roughness); } CCL_NAMESPACE_END -#endif /* __BSDF_HAIR_PRINCIPLED_H__ */ +#endif /* __BSDF_HAIR_PRINCIPLED_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 32b6e50b09a..b4da3123f28 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -36,95 +36,98 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct MicrofacetExtra { - float3 color, cspec0; - float clearcoat; + float3 color, cspec0; + float clearcoat; } MicrofacetExtra; typedef ccl_addr_space struct MicrofacetBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float alpha_x, alpha_y, ior; - MicrofacetExtra *extra; - float3 T; + float alpha_x, alpha_y, ior; + MicrofacetExtra *extra; + float3 T; } MicrofacetBsdf; /* Beckmann and GGX microfacet importance sampling. 
*/ -ccl_device_inline void microfacet_beckmann_sample_slopes( - KernelGlobals *kg, - const float cos_theta_i, const float sin_theta_i, - float randu, float randv, float *slope_x, float *slope_y, - float *G1i) +ccl_device_inline void microfacet_beckmann_sample_slopes(KernelGlobals *kg, + const float cos_theta_i, + const float sin_theta_i, + float randu, + float randv, + float *slope_x, + float *slope_y, + float *G1i) { - /* special case (normal incidence) */ - if(cos_theta_i >= 0.99999f) { - const float r = sqrtf(-logf(randu)); - const float phi = M_2PI_F * randv; - *slope_x = r * cosf(phi); - *slope_y = r * sinf(phi); - *G1i = 1.0f; - return; - } - - /* precomputations */ - const float tan_theta_i = sin_theta_i/cos_theta_i; - const float inv_a = tan_theta_i; - const float cot_theta_i = 1.0f/tan_theta_i; - const float erf_a = fast_erff(cot_theta_i); - const float exp_a2 = expf(-cot_theta_i*cot_theta_i); - const float SQRT_PI_INV = 0.56418958354f; - const float Lambda = 0.5f*(erf_a - 1.0f) + (0.5f*SQRT_PI_INV)*(exp_a2*inv_a); - const float G1 = 1.0f/(1.0f + Lambda); /* masking */ - - *G1i = G1; + /* special case (normal incidence) */ + if (cos_theta_i >= 0.99999f) { + const float r = sqrtf(-logf(randu)); + const float phi = M_2PI_F * randv; + *slope_x = r * cosf(phi); + *slope_y = r * sinf(phi); + *G1i = 1.0f; + return; + } + + /* precomputations */ + const float tan_theta_i = sin_theta_i / cos_theta_i; + const float inv_a = tan_theta_i; + const float cot_theta_i = 1.0f / tan_theta_i; + const float erf_a = fast_erff(cot_theta_i); + const float exp_a2 = expf(-cot_theta_i * cot_theta_i); + const float SQRT_PI_INV = 0.56418958354f; + const float Lambda = 0.5f * (erf_a - 1.0f) + (0.5f * SQRT_PI_INV) * (exp_a2 * inv_a); + const float G1 = 1.0f / (1.0f + Lambda); /* masking */ + + *G1i = G1; #if defined(__KERNEL_GPU__) - /* Based on paper from Wenzel Jakob - * An Improved Visible Normal Sampling Routine for the Beckmann Distribution - * - * 
http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf - * - * Reformulation from OpenShadingLanguage which avoids using inverse - * trigonometric functions. - */ - - /* Sample slope X. - * - * Compute a coarse approximation using the approximation: - * exp(-ierf(x)^2) ~= 1 - x * x - * solve y = 1 + b + K * (1 - b * b) - */ - float K = tan_theta_i * SQRT_PI_INV; - float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a)); - float y_exact = randu * (1.0f + erf_a + K * exp_a2); - float b = K > 0 ? (0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f; - - /* Perform newton step to refine toward the true root. */ - float inv_erf = fast_ierff(b); - float value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact; - /* Check if we are close enough already, - * this also avoids NaNs as we get close to the root. - */ - if(fabsf(value) > 1e-6f) { - b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */ - inv_erf = fast_ierff(b); - value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact; - b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */ - /* Compute the slope from the refined value. */ - *slope_x = fast_ierff(b); - } - else { - /* We are close enough already. */ - *slope_x = inv_erf; - } - *slope_y = fast_ierff(2.0f*randv - 1.0f); + /* Based on paper from Wenzel Jakob + * An Improved Visible Normal Sampling Routine for the Beckmann Distribution + * + * http://www.mitsuba-renderer.org/~wenzel/files/visnormal.pdf + * + * Reformulation from OpenShadingLanguage which avoids using inverse + * trigonometric functions. + */ + + /* Sample slope X. + * + * Compute a coarse approximation using the approximation: + * exp(-ierf(x)^2) ~= 1 - x * x + * solve y = 1 + b + K * (1 - b * b) + */ + float K = tan_theta_i * SQRT_PI_INV; + float y_approx = randu * (1.0f + erf_a + K * (1 - erf_a * erf_a)); + float y_exact = randu * (1.0f + erf_a + K * exp_a2); + float b = K > 0 ? 
(0.5f - sqrtf(K * (K - y_approx + 1.0f) + 0.25f)) / K : y_approx - 1.0f; + + /* Perform newton step to refine toward the true root. */ + float inv_erf = fast_ierff(b); + float value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact; + /* Check if we are close enough already, + * this also avoids NaNs as we get close to the root. + */ + if (fabsf(value) > 1e-6f) { + b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 1. */ + inv_erf = fast_ierff(b); + value = 1.0f + b + K * expf(-inv_erf * inv_erf) - y_exact; + b -= value / (1.0f - inv_erf * tan_theta_i); /* newton step 2. */ + /* Compute the slope from the refined value. */ + *slope_x = fast_ierff(b); + } + else { + /* We are close enough already. */ + *slope_x = inv_erf; + } + *slope_y = fast_ierff(2.0f * randv - 1.0f); #else - /* Use precomputed table on CPU, it gives better perfomance. */ - int beckmann_table_offset = kernel_data.tables.beckmann_offset; + /* Use precomputed table on CPU, it gives better perfomance. */ + int beckmann_table_offset = kernel_data.tables.beckmann_offset; - *slope_x = lookup_table_read_2D(kg, randu, cos_theta_i, - beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE); - *slope_y = fast_ierff(2.0f*randv - 1.0f); + *slope_x = lookup_table_read_2D( + kg, randu, cos_theta_i, beckmann_table_offset, BECKMANN_TABLE_SIZE, BECKMANN_TABLE_SIZE); + *slope_y = fast_ierff(2.0f * randv - 1.0f); #endif } @@ -134,103 +137,109 @@ ccl_device_inline void microfacet_beckmann_sample_slopes( * E. Heitz and E. 
d'Eon, EGSR 2014 */ -ccl_device_inline void microfacet_ggx_sample_slopes( - const float cos_theta_i, const float sin_theta_i, - float randu, float randv, float *slope_x, float *slope_y, - float *G1i) +ccl_device_inline void microfacet_ggx_sample_slopes(const float cos_theta_i, + const float sin_theta_i, + float randu, + float randv, + float *slope_x, + float *slope_y, + float *G1i) { - /* special case (normal incidence) */ - if(cos_theta_i >= 0.99999f) { - const float r = sqrtf(randu/(1.0f - randu)); - const float phi = M_2PI_F * randv; - *slope_x = r * cosf(phi); - *slope_y = r * sinf(phi); - *G1i = 1.0f; - - return; - } - - /* precomputations */ - const float tan_theta_i = sin_theta_i/cos_theta_i; - const float G1_inv = 0.5f * (1.0f + safe_sqrtf(1.0f + tan_theta_i*tan_theta_i)); - - *G1i = 1.0f/G1_inv; - - /* sample slope_x */ - const float A = 2.0f*randu*G1_inv - 1.0f; - const float AA = A*A; - const float tmp = 1.0f/(AA - 1.0f); - const float B = tan_theta_i; - const float BB = B*B; - const float D = safe_sqrtf(BB*(tmp*tmp) - (AA - BB)*tmp); - const float slope_x_1 = B*tmp - D; - const float slope_x_2 = B*tmp + D; - *slope_x = (A < 0.0f || slope_x_2*tan_theta_i > 1.0f)? 
slope_x_1: slope_x_2; - - /* sample slope_y */ - float S; - - if(randv > 0.5f) { - S = 1.0f; - randv = 2.0f*(randv - 0.5f); - } - else { - S = -1.0f; - randv = 2.0f*(0.5f - randv); - } - - const float z = (randv*(randv*(randv*0.27385f - 0.73369f) + 0.46341f)) / (randv*(randv*(randv*0.093073f + 0.309420f) - 1.000000f) + 0.597999f); - *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x)*(*slope_x)); + /* special case (normal incidence) */ + if (cos_theta_i >= 0.99999f) { + const float r = sqrtf(randu / (1.0f - randu)); + const float phi = M_2PI_F * randv; + *slope_x = r * cosf(phi); + *slope_y = r * sinf(phi); + *G1i = 1.0f; + + return; + } + + /* precomputations */ + const float tan_theta_i = sin_theta_i / cos_theta_i; + const float G1_inv = 0.5f * (1.0f + safe_sqrtf(1.0f + tan_theta_i * tan_theta_i)); + + *G1i = 1.0f / G1_inv; + + /* sample slope_x */ + const float A = 2.0f * randu * G1_inv - 1.0f; + const float AA = A * A; + const float tmp = 1.0f / (AA - 1.0f); + const float B = tan_theta_i; + const float BB = B * B; + const float D = safe_sqrtf(BB * (tmp * tmp) - (AA - BB) * tmp); + const float slope_x_1 = B * tmp - D; + const float slope_x_2 = B * tmp + D; + *slope_x = (A < 0.0f || slope_x_2 * tan_theta_i > 1.0f) ? 
slope_x_1 : slope_x_2; + + /* sample slope_y */ + float S; + + if (randv > 0.5f) { + S = 1.0f; + randv = 2.0f * (randv - 0.5f); + } + else { + S = -1.0f; + randv = 2.0f * (0.5f - randv); + } + + const float z = (randv * (randv * (randv * 0.27385f - 0.73369f) + 0.46341f)) / + (randv * (randv * (randv * 0.093073f + 0.309420f) - 1.000000f) + 0.597999f); + *slope_y = S * z * safe_sqrtf(1.0f + (*slope_x) * (*slope_x)); } -ccl_device_forceinline float3 microfacet_sample_stretched( - KernelGlobals *kg, const float3 omega_i, - const float alpha_x, const float alpha_y, - const float randu, const float randv, - bool beckmann, float *G1i) +ccl_device_forceinline float3 microfacet_sample_stretched(KernelGlobals *kg, + const float3 omega_i, + const float alpha_x, + const float alpha_y, + const float randu, + const float randv, + bool beckmann, + float *G1i) { - /* 1. stretch omega_i */ - float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z); - omega_i_ = normalize(omega_i_); - - /* get polar coordinates of omega_i_ */ - float costheta_ = 1.0f; - float sintheta_ = 0.0f; - float cosphi_ = 1.0f; - float sinphi_ = 0.0f; - - if(omega_i_.z < 0.99999f) { - costheta_ = omega_i_.z; - sintheta_ = safe_sqrtf(1.0f - costheta_*costheta_); - - float invlen = 1.0f/sintheta_; - cosphi_ = omega_i_.x * invlen; - sinphi_ = omega_i_.y * invlen; - } - - /* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */ - float slope_x, slope_y; - - if(beckmann) { - microfacet_beckmann_sample_slopes(kg, costheta_, sintheta_, - randu, randv, &slope_x, &slope_y, G1i); - } - else { - microfacet_ggx_sample_slopes(costheta_, sintheta_, - randu, randv, &slope_x, &slope_y, G1i); - } - - /* 3. rotate */ - float tmp = cosphi_*slope_x - sinphi_*slope_y; - slope_y = sinphi_*slope_x + cosphi_*slope_y; - slope_x = tmp; - - /* 4. unstretch */ - slope_x = alpha_x * slope_x; - slope_y = alpha_y * slope_y; - - /* 5. compute normal */ - return normalize(make_float3(-slope_x, -slope_y, 1.0f)); + /* 1. 
stretch omega_i */ + float3 omega_i_ = make_float3(alpha_x * omega_i.x, alpha_y * omega_i.y, omega_i.z); + omega_i_ = normalize(omega_i_); + + /* get polar coordinates of omega_i_ */ + float costheta_ = 1.0f; + float sintheta_ = 0.0f; + float cosphi_ = 1.0f; + float sinphi_ = 0.0f; + + if (omega_i_.z < 0.99999f) { + costheta_ = omega_i_.z; + sintheta_ = safe_sqrtf(1.0f - costheta_ * costheta_); + + float invlen = 1.0f / sintheta_; + cosphi_ = omega_i_.x * invlen; + sinphi_ = omega_i_.y * invlen; + } + + /* 2. sample P22_{omega_i}(x_slope, y_slope, 1, 1) */ + float slope_x, slope_y; + + if (beckmann) { + microfacet_beckmann_sample_slopes( + kg, costheta_, sintheta_, randu, randv, &slope_x, &slope_y, G1i); + } + else { + microfacet_ggx_sample_slopes(costheta_, sintheta_, randu, randv, &slope_x, &slope_y, G1i); + } + + /* 3. rotate */ + float tmp = cosphi_ * slope_x - sinphi_ * slope_y; + slope_y = sinphi_ * slope_x + cosphi_ * slope_y; + slope_x = tmp; + + /* 4. unstretch */ + slope_x = alpha_x * slope_x; + slope_y = alpha_y * slope_y; + + /* 5. 
compute normal */ + return normalize(make_float3(-slope_x, -slope_y, 1.0f)); } /* Calculate the reflection color @@ -240,27 +249,29 @@ ccl_device_forceinline float3 microfacet_sample_stretched( * * Else it is simply white */ -ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float3 L, float3 H) { - float3 F = make_float3(1.0f, 1.0f, 1.0f); - bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID - || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID - || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID); +ccl_device_forceinline float3 reflection_color(const MicrofacetBsdf *bsdf, float3 L, float3 H) +{ + float3 F = make_float3(1.0f, 1.0f, 1.0f); + bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || + bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID || + bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID); - if(use_fresnel) { - float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + if (use_fresnel) { + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); - F = interpolate_fresnel_color(L, H, bsdf->ior, F0, bsdf->extra->cspec0); - } + F = interpolate_fresnel_color(L, H, bsdf->ior, F0, bsdf->extra->cspec0); + } - return F; + return F; } ccl_device_forceinline float D_GTR1(float NdotH, float alpha) { - if(alpha >= 1.0f) return M_1_PI_F; - float alpha2 = alpha*alpha; - float t = 1.0f + (alpha2 - 1.0f) * NdotH*NdotH; - return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t); + if (alpha >= 1.0f) + return M_1_PI_F; + float alpha2 = alpha * alpha; + float t = 1.0f + (alpha2 - 1.0f) * NdotH * NdotH; + return (alpha2 - 1.0f) / (M_PI_F * logf(alpha2) * t); } /* GGX microfacet with Smith shadow-masking from: @@ -278,483 +289,511 @@ ccl_device_forceinline float D_GTR1(float NdotH, float alpha) ccl_device int bsdf_microfacet_ggx_setup(MicrofacetBsdf *bsdf) { - bsdf->extra = NULL; + bsdf->extra = NULL; - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = bsdf->alpha_x; + 
bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { - bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); - bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); - bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); + bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); + bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); - float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); - float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); - bsdf->sample_weight *= F; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = bsdf->alpha_x; + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_ggx_clearcoat_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { - bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); - bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); - bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); + bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); + bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); - float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); - float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); - 
bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= 0.25f * bsdf->extra->clearcoat * F; - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = bsdf->alpha_x; + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device bool bsdf_microfacet_merge(const ShaderClosure *a, const ShaderClosure *b) { - const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf*)a; - const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf*)b; - - return (isequal_float3(bsdf_a->N, bsdf_b->N)) && - (bsdf_a->alpha_x == bsdf_b->alpha_x) && - (bsdf_a->alpha_y == bsdf_b->alpha_y) && - (isequal_float3(bsdf_a->T, bsdf_b->T)) && - (bsdf_a->ior == bsdf_b->ior) && - ((bsdf_a->extra == NULL && bsdf_b->extra == NULL) || - ((bsdf_a->extra && bsdf_b->extra) && - (isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color)) && - (isequal_float3(bsdf_a->extra->cspec0, bsdf_b->extra->cspec0)) && - (bsdf_a->extra->clearcoat == bsdf_b->extra->clearcoat))); + const MicrofacetBsdf *bsdf_a = (const MicrofacetBsdf *)a; + const MicrofacetBsdf *bsdf_b = (const MicrofacetBsdf *)b; + + return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->alpha_x == bsdf_b->alpha_x) && + (bsdf_a->alpha_y == bsdf_b->alpha_y) && (isequal_float3(bsdf_a->T, bsdf_b->T)) && + (bsdf_a->ior == bsdf_b->ior) && + ((bsdf_a->extra == NULL && bsdf_b->extra == NULL) || + ((bsdf_a->extra && bsdf_b->extra) && + (isequal_float3(bsdf_a->extra->color, bsdf_b->extra->color)) && + (isequal_float3(bsdf_a->extra->cspec0, bsdf_b->extra->cspec0)) && + (bsdf_a->extra->clearcoat == bsdf_b->extra->clearcoat))); } ccl_device int 
bsdf_microfacet_ggx_aniso_setup(MicrofacetBsdf *bsdf) { - bsdf->extra = NULL; + bsdf->extra = NULL; - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = saturate(bsdf->alpha_y); + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = saturate(bsdf->alpha_y); - bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { - bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); - bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); - bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); + bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); + bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); - float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); - float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); - bsdf->sample_weight *= F; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = saturate(bsdf->alpha_y); + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = saturate(bsdf->alpha_y); - bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_ggx_refraction_setup(MicrofacetBsdf *bsdf) { - bsdf->extra = NULL; + bsdf->extra = NULL; - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = bsdf->alpha_x; + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + bsdf->type = 
CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc; - bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); - bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); + bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); + bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } -ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_microfacet_ggx_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float alpha_x = bsdf->alpha_x; - float alpha_y = bsdf->alpha_y; - bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - float3 N = bsdf->N; - - if(m_refractive || alpha_x*alpha_y <= 1e-7f) - return make_float3(0.0f, 0.0f, 0.0f); - - float cosNO = dot(N, I); - float cosNI = dot(N, omega_in); - - if(cosNI > 0 && cosNO > 0) { - /* get half vector */ - float3 m = normalize(omega_in + I); - float alpha2 = alpha_x * alpha_y; - float D, G1o, G1i; - - if(alpha_x == alpha_y) { - /* isotropic - * eq. 20: (F*G*D)/(4*in*on) - * eq. 33: first we calculate D(m) */ - float cosThetaM = dot(N, m); - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2; - - if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { - /* use GTR1 for clearcoat */ - D = D_GTR1(cosThetaM, bsdf->alpha_x); - - /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */ - alpha2 = 0.0625f; - } - else { - /* use GTR2 otherwise */ - D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2)); - } - - /* eq. 
34: now calculate G1(i,m) and G1(o,m) */ - G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO))); - G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI))); - } - else { - /* anisotropic */ - float3 X, Y, Z = N; - make_orthonormals_tangent(Z, bsdf->T, &X, &Y); - - /* distribution */ - float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); - float slope_x = -local_m.x/(local_m.z*alpha_x); - float slope_y = -local_m.y/(local_m.z*alpha_y); - float slope_len = 1 + slope_x*slope_x + slope_y*slope_y; - - float cosThetaM = local_m.z; - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - - D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4); - - /* G1(i,m) and G1(o,m) */ - float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO); - float cosPhiO = dot(I, X); - float sinPhiO = dot(I, Y); - - float alphaO2 = (cosPhiO*cosPhiO)*(alpha_x*alpha_x) + (sinPhiO*sinPhiO)*(alpha_y*alpha_y); - alphaO2 /= cosPhiO*cosPhiO + sinPhiO*sinPhiO; - - G1o = 2 / (1 + safe_sqrtf(1 + alphaO2 * tanThetaO2)); - - float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI); - float cosPhiI = dot(omega_in, X); - float sinPhiI = dot(omega_in, Y); - - float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y); - alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI; - - G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2)); - } - - float G = G1o * G1i; - - /* eq. 20 */ - float common = D * 0.25f / cosNO; - - float3 F = reflection_color(bsdf, omega_in, m); - if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { - F *= 0.25f * bsdf->extra->clearcoat; - } - - float3 out = F * G * common; - - /* eq. 2 in distribution of visible normals sampling - * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */ - - /* eq. 38 - but see also: - * eq. 
17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf - * pdf = pm * 0.25 / dot(m, I); */ - *pdf = G1o * common; - - return out; - } - - return make_float3(0.0f, 0.0f, 0.0f); + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + float3 N = bsdf->N; + + if (m_refractive || alpha_x * alpha_y <= 1e-7f) + return make_float3(0.0f, 0.0f, 0.0f); + + float cosNO = dot(N, I); + float cosNI = dot(N, omega_in); + + if (cosNI > 0 && cosNO > 0) { + /* get half vector */ + float3 m = normalize(omega_in + I); + float alpha2 = alpha_x * alpha_y; + float D, G1o, G1i; + + if (alpha_x == alpha_y) { + /* isotropic + * eq. 20: (F*G*D)/(4*in*on) + * eq. 33: first we calculate D(m) */ + float cosThetaM = dot(N, m); + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2; + + if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { + /* use GTR1 for clearcoat */ + D = D_GTR1(cosThetaM, bsdf->alpha_x); + + /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */ + alpha2 = 0.0625f; + } + else { + /* use GTR2 otherwise */ + D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2)); + } + + /* eq. 
34: now calculate G1(i,m) and G1(o,m) */ + G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO))); + G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI))); + } + else { + /* anisotropic */ + float3 X, Y, Z = N; + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); + + /* distribution */ + float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); + float slope_x = -local_m.x / (local_m.z * alpha_x); + float slope_y = -local_m.y / (local_m.z * alpha_y); + float slope_len = 1 + slope_x * slope_x + slope_y * slope_y; + + float cosThetaM = local_m.z; + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + + D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4); + + /* G1(i,m) and G1(o,m) */ + float tanThetaO2 = (1 - cosNO * cosNO) / (cosNO * cosNO); + float cosPhiO = dot(I, X); + float sinPhiO = dot(I, Y); + + float alphaO2 = (cosPhiO * cosPhiO) * (alpha_x * alpha_x) + + (sinPhiO * sinPhiO) * (alpha_y * alpha_y); + alphaO2 /= cosPhiO * cosPhiO + sinPhiO * sinPhiO; + + G1o = 2 / (1 + safe_sqrtf(1 + alphaO2 * tanThetaO2)); + + float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI); + float cosPhiI = dot(omega_in, X); + float sinPhiI = dot(omega_in, Y); + + float alphaI2 = (cosPhiI * cosPhiI) * (alpha_x * alpha_x) + + (sinPhiI * sinPhiI) * (alpha_y * alpha_y); + alphaI2 /= cosPhiI * cosPhiI + sinPhiI * sinPhiI; + + G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2)); + } + + float G = G1o * G1i; + + /* eq. 20 */ + float common = D * 0.25f / cosNO; + + float3 F = reflection_color(bsdf, omega_in, m); + if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { + F *= 0.25f * bsdf->extra->clearcoat; + } + + float3 out = F * G * common; + + /* eq. 2 in distribution of visible normals sampling + * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */ + + /* eq. 38 - but see also: + * eq. 
17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf + * pdf = pm * 0.25 / dot(m, I); */ + *pdf = G1o * common; + + return out; + } + + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_microfacet_ggx_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float alpha_x = bsdf->alpha_x; - float alpha_y = bsdf->alpha_y; - float m_eta = bsdf->ior; - bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - float3 N = bsdf->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + float m_eta = bsdf->ior; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + float3 N = bsdf->N; - if(!m_refractive || alpha_x*alpha_y <= 1e-7f) - return make_float3(0.0f, 0.0f, 0.0f); + if (!m_refractive || alpha_x * alpha_y <= 1e-7f) + return make_float3(0.0f, 0.0f, 0.0f); - float cosNO = dot(N, I); - float cosNI = dot(N, omega_in); + float cosNO = dot(N, I); + float cosNI = dot(N, omega_in); - if(cosNO <= 0 || cosNI >= 0) - return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */ + if (cosNO <= 0 || cosNI >= 0) + return make_float3(0.0f, 0.0f, 0.0f); /* vectors on same side -- not possible */ - /* compute half-vector of the refraction (eq. 16) */ - float3 ht = -(m_eta * omega_in + I); - float3 Ht = normalize(ht); - float cosHO = dot(Ht, I); - float cosHI = dot(Ht, omega_in); + /* compute half-vector of the refraction (eq. 16) */ + float3 ht = -(m_eta * omega_in + I); + float3 Ht = normalize(ht); + float cosHO = dot(Ht, I); + float cosHI = dot(Ht, omega_in); - float D, G1o, G1i; + float D, G1o, G1i; - /* eq. 
33: first we calculate D(m) with m=Ht: */ - float alpha2 = alpha_x * alpha_y; - float cosThetaM = dot(N, Ht); - float cosThetaM2 = cosThetaM * cosThetaM; - float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2)); + /* eq. 33: first we calculate D(m) with m=Ht: */ + float alpha2 = alpha_x * alpha_y; + float cosThetaM = dot(N, Ht); + float cosThetaM2 = cosThetaM * cosThetaM; + float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2)); - /* eq. 34: now calculate G1(i,m) and G1(o,m) */ - G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO))); - G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI))); + /* eq. 34: now calculate G1(i,m) and G1(o,m) */ + G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO))); + G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI))); - float G = G1o * G1i; + float G = G1o * G1i; - /* probability */ - float Ht2 = dot(ht, ht); + /* probability */ + float Ht2 = dot(ht, ht); - /* eq. 2 in distribution of visible normals sampling - * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */ + /* eq. 
2 in distribution of visible normals sampling + * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */ - /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2) - * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */ - float common = D * (m_eta * m_eta) / (cosNO * Ht2); - float out = G * fabsf(cosHI * cosHO) * common; - *pdf = G1o * fabsf(cosHO * cosHI) * common; + /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2) + * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */ + float common = D * (m_eta * m_eta) / (cosNO * Ht2); + float out = G * fabsf(cosHI * cosHO) * common; + *pdf = G1o * fabsf(cosHO * cosHI) * common; - return make_float3(out, out, out); + return make_float3(out, out, out); } -ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, + const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float alpha_x = bsdf->alpha_x; - float alpha_y = bsdf->alpha_y; - bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - float3 N = bsdf->N; - int label; - - float cosNO = dot(N, I); - if(cosNO > 0) { - float3 X, Y, Z = N; - - if(alpha_x == alpha_y) - make_orthonormals(Z, &X, &Y); - else - make_orthonormals_tangent(Z, bsdf->T, &X, &Y); - - /* importance sampling with distribution of visible normals. 
vectors are - * transformed to local space before and after */ - float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO); - float3 local_m; - float G1o; - - local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_y, - randu, randv, false, &G1o); - - float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z; - float cosThetaM = local_m.z; - - /* reflection or refraction? */ - if(!m_refractive) { - float cosMO = dot(m, I); - label = LABEL_REFLECT | LABEL_GLOSSY; - - if(cosMO > 0) { - /* eq. 39 - compute actual reflected direction */ - *omega_in = 2 * cosMO * m - I; - - if(dot(Ng, *omega_in) > 0) { - if(alpha_x*alpha_y <= 1e-7f) { - /* some high number for MIS */ - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); - - bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID - || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID - || bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID); - - /* if fresnel is used, calculate the color with reflection_color(...) */ - if(use_fresnel) { - *eval *= reflection_color(bsdf, *omega_in, m); - } - - label = LABEL_REFLECT | LABEL_SINGULAR; - } - else { - /* microfacet normal is visible to this ray */ - /* eq. 33 */ - float alpha2 = alpha_x * alpha_y; - float D, G1i; - - if(alpha_x == alpha_y) { - /* isotropic */ - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - float tanThetaM2 = 1/(cosThetaM2) - 1; - - /* eval BRDF*cosNI */ - float cosNI = dot(N, *omega_in); - - if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { - /* use GTR1 for clearcoat */ - D = D_GTR1(cosThetaM, bsdf->alpha_x); - - /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */ - alpha2 = 0.0625f; - - /* recalculate G1o */ - G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO))); - } - else { - /* use GTR2 otherwise */ - D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2)); - } - - /* eq. 
34: now calculate G1(i,m) */ - G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI))); - } - else { - /* anisotropic distribution */ - float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); - float slope_x = -local_m.x/(local_m.z*alpha_x); - float slope_y = -local_m.y/(local_m.z*alpha_y); - float slope_len = 1 + slope_x*slope_x + slope_y*slope_y; - - float cosThetaM = local_m.z; - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - - D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4); - - /* calculate G1(i,m) */ - float cosNI = dot(N, *omega_in); - - float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI); - float cosPhiI = dot(*omega_in, X); - float sinPhiI = dot(*omega_in, Y); - - float alphaI2 = (cosPhiI*cosPhiI)*(alpha_x*alpha_x) + (sinPhiI*sinPhiI)*(alpha_y*alpha_y); - alphaI2 /= cosPhiI*cosPhiI + sinPhiI*sinPhiI; - - G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2)); - } - - /* see eval function for derivation */ - float common = (G1o * D) * 0.25f / cosNO; - *pdf = common; - - float3 F = reflection_color(bsdf, *omega_in, m); - - *eval = G1i * common * F; - } - - if(bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { - *eval *= 0.25f * bsdf->extra->clearcoat; - } + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; + float3 N = bsdf->N; + int label; + + float cosNO = dot(N, I); + if (cosNO > 0) { + float3 X, Y, Z = N; + + if (alpha_x == alpha_y) + make_orthonormals(Z, &X, &Y); + else + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); + + /* importance sampling with distribution of visible normals. 
vectors are + * transformed to local space before and after */ + float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO); + float3 local_m; + float G1o; + + local_m = microfacet_sample_stretched( + kg, local_I, alpha_x, alpha_y, randu, randv, false, &G1o); + + float3 m = X * local_m.x + Y * local_m.y + Z * local_m.z; + float cosThetaM = local_m.z; + + /* reflection or refraction? */ + if (!m_refractive) { + float cosMO = dot(m, I); + label = LABEL_REFLECT | LABEL_GLOSSY; + + if (cosMO > 0) { + /* eq. 39 - compute actual reflected direction */ + *omega_in = 2 * cosMO * m - I; + + if (dot(Ng, *omega_in) > 0) { + if (alpha_x * alpha_y <= 1e-7f) { + /* some high number for MIS */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + + bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID || + bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID || + bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID); + + /* if fresnel is used, calculate the color with reflection_color(...) */ + if (use_fresnel) { + *eval *= reflection_color(bsdf, *omega_in, m); + } + + label = LABEL_REFLECT | LABEL_SINGULAR; + } + else { + /* microfacet normal is visible to this ray */ + /* eq. 33 */ + float alpha2 = alpha_x * alpha_y; + float D, G1i; + + if (alpha_x == alpha_y) { + /* isotropic */ + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + float tanThetaM2 = 1 / (cosThetaM2)-1; + + /* eval BRDF*cosNI */ + float cosNI = dot(N, *omega_in); + + if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { + /* use GTR1 for clearcoat */ + D = D_GTR1(cosThetaM, bsdf->alpha_x); + + /* the alpha value for clearcoat is a fixed 0.25 => alpha2 = 0.25 * 0.25 */ + alpha2 = 0.0625f; + + /* recalculate G1o */ + G1o = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNO * cosNO) / (cosNO * cosNO))); + } + else { + /* use GTR2 otherwise */ + D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2)); + } + + /* eq. 
34: now calculate G1(i,m) */ + G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI))); + } + else { + /* anisotropic distribution */ + float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); + float slope_x = -local_m.x / (local_m.z * alpha_x); + float slope_y = -local_m.y / (local_m.z * alpha_y); + float slope_len = 1 + slope_x * slope_x + slope_y * slope_y; + + float cosThetaM = local_m.z; + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + + D = 1 / ((slope_len * slope_len) * M_PI_F * alpha2 * cosThetaM4); + + /* calculate G1(i,m) */ + float cosNI = dot(N, *omega_in); + + float tanThetaI2 = (1 - cosNI * cosNI) / (cosNI * cosNI); + float cosPhiI = dot(*omega_in, X); + float sinPhiI = dot(*omega_in, Y); + + float alphaI2 = (cosPhiI * cosPhiI) * (alpha_x * alpha_x) + + (sinPhiI * sinPhiI) * (alpha_y * alpha_y); + alphaI2 /= cosPhiI * cosPhiI + sinPhiI * sinPhiI; + + G1i = 2 / (1 + safe_sqrtf(1 + alphaI2 * tanThetaI2)); + } + + /* see eval function for derivation */ + float common = (G1o * D) * 0.25f / cosNO; + *pdf = common; + + float3 F = reflection_color(bsdf, *omega_in, m); + + *eval = G1i * common * F; + } + + if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) { + *eval *= 0.25f * bsdf->extra->clearcoat; + } #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx; - *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy; + *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx; + *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy; #endif - } - } - } - else { - label = LABEL_TRANSMIT | LABEL_GLOSSY; - - /* CAUTION: the i and o variables are inverted relative to the paper - * eq. 39 - compute actual refractive direction */ - float3 R, T; + } + } + } + else { + label = LABEL_TRANSMIT | LABEL_GLOSSY; + + /* CAUTION: the i and o variables are inverted relative to the paper + * eq. 
39 - compute actual refractive direction */ + float3 R, T; #ifdef __RAY_DIFFERENTIALS__ - float3 dRdx, dRdy, dTdx, dTdy; + float3 dRdx, dRdy, dTdx, dTdy; #endif - float m_eta = bsdf->ior, fresnel; - bool inside; - - fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, + float m_eta = bsdf->ior, fresnel; + bool inside; + + fresnel = fresnel_dielectric(m_eta, + m, + I, + &R, + &T, #ifdef __RAY_DIFFERENTIALS__ - dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy, + dIdx, + dIdy, + &dRdx, + &dRdy, + &dTdx, + &dTdy, #endif - &inside); + &inside); - if(!inside && fresnel != 1.0f) { + if (!inside && fresnel != 1.0f) { - *omega_in = T; + *omega_in = T; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dTdx; - *domega_in_dy = dTdy; + *domega_in_dx = dTdx; + *domega_in_dy = dTdy; #endif - if(alpha_x*alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) { - /* some high number for MIS */ - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); - label = LABEL_TRANSMIT | LABEL_SINGULAR; - } - else { - /* eq. 33 */ - float alpha2 = alpha_x * alpha_y; - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - float tanThetaM2 = 1/(cosThetaM2) - 1; - float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2)); - - /* eval BRDF*cosNI */ - float cosNI = dot(N, *omega_in); - - /* eq. 34: now calculate G1(i,m) */ - float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI))); - - /* eq. 21 */ - float cosHI = dot(m, *omega_in); - float cosHO = dot(m, I); - float Ht2 = m_eta * cosHI + cosHO; - Ht2 *= Ht2; - - /* see eval function for derivation */ - float common = (G1o * D) * (m_eta * m_eta) / (cosNO * Ht2); - float out = G1i * fabsf(cosHI * cosHO) * common; - *pdf = cosHO * fabsf(cosHI) * common; - - *eval = make_float3(out, out, out); - } - } - } - } - else { - label = (m_refractive) ? 
LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY; - } - return label; + if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) { + /* some high number for MIS */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + label = LABEL_TRANSMIT | LABEL_SINGULAR; + } + else { + /* eq. 33 */ + float alpha2 = alpha_x * alpha_y; + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + float tanThetaM2 = 1 / (cosThetaM2)-1; + float D = alpha2 / (M_PI_F * cosThetaM4 * (alpha2 + tanThetaM2) * (alpha2 + tanThetaM2)); + + /* eval BRDF*cosNI */ + float cosNI = dot(N, *omega_in); + + /* eq. 34: now calculate G1(i,m) */ + float G1i = 2 / (1 + safe_sqrtf(1 + alpha2 * (1 - cosNI * cosNI) / (cosNI * cosNI))); + + /* eq. 21 */ + float cosHI = dot(m, *omega_in); + float cosHO = dot(m, I); + float Ht2 = m_eta * cosHI + cosHO; + Ht2 *= Ht2; + + /* see eval function for derivation */ + float common = (G1o * D) * (m_eta * m_eta) / (cosNO * Ht2); + float out = G1i * fabsf(cosHI * cosHO) * common; + *pdf = cosHO * fabsf(cosHI) * common; + + *eval = make_float3(out, out, out); + } + } + } + } + else { + label = (m_refractive) ? 
LABEL_TRANSMIT | LABEL_GLOSSY : LABEL_REFLECT | LABEL_GLOSSY; + } + return label; } /* Beckmann microfacet with Smith shadow-masking from: @@ -764,364 +803,392 @@ ccl_device int bsdf_microfacet_ggx_sample(KernelGlobals *kg, const ShaderClosure ccl_device int bsdf_microfacet_beckmann_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = bsdf->alpha_x; + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_beckmann_aniso_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = saturate(bsdf->alpha_y); + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = saturate(bsdf->alpha_y); - bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_beckmann_refraction_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_x = saturate(bsdf->alpha_x); - bsdf->alpha_y = bsdf->alpha_x; + bsdf->alpha_x = saturate(bsdf->alpha_x); + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc; - bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); - bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); + bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); + bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } ccl_device_inline float bsdf_beckmann_G1(float alpha, float cos_n) { - cos_n *= cos_n; - float invA = 
alpha * safe_sqrtf((1.0f - cos_n) / cos_n); - if(invA < 0.625f) { - return 1.0f; - } - - float a = 1.0f / invA; - return ((2.181f*a + 3.535f)*a) / ((2.577f*a + 2.276f)*a + 1.0f); + cos_n *= cos_n; + float invA = alpha * safe_sqrtf((1.0f - cos_n) / cos_n); + if (invA < 0.625f) { + return 1.0f; + } + + float a = 1.0f / invA; + return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f); } -ccl_device_inline float bsdf_beckmann_aniso_G1(float alpha_x, float alpha_y, float cos_n, float cos_phi, float sin_phi) +ccl_device_inline float bsdf_beckmann_aniso_G1( + float alpha_x, float alpha_y, float cos_n, float cos_phi, float sin_phi) { - cos_n *= cos_n; - sin_phi *= sin_phi; - cos_phi *= cos_phi; - alpha_x *= alpha_x; - alpha_y *= alpha_y; - - float alphaO2 = (cos_phi*alpha_x + sin_phi*alpha_y) / (cos_phi + sin_phi); - float invA = safe_sqrtf(alphaO2 * (1 - cos_n) / cos_n); - if(invA < 0.625f) { - return 1.0f; - } - - float a = 1.0f / invA; - return ((2.181f*a + 3.535f)*a) / ((2.577f*a + 2.276f)*a + 1.0f); + cos_n *= cos_n; + sin_phi *= sin_phi; + cos_phi *= cos_phi; + alpha_x *= alpha_x; + alpha_y *= alpha_y; + + float alphaO2 = (cos_phi * alpha_x + sin_phi * alpha_y) / (cos_phi + sin_phi); + float invA = safe_sqrtf(alphaO2 * (1 - cos_n) / cos_n); + if (invA < 0.625f) { + return 1.0f; + } + + float a = 1.0f / invA; + return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f); } -ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float alpha_x = bsdf->alpha_x; - float alpha_y = bsdf->alpha_y; - bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - float3 N = bsdf->N; - - if(m_refractive || alpha_x*alpha_y <= 1e-7f) - return make_float3(0.0f, 
0.0f, 0.0f); - - float cosNO = dot(N, I); - float cosNI = dot(N, omega_in); - - if(cosNO > 0 && cosNI > 0) { - /* get half vector */ - float3 m = normalize(omega_in + I); - - float alpha2 = alpha_x * alpha_y; - float D, G1o, G1i; - - if(alpha_x == alpha_y) { - /* isotropic - * eq. 20: (F*G*D)/(4*in*on) - * eq. 25: first we calculate D(m) */ - float cosThetaM = dot(N, m); - float cosThetaM2 = cosThetaM * cosThetaM; - float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4); - - /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */ - G1o = bsdf_beckmann_G1(alpha_x, cosNO); - G1i = bsdf_beckmann_G1(alpha_x, cosNI); - } - else { - /* anisotropic */ - float3 X, Y, Z = N; - make_orthonormals_tangent(Z, bsdf->T, &X, &Y); - - /* distribution */ - float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); - float slope_x = -local_m.x/(local_m.z*alpha_x); - float slope_y = -local_m.y/(local_m.z*alpha_y); - - float cosThetaM = local_m.z; - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - - D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4); - - /* G1(i,m) and G1(o,m) */ - G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y)); - G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y)); - } - - float G = G1o * G1i; - - /* eq. 20 */ - float common = D * 0.25f / cosNO; - float out = G * common; - - /* eq. 2 in distribution of visible normals sampling - * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */ - - /* eq. 38 - but see also: - * eq. 
17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf - * pdf = pm * 0.25 / dot(m, I); */ - *pdf = G1o * common; - - return make_float3(out, out, out); - } - - return make_float3(0.0f, 0.0f, 0.0f); + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + float3 N = bsdf->N; + + if (m_refractive || alpha_x * alpha_y <= 1e-7f) + return make_float3(0.0f, 0.0f, 0.0f); + + float cosNO = dot(N, I); + float cosNI = dot(N, omega_in); + + if (cosNO > 0 && cosNI > 0) { + /* get half vector */ + float3 m = normalize(omega_in + I); + + float alpha2 = alpha_x * alpha_y; + float D, G1o, G1i; + + if (alpha_x == alpha_y) { + /* isotropic + * eq. 20: (F*G*D)/(4*in*on) + * eq. 25: first we calculate D(m) */ + float cosThetaM = dot(N, m); + float cosThetaM2 = cosThetaM * cosThetaM; + float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4); + + /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */ + G1o = bsdf_beckmann_G1(alpha_x, cosNO); + G1i = bsdf_beckmann_G1(alpha_x, cosNI); + } + else { + /* anisotropic */ + float3 X, Y, Z = N; + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); + + /* distribution */ + float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); + float slope_x = -local_m.x / (local_m.z * alpha_x); + float slope_y = -local_m.y / (local_m.z * alpha_y); + + float cosThetaM = local_m.z; + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + + D = expf(-slope_x * slope_x - slope_y * slope_y) / (M_PI_F * alpha2 * cosThetaM4); + + /* G1(i,m) and G1(o,m) */ + G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y)); + G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y)); + } + + float G = G1o * G1i; + + /* eq. 
20 */ + float common = D * 0.25f / cosNO; + float out = G * common; + + /* eq. 2 in distribution of visible normals sampling + * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */ + + /* eq. 38 - but see also: + * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf + * pdf = pm * 0.25 / dot(m, I); */ + *pdf = G1o * common; + + return make_float3(out, out, out); + } + + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float alpha_x = bsdf->alpha_x; - float alpha_y = bsdf->alpha_y; - float m_eta = bsdf->ior; - bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - float3 N = bsdf->N; - - if(!m_refractive || alpha_x*alpha_y <= 1e-7f) - return make_float3(0.0f, 0.0f, 0.0f); - - float cosNO = dot(N, I); - float cosNI = dot(N, omega_in); - - if(cosNO <= 0 || cosNI >= 0) - return make_float3(0.0f, 0.0f, 0.0f); - - /* compute half-vector of the refraction (eq. 16) */ - float3 ht = -(m_eta * omega_in + I); - float3 Ht = normalize(ht); - float cosHO = dot(Ht, I); - float cosHI = dot(Ht, omega_in); - - /* eq. 25: first we calculate D(m) with m=Ht: */ - float alpha2 = alpha_x * alpha_y; - float cosThetaM = min(dot(N, Ht), 1.0f); - float cosThetaM2 = cosThetaM * cosThetaM; - float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4); - - /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */ - float G1o = bsdf_beckmann_G1(alpha_x, cosNO); - float G1i = bsdf_beckmann_G1(alpha_x, cosNI); - float G = G1o * G1i; - - /* probability */ - float Ht2 = dot(ht, ht); - - /* eq. 
2 in distribution of visible normals sampling - * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */ - - /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2) - * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */ - float common = D * (m_eta * m_eta) / (cosNO * Ht2); - float out = G * fabsf(cosHI * cosHO) * common; - *pdf = G1o * fabsf(cosHO * cosHI) * common; - - return make_float3(out, out, out); + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + float m_eta = bsdf->ior; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + float3 N = bsdf->N; + + if (!m_refractive || alpha_x * alpha_y <= 1e-7f) + return make_float3(0.0f, 0.0f, 0.0f); + + float cosNO = dot(N, I); + float cosNI = dot(N, omega_in); + + if (cosNO <= 0 || cosNI >= 0) + return make_float3(0.0f, 0.0f, 0.0f); + + /* compute half-vector of the refraction (eq. 16) */ + float3 ht = -(m_eta * omega_in + I); + float3 Ht = normalize(ht); + float cosHO = dot(Ht, I); + float cosHI = dot(Ht, omega_in); + + /* eq. 25: first we calculate D(m) with m=Ht: */ + float alpha2 = alpha_x * alpha_y; + float cosThetaM = min(dot(N, Ht), 1.0f); + float cosThetaM2 = cosThetaM * cosThetaM; + float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4); + + /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */ + float G1o = bsdf_beckmann_G1(alpha_x, cosNO); + float G1i = bsdf_beckmann_G1(alpha_x, cosNI); + float G = G1o * G1i; + + /* probability */ + float Ht2 = dot(ht, ht); + + /* eq. 
2 in distribution of visible normals sampling + * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */ + + /* out = fabsf(cosHI * cosHO) * (m_eta * m_eta) * G * D / (cosNO * Ht2) + * pdf = pm * (m_eta * m_eta) * fabsf(cosHI) / Ht2 */ + float common = D * (m_eta * m_eta) / (cosNO * Ht2); + float out = G * fabsf(cosHI * cosHO) * common; + *pdf = G1o * fabsf(cosHO * cosHI) * common; + + return make_float3(out, out, out); } -ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals *kg, + const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float alpha_x = bsdf->alpha_x; - float alpha_y = bsdf->alpha_y; - bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - float3 N = bsdf->N; - int label; - - float cosNO = dot(N, I); - if(cosNO > 0) { - float3 X, Y, Z = N; - - if(alpha_x == alpha_y) - make_orthonormals(Z, &X, &Y); - else - make_orthonormals_tangent(Z, bsdf->T, &X, &Y); - - /* importance sampling with distribution of visible normals. vectors are - * transformed to local space before and after */ - float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO); - float3 local_m; - float G1o; - - local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x, - randu, randv, true, &G1o); - - float3 m = X*local_m.x + Y*local_m.y + Z*local_m.z; - float cosThetaM = local_m.z; - - /* reflection or refraction? */ - if(!m_refractive) { - label = LABEL_REFLECT | LABEL_GLOSSY; - float cosMO = dot(m, I); - - if(cosMO > 0) { - /* eq. 
39 - compute actual reflected direction */ - *omega_in = 2 * cosMO * m - I; - - if(dot(Ng, *omega_in) > 0) { - if(alpha_x*alpha_y <= 1e-7f) { - /* some high number for MIS */ - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); - label = LABEL_REFLECT | LABEL_SINGULAR; - } - else { - /* microfacet normal is visible to this ray - * eq. 25 */ - float alpha2 = alpha_x * alpha_y; - float D, G1i; - - if(alpha_x == alpha_y) { - /* istropic distribution */ - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - float tanThetaM2 = 1/(cosThetaM2) - 1; - D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4); - - /* eval BRDF*cosNI */ - float cosNI = dot(N, *omega_in); - - /* eq. 26, 27: now calculate G1(i,m) */ - G1i = bsdf_beckmann_G1(alpha_x, cosNI); - } - else { - /* anisotropic distribution */ - float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); - float slope_x = -local_m.x/(local_m.z*alpha_x); - float slope_y = -local_m.y/(local_m.z*alpha_y); - - float cosThetaM = local_m.z; - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - - D = expf(-slope_x*slope_x - slope_y*slope_y) / (M_PI_F * alpha2 * cosThetaM4); - - /* G1(i,m) */ - G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, dot(*omega_in, N), dot(*omega_in, X), dot(*omega_in, Y)); - } - - float G = G1o * G1i; - - /* see eval function for derivation */ - float common = D * 0.25f / cosNO; - float out = G * common; - *pdf = G1o * common; - - *eval = make_float3(out, out, out); - } + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float alpha_x = bsdf->alpha_x; + float alpha_y = bsdf->alpha_y; + bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; + float3 N = bsdf->N; + int label; + + float cosNO = dot(N, I); + if (cosNO > 0) { + float3 X, Y, Z = N; + + if (alpha_x == alpha_y) + make_orthonormals(Z, &X, &Y); + else + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); + + /* importance 
sampling with distribution of visible normals. vectors are + * transformed to local space before and after */ + float3 local_I = make_float3(dot(X, I), dot(Y, I), cosNO); + float3 local_m; + float G1o; + + local_m = microfacet_sample_stretched(kg, local_I, alpha_x, alpha_x, randu, randv, true, &G1o); + + float3 m = X * local_m.x + Y * local_m.y + Z * local_m.z; + float cosThetaM = local_m.z; + + /* reflection or refraction? */ + if (!m_refractive) { + label = LABEL_REFLECT | LABEL_GLOSSY; + float cosMO = dot(m, I); + + if (cosMO > 0) { + /* eq. 39 - compute actual reflected direction */ + *omega_in = 2 * cosMO * m - I; + + if (dot(Ng, *omega_in) > 0) { + if (alpha_x * alpha_y <= 1e-7f) { + /* some high number for MIS */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + label = LABEL_REFLECT | LABEL_SINGULAR; + } + else { + /* microfacet normal is visible to this ray + * eq. 25 */ + float alpha2 = alpha_x * alpha_y; + float D, G1i; + + if (alpha_x == alpha_y) { + /* istropic distribution */ + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + float tanThetaM2 = 1 / (cosThetaM2)-1; + D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4); + + /* eval BRDF*cosNI */ + float cosNI = dot(N, *omega_in); + + /* eq. 
26, 27: now calculate G1(i,m) */ + G1i = bsdf_beckmann_G1(alpha_x, cosNI); + } + else { + /* anisotropic distribution */ + float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m)); + float slope_x = -local_m.x / (local_m.z * alpha_x); + float slope_y = -local_m.y / (local_m.z * alpha_y); + + float cosThetaM = local_m.z; + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + + D = expf(-slope_x * slope_x - slope_y * slope_y) / (M_PI_F * alpha2 * cosThetaM4); + + /* G1(i,m) */ + G1i = bsdf_beckmann_aniso_G1( + alpha_x, alpha_y, dot(*omega_in, N), dot(*omega_in, X), dot(*omega_in, Y)); + } + + float G = G1o * G1i; + + /* see eval function for derivation */ + float common = D * 0.25f / cosNO; + float out = G * common; + *pdf = G1o * common; + + *eval = make_float3(out, out, out); + } #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx; - *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy; + *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx; + *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy; #endif - } - } - } - else { - label = LABEL_TRANSMIT | LABEL_GLOSSY; - - /* CAUTION: the i and o variables are inverted relative to the paper - * eq. 39 - compute actual refractive direction */ - float3 R, T; + } + } + } + else { + label = LABEL_TRANSMIT | LABEL_GLOSSY; + + /* CAUTION: the i and o variables are inverted relative to the paper + * eq. 
39 - compute actual refractive direction */ + float3 R, T; #ifdef __RAY_DIFFERENTIALS__ - float3 dRdx, dRdy, dTdx, dTdy; + float3 dRdx, dRdy, dTdx, dTdy; #endif - float m_eta = bsdf->ior, fresnel; - bool inside; - - fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, + float m_eta = bsdf->ior, fresnel; + bool inside; + + fresnel = fresnel_dielectric(m_eta, + m, + I, + &R, + &T, #ifdef __RAY_DIFFERENTIALS__ - dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy, + dIdx, + dIdy, + &dRdx, + &dRdy, + &dTdx, + &dTdy, #endif - &inside); + &inside); - if(!inside && fresnel != 1.0f) { - *omega_in = T; + if (!inside && fresnel != 1.0f) { + *omega_in = T; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dTdx; - *domega_in_dy = dTdy; + *domega_in_dx = dTdx; + *domega_in_dy = dTdy; #endif - if(alpha_x*alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) { - /* some high number for MIS */ - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); - label = LABEL_TRANSMIT | LABEL_SINGULAR; - } - else { - /* eq. 33 */ - float alpha2 = alpha_x * alpha_y; - float cosThetaM2 = cosThetaM * cosThetaM; - float cosThetaM4 = cosThetaM2 * cosThetaM2; - float tanThetaM2 = 1/(cosThetaM2) - 1; - float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4); - - /* eval BRDF*cosNI */ - float cosNI = dot(N, *omega_in); - - /* eq. 26, 27: now calculate G1(i,m) */ - float G1i = bsdf_beckmann_G1(alpha_x, cosNI); - float G = G1o * G1i; - - /* eq. 21 */ - float cosHI = dot(m, *omega_in); - float cosHO = dot(m, I); - float Ht2 = m_eta * cosHI + cosHO; - Ht2 *= Ht2; - - /* see eval function for derivation */ - float common = D * (m_eta * m_eta) / (cosNO * Ht2); - float out = G * fabsf(cosHI * cosHO) * common; - *pdf = G1o * cosHO * fabsf(cosHI) * common; - - *eval = make_float3(out, out, out); - } - } - } - } - else { - label = (m_refractive) ? 
LABEL_TRANSMIT|LABEL_GLOSSY : LABEL_REFLECT|LABEL_GLOSSY; - } - return label; + if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) { + /* some high number for MIS */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + label = LABEL_TRANSMIT | LABEL_SINGULAR; + } + else { + /* eq. 33 */ + float alpha2 = alpha_x * alpha_y; + float cosThetaM2 = cosThetaM * cosThetaM; + float cosThetaM4 = cosThetaM2 * cosThetaM2; + float tanThetaM2 = 1 / (cosThetaM2)-1; + float D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4); + + /* eval BRDF*cosNI */ + float cosNI = dot(N, *omega_in); + + /* eq. 26, 27: now calculate G1(i,m) */ + float G1i = bsdf_beckmann_G1(alpha_x, cosNI); + float G = G1o * G1i; + + /* eq. 21 */ + float cosHI = dot(m, *omega_in); + float cosHO = dot(m, I); + float Ht2 = m_eta * cosHI + cosHO; + Ht2 *= Ht2; + + /* see eval function for derivation */ + float common = D * (m_eta * m_eta) / (cosNO * Ht2); + float out = G * fabsf(cosHI * cosHO) * common; + *pdf = G1o * cosHO * fabsf(cosHI) * common; + + *eval = make_float3(out, out, out); + } + } + } + } + else { + label = (m_refractive) ? 
LABEL_TRANSMIT | LABEL_GLOSSY : LABEL_REFLECT | LABEL_GLOSSY; + } + return label; } CCL_NAMESPACE_END -#endif /* __BSDF_MICROFACET_H__ */ +#endif /* __BSDF_MICROFACET_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h index 2f2c35d5d1f..2cc1a9c5299 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi.h @@ -23,149 +23,168 @@ CCL_NAMESPACE_BEGIN /* Isotropic GGX microfacet distribution */ ccl_device_forceinline float D_ggx(float3 wm, float alpha) { - wm.z *= wm.z; - alpha *= alpha; - float tmp = (1.0f - wm.z) + alpha * wm.z; - return alpha / max(M_PI_F * tmp*tmp, 1e-7f); + wm.z *= wm.z; + alpha *= alpha; + float tmp = (1.0f - wm.z) + alpha * wm.z; + return alpha / max(M_PI_F * tmp * tmp, 1e-7f); } /* Anisotropic GGX microfacet distribution */ ccl_device_forceinline float D_ggx_aniso(const float3 wm, const float2 alpha) { - float slope_x = -wm.x/alpha.x; - float slope_y = -wm.y/alpha.y; - float tmp = wm.z*wm.z + slope_x*slope_x + slope_y*slope_y; + float slope_x = -wm.x / alpha.x; + float slope_y = -wm.y / alpha.y; + float tmp = wm.z * wm.z + slope_x * slope_x + slope_y * slope_y; - return 1.0f / max(M_PI_F * tmp*tmp * alpha.x*alpha.y, 1e-7f); + return 1.0f / max(M_PI_F * tmp * tmp * alpha.x * alpha.y, 1e-7f); } /* Sample slope distribution (based on page 14 of the supplemental implementation). 
*/ -ccl_device_forceinline float2 mf_sampleP22_11(const float cosI, const float randx, const float randy) -{ - if(cosI > 0.9999f || fabsf(cosI) < 1e-6f) { - const float r = sqrtf(randx / max(1.0f - randx, 1e-7f)); - const float phi = M_2PI_F * randy; - return make_float2(r*cosf(phi), r*sinf(phi)); - } - - const float sinI = safe_sqrtf(1.0f - cosI*cosI); - const float tanI = sinI/cosI; - const float projA = 0.5f * (cosI + 1.0f); - if(projA < 0.0001f) - return make_float2(0.0f, 0.0f); - const float A = 2.0f*randx*projA / cosI - 1.0f; - float tmp = A*A-1.0f; - if(fabsf(tmp) < 1e-7f) - return make_float2(0.0f, 0.0f); - tmp = 1.0f / tmp; - const float D = safe_sqrtf(tanI*tanI*tmp*tmp - (A*A-tanI*tanI)*tmp); - - const float slopeX2 = tanI*tmp + D; - const float slopeX = (A < 0.0f || slopeX2 > 1.0f/tanI)? (tanI*tmp - D) : slopeX2; - - float U2; - if(randy >= 0.5f) - U2 = 2.0f*(randy - 0.5f); - else - U2 = 2.0f*(0.5f - randy); - const float z = (U2*(U2*(U2*0.27385f-0.73369f)+0.46341f)) / (U2*(U2*(U2*0.093073f+0.309420f)-1.0f)+0.597999f); - const float slopeY = z * sqrtf(1.0f + slopeX*slopeX); - - if(randy >= 0.5f) - return make_float2(slopeX, slopeY); - else - return make_float2(slopeX, -slopeY); +ccl_device_forceinline float2 mf_sampleP22_11(const float cosI, + const float randx, + const float randy) +{ + if (cosI > 0.9999f || fabsf(cosI) < 1e-6f) { + const float r = sqrtf(randx / max(1.0f - randx, 1e-7f)); + const float phi = M_2PI_F * randy; + return make_float2(r * cosf(phi), r * sinf(phi)); + } + + const float sinI = safe_sqrtf(1.0f - cosI * cosI); + const float tanI = sinI / cosI; + const float projA = 0.5f * (cosI + 1.0f); + if (projA < 0.0001f) + return make_float2(0.0f, 0.0f); + const float A = 2.0f * randx * projA / cosI - 1.0f; + float tmp = A * A - 1.0f; + if (fabsf(tmp) < 1e-7f) + return make_float2(0.0f, 0.0f); + tmp = 1.0f / tmp; + const float D = safe_sqrtf(tanI * tanI * tmp * tmp - (A * A - tanI * tanI) * tmp); + + const float slopeX2 = tanI * tmp + D; + 
const float slopeX = (A < 0.0f || slopeX2 > 1.0f / tanI) ? (tanI * tmp - D) : slopeX2; + + float U2; + if (randy >= 0.5f) + U2 = 2.0f * (randy - 0.5f); + else + U2 = 2.0f * (0.5f - randy); + const float z = (U2 * (U2 * (U2 * 0.27385f - 0.73369f) + 0.46341f)) / + (U2 * (U2 * (U2 * 0.093073f + 0.309420f) - 1.0f) + 0.597999f); + const float slopeY = z * sqrtf(1.0f + slopeX * slopeX); + + if (randy >= 0.5f) + return make_float2(slopeX, slopeY); + else + return make_float2(slopeX, -slopeY); } /* Visible normal sampling for the GGX distribution (based on page 7 of the supplemental implementation). */ -ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, const float2 alpha, const float randx, const float randy) +ccl_device_forceinline float3 mf_sample_vndf(const float3 wi, + const float2 alpha, + const float randx, + const float randy) { - const float3 wi_11 = normalize(make_float3(alpha.x*wi.x, alpha.y*wi.y, wi.z)); - const float2 slope_11 = mf_sampleP22_11(wi_11.z, randx, randy); + const float3 wi_11 = normalize(make_float3(alpha.x * wi.x, alpha.y * wi.y, wi.z)); + const float2 slope_11 = mf_sampleP22_11(wi_11.z, randx, randy); - const float3 cossin_phi = safe_normalize(make_float3(wi_11.x, wi_11.y, 0.0f)); - const float slope_x = alpha.x*(cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y); - const float slope_y = alpha.y*(cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y); + const float3 cossin_phi = safe_normalize(make_float3(wi_11.x, wi_11.y, 0.0f)); + const float slope_x = alpha.x * (cossin_phi.x * slope_11.x - cossin_phi.y * slope_11.y); + const float slope_y = alpha.y * (cossin_phi.y * slope_11.x + cossin_phi.x * slope_11.y); - kernel_assert(isfinite(slope_x)); - return normalize(make_float3(-slope_x, -slope_y, 1.0f)); + kernel_assert(isfinite(slope_x)); + return normalize(make_float3(-slope_x, -slope_y, 1.0f)); } /* === Phase functions: Glossy and Glass === */ /* Phase function for reflective materials. 
*/ -ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi, float3 *weight, const float3 wm) +ccl_device_forceinline float3 mf_sample_phase_glossy(const float3 wi, + float3 *weight, + const float3 wm) { - return -wi + 2.0f * wm * dot(wi, wm); + return -wi + 2.0f * wm * dot(wi, wm); } -ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, const float lambda, const float3 wo, const float2 alpha) +ccl_device_forceinline float3 mf_eval_phase_glossy(const float3 w, + const float lambda, + const float3 wo, + const float2 alpha) { - if(w.z > 0.9999f) - return make_float3(0.0f, 0.0f, 0.0f); + if (w.z > 0.9999f) + return make_float3(0.0f, 0.0f, 0.0f); - const float3 wh = normalize(wo - w); - if(wh.z < 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); + const float3 wh = normalize(wo - w); + if (wh.z < 0.0f) + return make_float3(0.0f, 0.0f, 0.0f); - float pArea = (w.z < -0.9999f)? 1.0f: lambda*w.z; + float pArea = (w.z < -0.9999f) ? 1.0f : lambda * w.z; - const float dotW_WH = dot(-w, wh); - if(dotW_WH < 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); + const float dotW_WH = dot(-w, wh); + if (dotW_WH < 0.0f) + return make_float3(0.0f, 0.0f, 0.0f); - float phase = max(0.0f, dotW_WH) * 0.25f / max(pArea * dotW_WH, 1e-7f); - if(alpha.x == alpha.y) - phase *= D_ggx(wh, alpha.x); - else - phase *= D_ggx_aniso(wh, alpha); + float phase = max(0.0f, dotW_WH) * 0.25f / max(pArea * dotW_WH, 1e-7f); + if (alpha.x == alpha.y) + phase *= D_ggx(wh, alpha.x); + else + phase *= D_ggx_aniso(wh, alpha); - return make_float3(phase, phase, phase); + return make_float3(phase, phase, phase); } /* Phase function for dielectric transmissive materials, including both reflection and refraction according to the dielectric fresnel term. 
*/ -ccl_device_forceinline float3 mf_sample_phase_glass(const float3 wi, const float eta, const float3 wm, const float randV, bool *outside) -{ - float cosI = dot(wi, wm); - float f = fresnel_dielectric_cos(cosI, eta); - if(randV < f) { - *outside = true; - return -wi + 2.0f * wm * cosI; - } - *outside = false; - float inv_eta = 1.0f/eta; - float cosT = -safe_sqrtf(1.0f - (1.0f - cosI*cosI) * inv_eta*inv_eta); - return normalize(wm*(cosI*inv_eta + cosT) - wi*inv_eta); -} - -ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, const float lambda, const float3 wo, const bool wo_outside, const float2 alpha, const float eta) -{ - if(w.z > 0.9999f) - return make_float3(0.0f, 0.0f, 0.0f); - - float pArea = (w.z < -0.9999f)? 1.0f: lambda*w.z; - float v; - if(wo_outside) { - const float3 wh = normalize(wo - w); - if(wh.z < 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); - - const float dotW_WH = dot(-w, wh); - v = fresnel_dielectric_cos(dotW_WH, eta) * max(0.0f, dotW_WH) * D_ggx(wh, alpha.x) * 0.25f / (pArea * dotW_WH); - } - else { - float3 wh = normalize(wo*eta - w); - if(wh.z < 0.0f) - wh = -wh; - const float dotW_WH = dot(-w, wh), dotWO_WH = dot(wo, wh); - if(dotW_WH < 0.0f) - return make_float3(0.0f, 0.0f, 0.0f); - - float temp = dotW_WH + eta*dotWO_WH; - v = (1.0f - fresnel_dielectric_cos(dotW_WH, eta)) * max(0.0f, dotW_WH) * max(0.0f, -dotWO_WH) * D_ggx(wh, alpha.x) / (pArea * temp * temp); - } - - return make_float3(v, v, v); +ccl_device_forceinline float3 mf_sample_phase_glass( + const float3 wi, const float eta, const float3 wm, const float randV, bool *outside) +{ + float cosI = dot(wi, wm); + float f = fresnel_dielectric_cos(cosI, eta); + if (randV < f) { + *outside = true; + return -wi + 2.0f * wm * cosI; + } + *outside = false; + float inv_eta = 1.0f / eta; + float cosT = -safe_sqrtf(1.0f - (1.0f - cosI * cosI) * inv_eta * inv_eta); + return normalize(wm * (cosI * inv_eta + cosT) - wi * inv_eta); +} + +ccl_device_forceinline float3 
mf_eval_phase_glass(const float3 w, + const float lambda, + const float3 wo, + const bool wo_outside, + const float2 alpha, + const float eta) +{ + if (w.z > 0.9999f) + return make_float3(0.0f, 0.0f, 0.0f); + + float pArea = (w.z < -0.9999f) ? 1.0f : lambda * w.z; + float v; + if (wo_outside) { + const float3 wh = normalize(wo - w); + if (wh.z < 0.0f) + return make_float3(0.0f, 0.0f, 0.0f); + + const float dotW_WH = dot(-w, wh); + v = fresnel_dielectric_cos(dotW_WH, eta) * max(0.0f, dotW_WH) * D_ggx(wh, alpha.x) * 0.25f / + (pArea * dotW_WH); + } + else { + float3 wh = normalize(wo * eta - w); + if (wh.z < 0.0f) + wh = -wh; + const float dotW_WH = dot(-w, wh), dotWO_WH = dot(wo, wh); + if (dotW_WH < 0.0f) + return make_float3(0.0f, 0.0f, 0.0f); + + float temp = dotW_WH + eta * dotWO_WH; + v = (1.0f - fresnel_dielectric_cos(dotW_WH, eta)) * max(0.0f, dotW_WH) * max(0.0f, -dotWO_WH) * + D_ggx(wh, alpha.x) / (pArea * temp * temp); + } + + return make_float3(v, v, v); } /* === Utility functions for the random walks === */ @@ -173,64 +192,65 @@ ccl_device_forceinline float3 mf_eval_phase_glass(const float3 w, const float la /* Smith Lambda function for GGX (based on page 12 of the supplemental implementation). */ ccl_device_forceinline float mf_lambda(const float3 w, const float2 alpha) { - if(w.z > 0.9999f) - return 0.0f; - else if(w.z < -0.9999f) - return -0.9999f; + if (w.z > 0.9999f) + return 0.0f; + else if (w.z < -0.9999f) + return -0.9999f; - const float inv_wz2 = 1.0f / max(w.z*w.z, 1e-7f); - const float2 wa = make_float2(w.x, w.y)*alpha; - float v = sqrtf(1.0f + dot(wa, wa) * inv_wz2); - if(w.z <= 0.0f) - v = -v; + const float inv_wz2 = 1.0f / max(w.z * w.z, 1e-7f); + const float2 wa = make_float2(w.x, w.y) * alpha; + float v = sqrtf(1.0f + dot(wa, wa) * inv_wz2); + if (w.z <= 0.0f) + v = -v; - return 0.5f*(v - 1.0f); + return 0.5f * (v - 1.0f); } /* Height distribution CDF (based on page 4 of the supplemental implementation). 
*/ ccl_device_forceinline float mf_invC1(const float h) { - return 2.0f * saturate(h) - 1.0f; + return 2.0f * saturate(h) - 1.0f; } ccl_device_forceinline float mf_C1(const float h) { - return saturate(0.5f * (h + 1.0f)); + return saturate(0.5f * (h + 1.0f)); } /* Masking function (based on page 16 of the supplemental implementation). */ ccl_device_forceinline float mf_G1(const float3 w, const float C1, const float lambda) { - if(w.z > 0.9999f) - return 1.0f; - if(w.z < 1e-5f) - return 0.0f; - return powf(C1, lambda); + if (w.z > 0.9999f) + return 1.0f; + if (w.z < 1e-5f) + return 0.0f; + return powf(C1, lambda); } /* Sampling from the visible height distribution (based on page 17 of the supplemental implementation). */ -ccl_device_forceinline bool mf_sample_height(const float3 w, float *h, float *C1, float *G1, float *lambda, const float U) -{ - if(w.z > 0.9999f) - return false; - if(w.z < -0.9999f) { - *C1 *= U; - *h = mf_invC1(*C1); - *G1 = mf_G1(w, *C1, *lambda); - } - else if(fabsf(w.z) >= 0.0001f) { - if(U > 1.0f - *G1) - return false; - if(*lambda >= 0.0f) { - *C1 = 1.0f; - } - else { - *C1 *= powf(1.0f-U, -1.0f / *lambda); - } - *h = mf_invC1(*C1); - *G1 = mf_G1(w, *C1, *lambda); - } - return true; +ccl_device_forceinline bool mf_sample_height( + const float3 w, float *h, float *C1, float *G1, float *lambda, const float U) +{ + if (w.z > 0.9999f) + return false; + if (w.z < -0.9999f) { + *C1 *= U; + *h = mf_invC1(*C1); + *G1 = mf_G1(w, *C1, *lambda); + } + else if (fabsf(w.z) >= 0.0001f) { + if (U > 1.0f - *G1) + return false; + if (*lambda >= 0.0f) { + *C1 = 1.0f; + } + else { + *C1 *= powf(1.0f - U, -1.0f / *lambda); + } + *h = mf_invC1(*C1); + *G1 = mf_G1(w, *C1, *lambda); + } + return true; } /* === PDF approximations for the different phase functions. === @@ -240,80 +260,92 @@ ccl_device_forceinline bool mf_sample_height(const float3 w, float *h, float *C1 * the missing energy is then approximated as a diffuse reflection for the PDF. 
*/ ccl_device_forceinline float mf_ggx_albedo(float r) { - float albedo = 0.806495f*expf(-1.98712f*r*r) + 0.199531f; - albedo -= ((((((1.76741f*r - 8.43891f)*r + 15.784f)*r - 14.398f)*r + 6.45221f)*r - 1.19722f)*r + 0.027803f)*r + 0.00568739f; - return saturate(albedo); + float albedo = 0.806495f * expf(-1.98712f * r * r) + 0.199531f; + albedo -= ((((((1.76741f * r - 8.43891f) * r + 15.784f) * r - 14.398f) * r + 6.45221f) * r - + 1.19722f) * + r + + 0.027803f) * + r + + 0.00568739f; + return saturate(albedo); } ccl_device_inline float mf_ggx_transmission_albedo(float a, float ior) { - if(ior < 1.0f) { - ior = 1.0f/ior; - } - a = saturate(a); - ior = clamp(ior, 1.0f, 3.0f); - float I_1 = 0.0476898f*expf(-0.978352f*(ior-0.65657f)*(ior-0.65657f)) - 0.033756f*ior + 0.993261f; - float R_1 = (((0.116991f*a - 0.270369f)*a + 0.0501366f)*a - 0.00411511f)*a + 1.00008f; - float I_2 = (((-2.08704f*ior + 26.3298f)*ior - 127.906f)*ior + 292.958f)*ior - 287.946f + 199.803f/(ior*ior) - 101.668f/(ior*ior*ior); - float R_2 = ((((5.3725f*a -24.9307f)*a + 22.7437f)*a - 3.40751f)*a + 0.0986325f)*a + 0.00493504f; - - return saturate(1.0f + I_2*R_2*0.0019127f - (1.0f - I_1)*(1.0f - R_1)*9.3205f); + if (ior < 1.0f) { + ior = 1.0f / ior; + } + a = saturate(a); + ior = clamp(ior, 1.0f, 3.0f); + float I_1 = 0.0476898f * expf(-0.978352f * (ior - 0.65657f) * (ior - 0.65657f)) - + 0.033756f * ior + 0.993261f; + float R_1 = (((0.116991f * a - 0.270369f) * a + 0.0501366f) * a - 0.00411511f) * a + 1.00008f; + float I_2 = (((-2.08704f * ior + 26.3298f) * ior - 127.906f) * ior + 292.958f) * ior - 287.946f + + 199.803f / (ior * ior) - 101.668f / (ior * ior * ior); + float R_2 = ((((5.3725f * a - 24.9307f) * a + 22.7437f) * a - 3.40751f) * a + 0.0986325f) * a + + 0.00493504f; + + return saturate(1.0f + I_2 * R_2 * 0.0019127f - (1.0f - I_1) * (1.0f - R_1) * 9.3205f); } ccl_device_forceinline float mf_ggx_pdf(const float3 wi, const float3 wo, const float alpha) { - float D = D_ggx(normalize(wi+wo), 
alpha); - float lambda = mf_lambda(wi, make_float2(alpha, alpha)); - float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f); + float D = D_ggx(normalize(wi + wo), alpha); + float lambda = mf_lambda(wi, make_float2(alpha, alpha)); + float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f); - float multiscatter = wo.z * M_1_PI_F; + float multiscatter = wo.z * M_1_PI_F; - float albedo = mf_ggx_albedo(alpha); - return albedo*singlescatter + (1.0f - albedo)*multiscatter; + float albedo = mf_ggx_albedo(alpha); + return albedo * singlescatter + (1.0f - albedo) * multiscatter; } ccl_device_forceinline float mf_ggx_aniso_pdf(const float3 wi, const float3 wo, const float2 alpha) { - float D = D_ggx_aniso(normalize(wi+wo), alpha); - float lambda = mf_lambda(wi, alpha); - float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f); + float D = D_ggx_aniso(normalize(wi + wo), alpha); + float lambda = mf_lambda(wi, alpha); + float singlescatter = 0.25f * D / max((1.0f + lambda) * wi.z, 1e-7f); - float multiscatter = wo.z * M_1_PI_F; + float multiscatter = wo.z * M_1_PI_F; - float albedo = mf_ggx_albedo(sqrtf(alpha.x*alpha.y)); - return albedo*singlescatter + (1.0f - albedo)*multiscatter; + float albedo = mf_ggx_albedo(sqrtf(alpha.x * alpha.y)); + return albedo * singlescatter + (1.0f - albedo) * multiscatter; } -ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, const float alpha, const float eta) +ccl_device_forceinline float mf_glass_pdf(const float3 wi, + const float3 wo, + const float alpha, + const float eta) { - bool reflective = (wi.z*wo.z > 0.0f); - - float wh_len; - float3 wh = normalize_len(wi + (reflective? wo : (wo*eta)), &wh_len); - if(wh.z < 0.0f) - wh = -wh; - float3 r_wi = (wi.z < 0.0f)? 
-wi: wi; - float lambda = mf_lambda(r_wi, make_float2(alpha, alpha)); - float D = D_ggx(wh, alpha); - float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta); - - float multiscatter = fabsf(wo.z * M_1_PI_F); - if(reflective) { - float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f); - float albedo = mf_ggx_albedo(alpha); - return fresnel * (albedo*singlescatter + (1.0f - albedo)*multiscatter); - } - else { - float singlescatter = fabsf(dot(r_wi, wh)*dot(wo, wh) * D * eta*eta / max((1.0f + lambda) * r_wi.z * wh_len*wh_len, 1e-7f)); - float albedo = mf_ggx_transmission_albedo(alpha, eta); - return (1.0f - fresnel) * (albedo*singlescatter + (1.0f - albedo)*multiscatter); - } + bool reflective = (wi.z * wo.z > 0.0f); + + float wh_len; + float3 wh = normalize_len(wi + (reflective ? wo : (wo * eta)), &wh_len); + if (wh.z < 0.0f) + wh = -wh; + float3 r_wi = (wi.z < 0.0f) ? -wi : wi; + float lambda = mf_lambda(r_wi, make_float2(alpha, alpha)); + float D = D_ggx(wh, alpha); + float fresnel = fresnel_dielectric_cos(dot(r_wi, wh), eta); + + float multiscatter = fabsf(wo.z * M_1_PI_F); + if (reflective) { + float singlescatter = 0.25f * D / max((1.0f + lambda) * r_wi.z, 1e-7f); + float albedo = mf_ggx_albedo(alpha); + return fresnel * (albedo * singlescatter + (1.0f - albedo) * multiscatter); + } + else { + float singlescatter = fabsf(dot(r_wi, wh) * dot(wo, wh) * D * eta * eta / + max((1.0f + lambda) * r_wi.z * wh_len * wh_len, 1e-7f)); + float albedo = mf_ggx_transmission_albedo(alpha, eta); + return (1.0f - fresnel) * (albedo * singlescatter + (1.0f - albedo) * multiscatter); + } } /* === Actual random walk implementations, one version of mf_eval and mf_sample per phase function. 
=== */ -#define MF_NAME_JOIN(x,y) x ## _ ## y -#define MF_NAME_EVAL(x,y) MF_NAME_JOIN(x,y) +#define MF_NAME_JOIN(x, y) x##_##y +#define MF_NAME_EVAL(x, y) MF_NAME_JOIN(x, y) #define MF_FUNCTION_FULL_NAME(prefix) MF_NAME_EVAL(prefix, MF_PHASE_FUNCTION) #define MF_PHASE_FUNCTION glass @@ -326,10 +358,10 @@ ccl_device_forceinline float mf_glass_pdf(const float3 wi, const float3 wo, cons ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughness) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)sc; + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)sc; - bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); - bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); + bsdf->alpha_x = fmaxf(roughness, bsdf->alpha_x); + bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y); } /* === Closure implementations === */ @@ -338,293 +370,395 @@ ccl_device void bsdf_microfacet_multi_ggx_blur(ShaderClosure *sc, float roughnes ccl_device int bsdf_microfacet_multi_ggx_common_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); - bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f); - bsdf->extra->color.x = saturate(bsdf->extra->color.x); - bsdf->extra->color.y = saturate(bsdf->extra->color.y); - bsdf->extra->color.z = saturate(bsdf->extra->color.z); - bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); - bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); - bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = clamp(bsdf->alpha_y, 1e-4f, 1.0f); + bsdf->extra->color.x = saturate(bsdf->extra->color.x); + bsdf->extra->color.y = saturate(bsdf->extra->color.y); + bsdf->extra->color.z = saturate(bsdf->extra->color.z); + bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); + bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); + bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; + return SD_BSDF | SD_BSDF_HAS_EVAL | 
SD_BSDF_NEEDS_LCG; } ccl_device int bsdf_microfacet_multi_ggx_aniso_setup(MicrofacetBsdf *bsdf) { - if(is_zero(bsdf->T)) - bsdf->T = make_float3(1.0f, 0.0f, 0.0f); + if (is_zero(bsdf->T)) + bsdf->T = make_float3(1.0f, 0.0f, 0.0f); - bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID; - return bsdf_microfacet_multi_ggx_common_setup(bsdf); + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } -ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) +ccl_device int bsdf_microfacet_multi_ggx_aniso_fresnel_setup(MicrofacetBsdf *bsdf, + const ShaderData *sd) { - if(is_zero(bsdf->T)) - bsdf->T = make_float3(1.0f, 0.0f, 0.0f); + if (is_zero(bsdf->T)) + bsdf->T = make_float3(1.0f, 0.0f, 0.0f); - bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID; - float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); - float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); - bsdf->sample_weight *= F; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; - return bsdf_microfacet_multi_ggx_common_setup(bsdf); + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } ccl_device int bsdf_microfacet_multi_ggx_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_y = bsdf->alpha_x; + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID; - return bsdf_microfacet_multi_ggx_common_setup(bsdf); + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) { - bsdf->alpha_y = bsdf->alpha_x; + bsdf->alpha_y = bsdf->alpha_x; - bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID; + bsdf->type = 
CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID; - float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); - float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); - bsdf->sample_weight *= F; + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; - return bsdf_microfacet_multi_ggx_common_setup(bsdf); + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } ccl_device int bsdf_microfacet_multi_ggx_refraction_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_y = bsdf->alpha_x; - - bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID; + bsdf->alpha_y = bsdf->alpha_x; - return bsdf_microfacet_multi_ggx_common_setup(bsdf); -} + bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID; -ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) { - *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); + return bsdf_microfacet_multi_ggx_common_setup(bsdf); } -ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - - if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) { - return make_float3(0.0f, 0.0f, 0.0f); - } - - bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID); - - bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y); - float3 X, Y, Z; - Z = bsdf->N; - if(is_aniso) - make_orthonormals_tangent(Z, bsdf->T, &X, &Y); - else - make_orthonormals(Z, &X, &Y); - - float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); - float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); - - if(is_aniso) - *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y)); - else - *pdf = mf_ggx_pdf(localI, 
localO, bsdf->alpha_x); - return mf_eval_glossy(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0); +ccl_device float3 bsdf_microfacet_multi_ggx_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf, + ccl_addr_space uint *lcg_state) +{ + *pdf = 0.0f; + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state) +ccl_device float3 bsdf_microfacet_multi_ggx_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf, + ccl_addr_space uint *lcg_state) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + + if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { + return make_float3(0.0f, 0.0f, 0.0f); + } + + bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID); + + bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y); + float3 X, Y, Z; + Z = bsdf->N; + if (is_aniso) + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); + else + make_orthonormals(Z, &X, &Y); + + float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); + float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); + + if (is_aniso) + *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y)); + else + *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x); + return mf_eval_glossy(localI, + localO, + true, + bsdf->extra->color, + bsdf->alpha_x, + bsdf->alpha_y, + lcg_state, + bsdf->ior, + use_fresnel, + bsdf->extra->cspec0); +} + +ccl_device int bsdf_microfacet_multi_ggx_sample(KernelGlobals *kg, + const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, 
+ float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf, + ccl_addr_space uint *lcg_state) +{ + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; - float3 X, Y, Z; - Z = bsdf->N; + float3 X, Y, Z; + Z = bsdf->N; - if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) { - *omega_in = 2*dot(Z, I)*Z - I; - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); + if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { + *omega_in = 2 * dot(Z, I) * Z - I; + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; - *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; + *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; + *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; #endif - return LABEL_REFLECT|LABEL_SINGULAR; - } - - bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID); - - bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y); - if(is_aniso) - make_orthonormals_tangent(Z, bsdf->T, &X, &Y); - else - make_orthonormals(Z, &X, &Y); - - float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); - float3 localO; - - *eval = mf_sample_glossy(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0); - if(is_aniso) - *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y)); - else - *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x); - *eval *= *pdf; - - *omega_in = X*localO.x + Y*localO.y + Z*localO.z; + return LABEL_REFLECT | LABEL_SINGULAR; + } + + bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID); + + bool is_aniso = (bsdf->alpha_x != bsdf->alpha_y); + if (is_aniso) + make_orthonormals_tangent(Z, bsdf->T, &X, &Y); + else + make_orthonormals(Z, &X, &Y); + + float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); + float3 localO; + + *eval = mf_sample_glossy(localI, + &localO, + bsdf->extra->color, + 
bsdf->alpha_x, + bsdf->alpha_y, + lcg_state, + bsdf->ior, + use_fresnel, + bsdf->extra->cspec0); + if (is_aniso) + *pdf = mf_ggx_aniso_pdf(localI, localO, make_float2(bsdf->alpha_x, bsdf->alpha_y)); + else + *pdf = mf_ggx_pdf(localI, localO, bsdf->alpha_x); + *eval *= *pdf; + + *omega_in = X * localO.x + Y * localO.y + Z * localO.z; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; - *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; + *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; + *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; #endif - return LABEL_REFLECT|LABEL_GLOSSY; + return LABEL_REFLECT | LABEL_GLOSSY; } /* Multiscattering GGX Glass closure */ ccl_device int bsdf_microfacet_multi_ggx_glass_setup(MicrofacetBsdf *bsdf) { - bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); - bsdf->alpha_y = bsdf->alpha_x; - bsdf->ior = max(0.0f, bsdf->ior); - bsdf->extra->color.x = saturate(bsdf->extra->color.x); - bsdf->extra->color.y = saturate(bsdf->extra->color.y); - bsdf->extra->color.z = saturate(bsdf->extra->color.z); + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = bsdf->alpha_x; + bsdf->ior = max(0.0f, bsdf->ior); + bsdf->extra->color.x = saturate(bsdf->extra->color.x); + bsdf->extra->color.y = saturate(bsdf->extra->color.y); + bsdf->extra->color.z = saturate(bsdf->extra->color.z); - bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID; + bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; + return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG; } -ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf, const ShaderData *sd) +ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(MicrofacetBsdf *bsdf, + const ShaderData *sd) { - bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); - bsdf->alpha_y = bsdf->alpha_x; - bsdf->ior = max(0.0f, bsdf->ior); - bsdf->extra->color.x = saturate(bsdf->extra->color.x); - bsdf->extra->color.y = 
saturate(bsdf->extra->color.y); - bsdf->extra->color.z = saturate(bsdf->extra->color.z); - bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); - bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); - bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); - - bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID; - - float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); - float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); - bsdf->sample_weight *= F; - - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_NEEDS_LCG; -} - -ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - - if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) { - return make_float3(0.0f, 0.0f, 0.0f); - } - - float3 X, Y, Z; - Z = bsdf->N; - make_orthonormals(Z, &X, &Y); - - float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); - float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); - - *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); - return mf_eval_glass(localI, localO, false, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, false, bsdf->extra->color); -} - -ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf, ccl_addr_space uint *lcg_state) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - - if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) { - return make_float3(0.0f, 0.0f, 0.0f); - } - - bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID); - - float3 X, Y, Z; - Z = bsdf->N; - make_orthonormals(Z, &X, &Y); - - float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); - float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); - - *pdf = 
mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); - return mf_eval_glass(localI, localO, true, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0); -} - -ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf, ccl_addr_space uint *lcg_state) + bsdf->alpha_x = clamp(bsdf->alpha_x, 1e-4f, 1.0f); + bsdf->alpha_y = bsdf->alpha_x; + bsdf->ior = max(0.0f, bsdf->ior); + bsdf->extra->color.x = saturate(bsdf->extra->color.x); + bsdf->extra->color.y = saturate(bsdf->extra->color.y); + bsdf->extra->color.z = saturate(bsdf->extra->color.z); + bsdf->extra->cspec0.x = saturate(bsdf->extra->cspec0.x); + bsdf->extra->cspec0.y = saturate(bsdf->extra->cspec0.y); + bsdf->extra->cspec0.z = saturate(bsdf->extra->cspec0.z); + + bsdf->type = CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID; + + float F0 = fresnel_dielectric_cos(1.0f, bsdf->ior); + float F = average(interpolate_fresnel_color(sd->I, bsdf->N, bsdf->ior, F0, bsdf->extra->cspec0)); + bsdf->sample_weight *= F; + + return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_NEEDS_LCG; +} + +ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf, + ccl_addr_space uint *lcg_state) +{ + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + + if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { + return make_float3(0.0f, 0.0f, 0.0f); + } + + float3 X, Y, Z; + Z = bsdf->N; + make_orthonormals(Z, &X, &Y); + + float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); + float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); + + *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); + return mf_eval_glass(localI, + localO, + false, + bsdf->extra->color, + 
bsdf->alpha_x, + bsdf->alpha_y, + lcg_state, + bsdf->ior, + false, + bsdf->extra->color); +} + +ccl_device float3 bsdf_microfacet_multi_ggx_glass_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf, + ccl_addr_space uint *lcg_state) +{ + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + + if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { + return make_float3(0.0f, 0.0f, 0.0f); + } + + bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID); + + float3 X, Y, Z; + Z = bsdf->N; + make_orthonormals(Z, &X, &Y); + + float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); + float3 localO = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z)); + + *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); + return mf_eval_glass(localI, + localO, + true, + bsdf->extra->color, + bsdf->alpha_x, + bsdf->alpha_y, + lcg_state, + bsdf->ior, + use_fresnel, + bsdf->extra->cspec0); +} + +ccl_device int bsdf_microfacet_multi_ggx_glass_sample(KernelGlobals *kg, + const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf, + ccl_addr_space uint *lcg_state) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; - float3 X, Y, Z; - Z = bsdf->N; + float3 X, Y, Z; + Z = bsdf->N; - if(bsdf->alpha_x*bsdf->alpha_y < 1e-7f) { - float3 R, T; + if (bsdf->alpha_x * bsdf->alpha_y < 1e-7f) { + float3 R, T; #ifdef __RAY_DIFFERENTIALS__ - float3 dRdx, dRdy, dTdx, dTdy; + float3 dRdx, dRdy, dTdx, dTdy; #endif - bool inside; - float fresnel = fresnel_dielectric(bsdf->ior, Z, I, &R, &T, + bool inside; + float fresnel = fresnel_dielectric(bsdf->ior, + Z, + I, + &R, + &T, #ifdef __RAY_DIFFERENTIALS__ - dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy, + dIdx, + dIdy, + &dRdx, + &dRdy, + &dTdx, + &dTdy, #endif 
- &inside); + &inside); - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); - if(randu < fresnel) { - *omega_in = R; + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + if (randu < fresnel) { + *omega_in = R; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dRdx; - *domega_in_dy = dRdy; + *domega_in_dx = dRdx; + *domega_in_dy = dRdy; #endif - return LABEL_REFLECT|LABEL_SINGULAR; - } - else { - *omega_in = T; + return LABEL_REFLECT | LABEL_SINGULAR; + } + else { + *omega_in = T; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dTdx; - *domega_in_dy = dTdy; + *domega_in_dx = dTdx; + *domega_in_dy = dTdy; #endif - return LABEL_TRANSMIT|LABEL_SINGULAR; - } - } - - bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID); - - make_orthonormals(Z, &X, &Y); - - float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); - float3 localO; - - *eval = mf_sample_glass(localI, &localO, bsdf->extra->color, bsdf->alpha_x, bsdf->alpha_y, lcg_state, bsdf->ior, use_fresnel, bsdf->extra->cspec0); - *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); - *eval *= *pdf; - - *omega_in = X*localO.x + Y*localO.y + Z*localO.z; - if(localO.z*localI.z > 0.0f) { + return LABEL_TRANSMIT | LABEL_SINGULAR; + } + } + + bool use_fresnel = (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID); + + make_orthonormals(Z, &X, &Y); + + float3 localI = make_float3(dot(I, X), dot(I, Y), dot(I, Z)); + float3 localO; + + *eval = mf_sample_glass(localI, + &localO, + bsdf->extra->color, + bsdf->alpha_x, + bsdf->alpha_y, + lcg_state, + bsdf->ior, + use_fresnel, + bsdf->extra->cspec0); + *pdf = mf_glass_pdf(localI, localO, bsdf->alpha_x, bsdf->ior); + *eval *= *pdf; + + *omega_in = X * localO.x + Y * localO.y + Z * localO.z; + if (localO.z * localI.z > 0.0f) { #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; - *domega_in_dy = (2 * dot(Z, dIdy)) * Z - dIdy; + *domega_in_dx = (2 * dot(Z, dIdx)) * Z - dIdx; + *domega_in_dy = 
(2 * dot(Z, dIdy)) * Z - dIdy; #endif - return LABEL_REFLECT|LABEL_GLOSSY; - } - else { + return LABEL_REFLECT | LABEL_GLOSSY; + } + else { #ifdef __RAY_DIFFERENTIALS__ - float cosI = dot(Z, I); - float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI*cosI))), 1e-7f); - *domega_in_dx = -(bsdf->ior * dIdx) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z; - *domega_in_dy = -(bsdf->ior * dIdy) + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z; + float cosI = dot(Z, I); + float dnp = max(sqrtf(1.0f - (bsdf->ior * bsdf->ior * (1.0f - cosI * cosI))), 1e-7f); + *domega_in_dx = -(bsdf->ior * dIdx) + + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdx, Z)) * Z; + *domega_in_dy = -(bsdf->ior * dIdy) + + ((bsdf->ior - bsdf->ior * bsdf->ior * cosI / dnp) * dot(dIdy, Z)) * Z; #endif - return LABEL_TRANSMIT|LABEL_GLOSSY; - } + return LABEL_TRANSMIT | LABEL_GLOSSY; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h index 5d300ef6db5..79247ee8057 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h @@ -25,247 +25,251 @@ * energy is used. In combination with MIS, that is enough to produce an unbiased result, although * the balance heuristic isn't necessarily optimal anymore. 
*/ -ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)( - float3 wi, - float3 wo, - const bool wo_outside, - const float3 color, - const float alpha_x, - const float alpha_y, - ccl_addr_space uint *lcg_state, - const float eta, - bool use_fresnel, - const float3 cspec0) +ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, + float3 wo, + const bool wo_outside, + const float3 color, + const float alpha_x, + const float alpha_y, + ccl_addr_space uint *lcg_state, + const float eta, + bool use_fresnel, + const float3 cspec0) { - /* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */ - bool swapped = false; + /* Evaluating for a shallower incoming direction produces less noise, and the properties of the BSDF guarantee reciprocity. */ + bool swapped = false; #ifdef MF_MULTI_GLASS - if(wi.z*wo.z < 0.0f) { - /* Glass transmission is a special case and requires the directions to change hemisphere. */ - if(-wo.z < wi.z) { - swapped = true; - float3 tmp = -wo; - wo = -wi; - wi = tmp; - } - } - else + if (wi.z * wo.z < 0.0f) { + /* Glass transmission is a special case and requires the directions to change hemisphere. */ + if (-wo.z < wi.z) { + swapped = true; + float3 tmp = -wo; + wo = -wi; + wi = tmp; + } + } + else #endif - if(wo.z < wi.z) { - swapped = true; - float3 tmp = wo; - wo = wi; - wi = tmp; - } + if (wo.z < wi.z) { + swapped = true; + float3 tmp = wo; + wo = wi; + wi = tmp; + } - if(wi.z < 1e-5f || (wo.z < 1e-5f && wo_outside) || (wo.z > -1e-5f && !wo_outside)) - return make_float3(0.0f, 0.0f, 0.0f); + if (wi.z < 1e-5f || (wo.z < 1e-5f && wo_outside) || (wo.z > -1e-5f && !wo_outside)) + return make_float3(0.0f, 0.0f, 0.0f); - const float2 alpha = make_float2(alpha_x, alpha_y); + const float2 alpha = make_float2(alpha_x, alpha_y); - float lambda_r = mf_lambda(-wi, alpha); - float shadowing_lambda = mf_lambda(wo_outside? 
wo: -wo, alpha); + float lambda_r = mf_lambda(-wi, alpha); + float shadowing_lambda = mf_lambda(wo_outside ? wo : -wo, alpha); - /* Analytically compute single scattering for lower noise. */ - float3 eval; - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - const float3 wh = normalize(wi+wo); + /* Analytically compute single scattering for lower noise. */ + float3 eval; + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + const float3 wh = normalize(wi + wo); #ifdef MF_MULTI_GLASS - eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta); - if(wo_outside) - eval *= -lambda_r / (shadowing_lambda - lambda_r); - else - eval *= -lambda_r * beta(-lambda_r, shadowing_lambda+1.0f); -#else /* MF_MULTI_GLOSSY */ - const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda); - float val = G2 * 0.25f / wi.z; - if(alpha.x == alpha.y) - val *= D_ggx(wh, alpha.x); - else - val *= D_ggx_aniso(wh, alpha); - eval = make_float3(val, val, val); + eval = mf_eval_phase_glass(-wi, lambda_r, wo, wo_outside, alpha, eta); + if (wo_outside) + eval *= -lambda_r / (shadowing_lambda - lambda_r); + else + eval *= -lambda_r * beta(-lambda_r, shadowing_lambda + 1.0f); +#else /* MF_MULTI_GLOSSY */ + const float G2 = 1.0f / (1.0f - (lambda_r + 1.0f) + shadowing_lambda); + float val = G2 * 0.25f / wi.z; + if (alpha.x == alpha.y) + val *= D_ggx(wh, alpha.x); + else + val *= D_ggx_aniso(wh, alpha); + eval = make_float3(val, val, val); #endif - float F0 = fresnel_dielectric_cos(1.0f, eta); - if(use_fresnel) { - throughput = interpolate_fresnel_color(wi, wh, eta, F0, cspec0); + float F0 = fresnel_dielectric_cos(1.0f, eta); + if (use_fresnel) { + throughput = interpolate_fresnel_color(wi, wh, eta, F0, cspec0); - eval *= throughput; - } + eval *= throughput; + } - float3 wr = -wi; - float hr = 1.0f; - float C1_r = 1.0f; - float G1_r = 0.0f; - bool outside = true; + float3 wr = -wi; + float hr = 1.0f; + float C1_r = 1.0f; + float G1_r = 0.0f; + bool outside = true; - for(int 
order = 0; order < 10; order++) { - /* Sample microfacet height. */ - float height_rand = lcg_step_float_addrspace(lcg_state); - if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) - break; - /* Sample microfacet normal. */ - float vndf_rand_y = lcg_step_float_addrspace(lcg_state); - float vndf_rand_x = lcg_step_float_addrspace(lcg_state); - float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y); + for (int order = 0; order < 10; order++) { + /* Sample microfacet height. */ + float height_rand = lcg_step_float_addrspace(lcg_state); + if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) + break; + /* Sample microfacet normal. */ + float vndf_rand_y = lcg_step_float_addrspace(lcg_state); + float vndf_rand_x = lcg_step_float_addrspace(lcg_state); + float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y); #ifdef MF_MULTI_GLASS - if(order == 0 && use_fresnel) { - /* Evaluate amount of scattering towards wo on this microfacet. */ - float3 phase; - if(outside) - phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta); - else - phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta); + if (order == 0 && use_fresnel) { + /* Evaluate amount of scattering towards wo on this microfacet. */ + float3 phase; + if (outside) + phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta); + else + phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta); - eval = throughput * phase * mf_G1(wo_outside ? wo : -wo, mf_C1((outside == wo_outside) ? hr : -hr), shadowing_lambda); - } + eval = throughput * phase * + mf_G1(wo_outside ? wo : -wo, + mf_C1((outside == wo_outside) ? hr : -hr), + shadowing_lambda); + } #endif - if(order > 0) { - /* Evaluate amount of scattering towards wo on this microfacet. */ - float3 phase; + if (order > 0) { + /* Evaluate amount of scattering towards wo on this microfacet. 
*/ + float3 phase; #ifdef MF_MULTI_GLASS - if(outside) - phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta); - else - phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f/eta); -#else /* MF_MULTI_GLOSSY */ - phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha) * throughput; + if (outside) + phase = mf_eval_phase_glass(wr, lambda_r, wo, wo_outside, alpha, eta); + else + phase = mf_eval_phase_glass(wr, lambda_r, -wo, !wo_outside, alpha, 1.0f / eta); +#else /* MF_MULTI_GLOSSY */ + phase = mf_eval_phase_glossy(wr, lambda_r, wo, alpha) * throughput; #endif - eval += throughput * phase * mf_G1(wo_outside? wo: -wo, mf_C1((outside == wo_outside)? hr: -hr), shadowing_lambda); - } - if(order+1 < 10) { - /* Bounce from the microfacet. */ + eval += throughput * phase * + mf_G1(wo_outside ? wo : -wo, + mf_C1((outside == wo_outside) ? hr : -hr), + shadowing_lambda); + } + if (order + 1 < 10) { + /* Bounce from the microfacet. */ #ifdef MF_MULTI_GLASS - bool next_outside; - float3 wi_prev = -wr; - float phase_rand = lcg_step_float_addrspace(lcg_state); - wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, phase_rand, &next_outside); - if(!next_outside) { - outside = !outside; - wr = -wr; - hr = -hr; - } + bool next_outside; + float3 wi_prev = -wr; + float phase_rand = lcg_step_float_addrspace(lcg_state); + wr = mf_sample_phase_glass(-wr, outside ? 
eta : 1.0f / eta, wm, phase_rand, &next_outside); + if (!next_outside) { + outside = !outside; + wr = -wr; + hr = -hr; + } - if(use_fresnel && !next_outside) { - throughput *= color; - } - else if(use_fresnel && order > 0) { - throughput *= interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0); - } -#else /* MF_MULTI_GLOSSY */ - if(use_fresnel && order > 0) { - throughput *= interpolate_fresnel_color(-wr, wm, eta, F0, cspec0); - } - wr = mf_sample_phase_glossy(-wr, &throughput, wm); + if (use_fresnel && !next_outside) { + throughput *= color; + } + else if (use_fresnel && order > 0) { + throughput *= interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0); + } +#else /* MF_MULTI_GLOSSY */ + if (use_fresnel && order > 0) { + throughput *= interpolate_fresnel_color(-wr, wm, eta, F0, cspec0); + } + wr = mf_sample_phase_glossy(-wr, &throughput, wm); #endif - lambda_r = mf_lambda(wr, alpha); + lambda_r = mf_lambda(wr, alpha); - if(!use_fresnel) - throughput *= color; + if (!use_fresnel) + throughput *= color; - C1_r = mf_C1(hr); - G1_r = mf_G1(wr, C1_r, lambda_r); - } - } + C1_r = mf_C1(hr); + G1_r = mf_G1(wr, C1_r, lambda_r); + } + } - if(swapped) - eval *= fabsf(wi.z / wo.z); - return eval; + if (swapped) + eval *= fabsf(wi.z / wo.z); + return eval; } /* Perform a random walk on the microsurface starting from wi, returning the direction in which the walk * escaped the surface in wo. The function returns the throughput between wi and wo. * Without reflection losses due to coloring or fresnel absorption in conductors, the sampling is optimal. 
*/ -ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)( - float3 wi, - float3 *wo, - const float3 color, - const float alpha_x, - const float alpha_y, - ccl_addr_space uint *lcg_state, - const float eta, - bool use_fresnel, - const float3 cspec0) +ccl_device_forceinline float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, + float3 *wo, + const float3 color, + const float alpha_x, + const float alpha_y, + ccl_addr_space uint *lcg_state, + const float eta, + bool use_fresnel, + const float3 cspec0) { - const float2 alpha = make_float2(alpha_x, alpha_y); + const float2 alpha = make_float2(alpha_x, alpha_y); - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float3 wr = -wi; - float lambda_r = mf_lambda(wr, alpha); - float hr = 1.0f; - float C1_r = 1.0f; - float G1_r = 0.0f; - bool outside = true; + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float3 wr = -wi; + float lambda_r = mf_lambda(wr, alpha); + float hr = 1.0f; + float C1_r = 1.0f; + float G1_r = 0.0f; + bool outside = true; - float F0 = fresnel_dielectric_cos(1.0f, eta); - if(use_fresnel) { - throughput = interpolate_fresnel_color(wi, normalize(wi + wr), eta, F0, cspec0); - } + float F0 = fresnel_dielectric_cos(1.0f, eta); + if (use_fresnel) { + throughput = interpolate_fresnel_color(wi, normalize(wi + wr), eta, F0, cspec0); + } - int order; - for(order = 0; order < 10; order++) { - /* Sample microfacet height. */ - float height_rand = lcg_step_float_addrspace(lcg_state); - if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) { - /* The random walk has left the surface. */ - *wo = outside? wr: -wr; - return throughput; - } - /* Sample microfacet normal. */ - float vndf_rand_y = lcg_step_float_addrspace(lcg_state); - float vndf_rand_x = lcg_step_float_addrspace(lcg_state); - float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y); + int order; + for (order = 0; order < 10; order++) { + /* Sample microfacet height. 
*/ + float height_rand = lcg_step_float_addrspace(lcg_state); + if (!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, height_rand)) { + /* The random walk has left the surface. */ + *wo = outside ? wr : -wr; + return throughput; + } + /* Sample microfacet normal. */ + float vndf_rand_y = lcg_step_float_addrspace(lcg_state); + float vndf_rand_x = lcg_step_float_addrspace(lcg_state); + float3 wm = mf_sample_vndf(-wr, alpha, vndf_rand_x, vndf_rand_y); - /* First-bounce color is already accounted for in mix weight. */ - if(!use_fresnel && order > 0) - throughput *= color; + /* First-bounce color is already accounted for in mix weight. */ + if (!use_fresnel && order > 0) + throughput *= color; - /* Bounce from the microfacet. */ + /* Bounce from the microfacet. */ #ifdef MF_MULTI_GLASS - bool next_outside; - float3 wi_prev = -wr; - float phase_rand = lcg_step_float_addrspace(lcg_state); - wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, phase_rand, &next_outside); - if(!next_outside) { - hr = -hr; - wr = -wr; - outside = !outside; - } + bool next_outside; + float3 wi_prev = -wr; + float phase_rand = lcg_step_float_addrspace(lcg_state); + wr = mf_sample_phase_glass(-wr, outside ? 
eta : 1.0f / eta, wm, phase_rand, &next_outside); + if (!next_outside) { + hr = -hr; + wr = -wr; + outside = !outside; + } - if(use_fresnel) { - if(!next_outside) { - throughput *= color; - } - else { - float3 t_color = interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0); + if (use_fresnel) { + if (!next_outside) { + throughput *= color; + } + else { + float3 t_color = interpolate_fresnel_color(wi_prev, wm, eta, F0, cspec0); - if(order == 0) - throughput = t_color; - else - throughput *= t_color; - } - } -#else /* MF_MULTI_GLOSSY */ - if(use_fresnel) { - float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0); + if (order == 0) + throughput = t_color; + else + throughput *= t_color; + } + } +#else /* MF_MULTI_GLOSSY */ + if (use_fresnel) { + float3 t_color = interpolate_fresnel_color(-wr, wm, eta, F0, cspec0); - if(order == 0) - throughput = t_color; - else - throughput *= t_color; - } - wr = mf_sample_phase_glossy(-wr, &throughput, wm); + if (order == 0) + throughput = t_color; + else + throughput *= t_color; + } + wr = mf_sample_phase_glossy(-wr, &throughput, wm); #endif - /* Update random walk parameters. */ - lambda_r = mf_lambda(wr, alpha); - G1_r = mf_G1(wr, C1_r, lambda_r); - } - *wo = make_float3(0.0f, 0.0f, 1.0f); - return make_float3(0.0f, 0.0f, 0.0f); + /* Update random walk parameters. 
*/ + lambda_r = mf_lambda(wr, alpha); + G1_r = mf_G1(wr, C1_r, lambda_r); + } + *wo = make_float3(0.0f, 0.0f, 1.0f); + return make_float3(0.0f, 0.0f, 0.0f); } #undef MF_MULTI_GLASS diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h index 3446d1609d9..104ed5b2818 100644 --- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h +++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h @@ -20,92 +20,110 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct OrenNayarBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float roughness; - float a; - float b; + float roughness; + float a; + float b; } OrenNayarBsdf; -ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, float3 n, float3 v, float3 l) +ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, + float3 n, + float3 v, + float3 l) { - const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc; - float nl = max(dot(n, l), 0.0f); - float nv = max(dot(n, v), 0.0f); - float t = dot(l, v) - nl * nv; - - if(t > 0.0f) - t /= max(nl, nv) + FLT_MIN; - float is = nl * (bsdf->a + bsdf->b * t); - return make_float3(is, is, is); + const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc; + float nl = max(dot(n, l), 0.0f); + float nv = max(dot(n, v), 0.0f); + float t = dot(l, v) - nl * nv; + + if (t > 0.0f) + t /= max(nl, nv) + FLT_MIN; + float is = nl * (bsdf->a + bsdf->b * t); + return make_float3(is, is, is); } ccl_device int bsdf_oren_nayar_setup(OrenNayarBsdf *bsdf) { - float sigma = bsdf->roughness; + float sigma = bsdf->roughness; - bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID; + bsdf->type = CLOSURE_BSDF_OREN_NAYAR_ID; - sigma = saturate(sigma); + sigma = saturate(sigma); - float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma); + float div = 1.0f / (M_PI_F + ((3.0f * M_PI_F - 4.0f) / 6.0f) * sigma); - bsdf->a = 1.0f * div; - bsdf->b = sigma * div; + bsdf->a = 1.0f * div; + bsdf->b = sigma * div; - return SD_BSDF|SD_BSDF_HAS_EVAL; + 
return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device bool bsdf_oren_nayar_merge(const ShaderClosure *a, const ShaderClosure *b) { - const OrenNayarBsdf *bsdf_a = (const OrenNayarBsdf*)a; - const OrenNayarBsdf *bsdf_b = (const OrenNayarBsdf*)b; + const OrenNayarBsdf *bsdf_a = (const OrenNayarBsdf *)a; + const OrenNayarBsdf *bsdf_b = (const OrenNayarBsdf *)b; - return (isequal_float3(bsdf_a->N, bsdf_b->N)) && - (bsdf_a->roughness == bsdf_b->roughness); + return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->roughness == bsdf_b->roughness); } -ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc; - if(dot(bsdf->N, omega_in) > 0.0f) { - *pdf = 0.5f * M_1_PI_F; - return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in); - } - else { - *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); - } + const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc; + if (dot(bsdf->N, omega_in) > 0.0f) { + *pdf = 0.5f * M_1_PI_F; + return bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, omega_in); + } + else { + *pdf = 0.0f; + return make_float3(0.0f, 0.0f, 0.0f); + } } -ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_oren_nayar_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 
dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc; - sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf); + const OrenNayarBsdf *bsdf = (const OrenNayarBsdf *)sc; + sample_uniform_hemisphere(bsdf->N, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) > 0.0f) { - *eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in); + if (dot(Ng, *omega_in) > 0.0f) { + *eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in); #ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the bounce - *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + // TODO: find a better approximation for the bounce + *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; #endif - } - else { - *pdf = 0.0f; - *eval = make_float3(0.0f, 0.0f, 0.0f); - } + } + else { + *pdf = 0.0f; + *eval = make_float3(0.0f, 0.0f, 0.0f); + } - return LABEL_REFLECT|LABEL_DIFFUSE; + return LABEL_REFLECT | LABEL_DIFFUSE; } - CCL_NAMESPACE_END -#endif /* __BSDF_OREN_NAYAR_H__ */ +#endif /* __BSDF_OREN_NAYAR_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h index 83da05ac435..b6fd0e68681 100644 --- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h @@ -38,105 +38,118 @@ CCL_NAMESPACE_BEGIN #ifdef __OSL__ typedef ccl_addr_space struct PhongRampBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float exponent; - float3 *colors; + float exponent; + float3 *colors; } PhongRampBsdf; ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos) { - int MAXCOLORS = 8; - - float npos = pos * (float)(MAXCOLORS - 1); - int ipos = float_to_int(npos); - if(ipos < 0) - 
return colors[0]; - if(ipos >= (MAXCOLORS - 1)) - return colors[MAXCOLORS - 1]; - float offset = npos - (float)ipos; - return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset; + int MAXCOLORS = 8; + + float npos = pos * (float)(MAXCOLORS - 1); + int ipos = float_to_int(npos); + if (ipos < 0) + return colors[0]; + if (ipos >= (MAXCOLORS - 1)) + return colors[MAXCOLORS - 1]; + float offset = npos - (float)ipos; + return colors[ipos] * (1.0f - offset) + colors[ipos + 1] * offset; } ccl_device int bsdf_phong_ramp_setup(PhongRampBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID; - bsdf->exponent = max(bsdf->exponent, 0.0f); - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_PHONG_RAMP_ID; + bsdf->exponent = max(bsdf->exponent, 0.0f); + return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc; - float m_exponent = bsdf->exponent; - float cosNI = dot(bsdf->N, omega_in); - float cosNO = dot(bsdf->N, I); - - if(cosNI > 0 && cosNO > 0) { - // reflect the view vector - float3 R = (2 * cosNO) * bsdf->N - I; - float cosRI = dot(R, omega_in); - if(cosRI > 0) { - float cosp = powf(cosRI, m_exponent); - float common = 0.5f * M_1_PI_F * cosp; - float out = cosNI * (m_exponent + 2) * common; - *pdf = (m_exponent + 1) * common; - return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out; - } - } - - return make_float3(0.0f, 0.0f, 0.0f); + const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc; + float m_exponent = bsdf->exponent; + float cosNI = dot(bsdf->N, omega_in); + float cosNO = dot(bsdf->N, I); + + if (cosNI > 0 && cosNO > 0) { + // reflect the view vector + float3 R = (2 * cosNO) * bsdf->N - I; + float cosRI = dot(R, omega_in); + if (cosRI > 0) { + float cosp = 
powf(cosRI, m_exponent); + float common = 0.5f * M_1_PI_F * cosp; + float out = cosNI * (m_exponent + 2) * common; + *pdf = (m_exponent + 1) * common; + return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out; + } + } + + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_phong_ramp_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const PhongRampBsdf *bsdf = (const PhongRampBsdf*)sc; - float cosNO = dot(bsdf->N, I); - float m_exponent = bsdf->exponent; - - if(cosNO > 0) { - // reflect the view vector - float3 R = (2 * cosNO) * bsdf->N - I; - -#ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; -#endif - - float3 T, B; - make_orthonormals (R, &T, &B); - float phi = M_2PI_F * randu; - float cosTheta = powf(randv, 1 / (m_exponent + 1)); - float sinTheta2 = 1 - cosTheta * cosTheta; - float sinTheta = sinTheta2 > 0 ? 
sqrtf(sinTheta2) : 0; - *omega_in = (cosf(phi) * sinTheta) * T + - (sinf(phi) * sinTheta) * B + - ( cosTheta) * R; - if(dot(Ng, *omega_in) > 0.0f) - { - // common terms for pdf and eval - float cosNI = dot(bsdf->N, *omega_in); - // make sure the direction we chose is still in the right hemisphere - if(cosNI > 0) - { - float cosp = powf(cosTheta, m_exponent); - float common = 0.5f * M_1_PI_F * cosp; - *pdf = (m_exponent + 1) * common; - float out = cosNI * (m_exponent + 2) * common; - *eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out; - } - } - } - return LABEL_REFLECT|LABEL_GLOSSY; + const PhongRampBsdf *bsdf = (const PhongRampBsdf *)sc; + float cosNO = dot(bsdf->N, I); + float m_exponent = bsdf->exponent; + + if (cosNO > 0) { + // reflect the view vector + float3 R = (2 * cosNO) * bsdf->N - I; + +# ifdef __RAY_DIFFERENTIALS__ + *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; +# endif + + float3 T, B; + make_orthonormals(R, &T, &B); + float phi = M_2PI_F * randu; + float cosTheta = powf(randv, 1 / (m_exponent + 1)); + float sinTheta2 = 1 - cosTheta * cosTheta; + float sinTheta = sinTheta2 > 0 ? 
sqrtf(sinTheta2) : 0; + *omega_in = (cosf(phi) * sinTheta) * T + (sinf(phi) * sinTheta) * B + (cosTheta)*R; + if (dot(Ng, *omega_in) > 0.0f) { + // common terms for pdf and eval + float cosNI = dot(bsdf->N, *omega_in); + // make sure the direction we chose is still in the right hemisphere + if (cosNI > 0) { + float cosp = powf(cosTheta, m_exponent); + float common = 0.5f * M_1_PI_F * cosp; + *pdf = (m_exponent + 1) * common; + float out = cosNI * (m_exponent + 2) * common; + *eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out; + } + } + } + return LABEL_REFLECT | LABEL_GLOSSY; } -#endif /* __OSL__ */ +#endif /* __OSL__ */ CCL_NAMESPACE_END -#endif /* __BSDF_PHONG_RAMP_H__ */ +#endif /* __BSDF_PHONG_RAMP_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h index 2f65fd54be2..d7795974ef5 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h @@ -25,101 +25,113 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct PrincipledDiffuseBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float roughness; + float roughness; } PrincipledDiffuseBsdf; -ccl_device float3 calculate_principled_diffuse_brdf(const PrincipledDiffuseBsdf *bsdf, - float3 N, float3 V, float3 L, float3 H, float *pdf) +ccl_device float3 calculate_principled_diffuse_brdf( + const PrincipledDiffuseBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf) { - float NdotL = max(dot(N, L), 0.0f); - float NdotV = max(dot(N, V), 0.0f); + float NdotL = max(dot(N, L), 0.0f); + float NdotV = max(dot(N, V), 0.0f); - if(NdotL < 0 || NdotV < 0) { - *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); - } + if (NdotL < 0 || NdotV < 0) { + *pdf = 0.0f; + return make_float3(0.0f, 0.0f, 0.0f); + } - float LdotH = dot(L, H); + float LdotH = dot(L, H); - float FL = schlick_fresnel(NdotL), FV = schlick_fresnel(NdotV); - const float Fd90 = 0.5f + 2.0f * 
LdotH*LdotH * bsdf->roughness; - float Fd = (1.0f * (1.0f - FL) + Fd90 * FL) * (1.0f * (1.0f - FV) + Fd90 * FV); + float FL = schlick_fresnel(NdotL), FV = schlick_fresnel(NdotV); + const float Fd90 = 0.5f + 2.0f * LdotH * LdotH * bsdf->roughness; + float Fd = (1.0f * (1.0f - FL) + Fd90 * FL) * (1.0f * (1.0f - FV) + Fd90 * FV); - float value = M_1_PI_F * NdotL * Fd; + float value = M_1_PI_F * NdotL * Fd; - return make_float3(value, value, value); + return make_float3(value, value, value); } ccl_device int bsdf_principled_diffuse_setup(PrincipledDiffuseBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device bool bsdf_principled_diffuse_merge(const ShaderClosure *a, const ShaderClosure *b) { - const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf*)a; - const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf*)b; + const PrincipledDiffuseBsdf *bsdf_a = (const PrincipledDiffuseBsdf *)a; + const PrincipledDiffuseBsdf *bsdf_b = (const PrincipledDiffuseBsdf *)b; - return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness); + return (isequal_float3(bsdf_a->N, bsdf_b->N) && bsdf_a->roughness == bsdf_b->roughness); } -ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, const float3 I, - const float3 omega_in, float *pdf) +ccl_device float3 bsdf_principled_diffuse_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc; - - float3 N = bsdf->N; - float3 V = I; // outgoing - float3 L = omega_in; // incoming - float3 H = normalize(L + V); - - if(dot(N, omega_in) > 0.0f) { - *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F; - return calculate_principled_diffuse_brdf(bsdf, N, V, L, H, pdf); - } - else { - *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 
0.0f); - } + const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc; + + float3 N = bsdf->N; + float3 V = I; // outgoing + float3 L = omega_in; // incoming + float3 H = normalize(L + V); + + if (dot(N, omega_in) > 0.0f) { + *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F; + return calculate_principled_diffuse_brdf(bsdf, N, V, L, H, pdf); + } + else { + *pdf = 0.0f; + return make_float3(0.0f, 0.0f, 0.0f); + } } -ccl_device float3 bsdf_principled_diffuse_eval_transmit(const ShaderClosure *sc, const float3 I, - const float3 omega_in, float *pdf) +ccl_device float3 bsdf_principled_diffuse_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } ccl_device int bsdf_principled_diffuse_sample(const ShaderClosure *sc, - float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, - float3 *eval, float3 *omega_in, float3 *domega_in_dx, - float3 *domega_in_dy, float *pdf) + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc; + const PrincipledDiffuseBsdf *bsdf = (const PrincipledDiffuseBsdf *)sc; - float3 N = bsdf->N; + float3 N = bsdf->N; - sample_cos_hemisphere(N, randu, randv, omega_in, pdf); + sample_cos_hemisphere(N, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) > 0) { - float3 H = normalize(I + *omega_in); + if (dot(Ng, *omega_in) > 0) { + float3 H = normalize(I + *omega_in); - *eval = calculate_principled_diffuse_brdf(bsdf, N, I, *omega_in, H, pdf); + *eval = calculate_principled_diffuse_brdf(bsdf, N, I, *omega_in, H, pdf); #ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - 
dIdy); + // TODO: find a better approximation for the diffuse bounce + *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); + *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); #endif - } - else { - *pdf = 0.0f; - } - return LABEL_REFLECT|LABEL_DIFFUSE; + } + else { + *pdf = 0.0f; + } + return LABEL_REFLECT | LABEL_DIFFUSE; } CCL_NAMESPACE_END -#endif /* __BSDF_PRINCIPLED_DIFFUSE_H__ */ +#endif /* __BSDF_PRINCIPLED_DIFFUSE_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h index ccdcb1babd2..bc522095b3b 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h +++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h @@ -25,87 +25,99 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct PrincipledSheenBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; } PrincipledSheenBsdf; -ccl_device float3 calculate_principled_sheen_brdf(const PrincipledSheenBsdf *bsdf, - float3 N, float3 V, float3 L, float3 H, float *pdf) +ccl_device float3 calculate_principled_sheen_brdf( + const PrincipledSheenBsdf *bsdf, float3 N, float3 V, float3 L, float3 H, float *pdf) { - float NdotL = dot(N, L); - float NdotV = dot(N, V); + float NdotL = dot(N, L); + float NdotV = dot(N, V); - if(NdotL < 0 || NdotV < 0) { - *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); - } + if (NdotL < 0 || NdotV < 0) { + *pdf = 0.0f; + return make_float3(0.0f, 0.0f, 0.0f); + } - float LdotH = dot(L, H); + float LdotH = dot(L, H); - float value = schlick_fresnel(LdotH) * NdotL; + float value = schlick_fresnel(LdotH) * NdotL; - return make_float3(value, value, value); + return make_float3(value, value, value); } ccl_device int bsdf_principled_sheen_setup(PrincipledSheenBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL; + bsdf->type = CLOSURE_BSDF_PRINCIPLED_SHEEN_ID; + return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc, 
const float3 I, - const float3 omega_in, float *pdf) +ccl_device float3 bsdf_principled_sheen_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc; - - float3 N = bsdf->N; - float3 V = I; // outgoing - float3 L = omega_in; // incoming - float3 H = normalize(L + V); - - if(dot(N, omega_in) > 0.0f) { - *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F; - return calculate_principled_sheen_brdf(bsdf, N, V, L, H, pdf); - } - else { - *pdf = 0.0f; - return make_float3(0.0f, 0.0f, 0.0f); - } + const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc; + + float3 N = bsdf->N; + float3 V = I; // outgoing + float3 L = omega_in; // incoming + float3 H = normalize(L + V); + + if (dot(N, omega_in) > 0.0f) { + *pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F; + return calculate_principled_sheen_brdf(bsdf, N, V, L, H, pdf); + } + else { + *pdf = 0.0f; + return make_float3(0.0f, 0.0f, 0.0f); + } } -ccl_device float3 bsdf_principled_sheen_eval_transmit(const ShaderClosure *sc, const float3 I, - const float3 omega_in, float *pdf) +ccl_device float3 bsdf_principled_sheen_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } ccl_device int bsdf_principled_sheen_sample(const ShaderClosure *sc, - float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, - float3 *eval, float3 *omega_in, float3 *domega_in_dx, - float3 *domega_in_dy, float *pdf) + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc; + const PrincipledSheenBsdf *bsdf = (const PrincipledSheenBsdf *)sc; - float3 N = bsdf->N; + float3 N = bsdf->N; - sample_cos_hemisphere(N, 
randu, randv, omega_in, pdf); + sample_cos_hemisphere(N, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) > 0) { - float3 H = normalize(I + *omega_in); + if (dot(Ng, *omega_in) > 0) { + float3 H = normalize(I + *omega_in); - *eval = calculate_principled_sheen_brdf(bsdf, N, I, *omega_in, H, pdf); + *eval = calculate_principled_sheen_brdf(bsdf, N, I, *omega_in, H, pdf); #ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the diffuse bounce - *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); - *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); + // TODO: find a better approximation for the diffuse bounce + *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx); + *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy); #endif - } - else { - *pdf = 0.0f; - } - return LABEL_REFLECT|LABEL_DIFFUSE; + } + else { + *pdf = 0.0f; + } + return LABEL_REFLECT | LABEL_DIFFUSE; } CCL_NAMESPACE_END -#endif /* __BSDF_PRINCIPLED_SHEEN_H__ */ +#endif /* __BSDF_PRINCIPLED_SHEEN_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h index 94f1c283af7..c24ba170915 100644 --- a/intern/cycles/kernel/closure/bsdf_reflection.h +++ b/intern/cycles/kernel/closure/bsdf_reflection.h @@ -39,42 +39,59 @@ CCL_NAMESPACE_BEGIN ccl_device int bsdf_reflection_setup(MicrofacetBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_REFLECTION_ID; - return SD_BSDF; + bsdf->type = CLOSURE_BSDF_REFLECTION_ID; + return SD_BSDF; } -ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_reflection_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_reflection_eval_transmit(const ShaderClosure 
*sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_reflection_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float3 N = bsdf->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + float3 N = bsdf->N; - // only one direction is possible - float cosNO = dot(N, I); - if(cosNO > 0) { - *omega_in = (2 * cosNO) * N - I; - if(dot(Ng, *omega_in) > 0) { + // only one direction is possible + float cosNO = dot(N, I); + if (cosNO > 0) { + *omega_in = (2 * cosNO) * N - I; + if (dot(Ng, *omega_in) > 0) { #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx; - *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy; + *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx; + *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy; #endif - /* Some high number for MIS. */ - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); - } - } - return LABEL_REFLECT|LABEL_SINGULAR; + /* Some high number for MIS. 
*/ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + } + } + return LABEL_REFLECT | LABEL_SINGULAR; } CCL_NAMESPACE_END -#endif /* __BSDF_REFLECTION_H__ */ +#endif /* __BSDF_REFLECTION_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h index abdd01c7a1d..d4fbe86dac0 100644 --- a/intern/cycles/kernel/closure/bsdf_refraction.h +++ b/intern/cycles/kernel/closure/bsdf_refraction.h @@ -39,51 +39,77 @@ CCL_NAMESPACE_BEGIN ccl_device int bsdf_refraction_setup(MicrofacetBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_REFRACTION_ID; - return SD_BSDF; + bsdf->type = CLOSURE_BSDF_REFRACTION_ID; + return SD_BSDF; } -ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_refraction_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_refraction_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_refraction_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc; - float m_eta = bsdf->ior; - float3 N = bsdf->N; + const MicrofacetBsdf *bsdf = (const MicrofacetBsdf *)sc; + 
float m_eta = bsdf->ior; + float3 N = bsdf->N; - float3 R, T; + float3 R, T; #ifdef __RAY_DIFFERENTIALS__ - float3 dRdx, dRdy, dTdx, dTdy; + float3 dRdx, dRdy, dTdx, dTdy; #endif - bool inside; - float fresnel; - fresnel = fresnel_dielectric(m_eta, N, I, &R, &T, + bool inside; + float fresnel; + fresnel = fresnel_dielectric(m_eta, + N, + I, + &R, + &T, #ifdef __RAY_DIFFERENTIALS__ - dIdx, dIdy, &dRdx, &dRdy, &dTdx, &dTdy, + dIdx, + dIdy, + &dRdx, + &dRdy, + &dTdx, + &dTdy, #endif - &inside); + &inside); - if(!inside && fresnel != 1.0f) { - /* Some high number for MIS. */ - *pdf = 1e6f; - *eval = make_float3(1e6f, 1e6f, 1e6f); - *omega_in = T; + if (!inside && fresnel != 1.0f) { + /* Some high number for MIS. */ + *pdf = 1e6f; + *eval = make_float3(1e6f, 1e6f, 1e6f); + *omega_in = T; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = dTdx; - *domega_in_dy = dTdy; + *domega_in_dx = dTdx; + *domega_in_dy = dTdy; #endif - } - return LABEL_TRANSMIT|LABEL_SINGULAR; + } + return LABEL_TRANSMIT | LABEL_SINGULAR; } CCL_NAMESPACE_END -#endif /* __BSDF_REFRACTION_H__ */ +#endif /* __BSDF_REFRACTION_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h index 097a56f22eb..f37fd228087 100644 --- a/intern/cycles/kernel/closure/bsdf_toon.h +++ b/intern/cycles/kernel/closure/bsdf_toon.h @@ -36,183 +36,215 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct ToonBsdf { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float size; - float smooth; + float size; + float smooth; } ToonBsdf; /* DIFFUSE TOON */ ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID; - bsdf->size = saturate(bsdf->size); - bsdf->smooth = saturate(bsdf->smooth); + bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID; + bsdf->size = saturate(bsdf->size); + bsdf->smooth = saturate(bsdf->smooth); - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } ccl_device bool bsdf_toon_merge(const ShaderClosure *a, const 
ShaderClosure *b) { - const ToonBsdf *bsdf_a = (const ToonBsdf*)a; - const ToonBsdf *bsdf_b = (const ToonBsdf*)b; + const ToonBsdf *bsdf_a = (const ToonBsdf *)a; + const ToonBsdf *bsdf_b = (const ToonBsdf *)b; - return (isequal_float3(bsdf_a->N, bsdf_b->N)) && - (bsdf_a->size == bsdf_b->size) && - (bsdf_a->smooth == bsdf_b->smooth); + return (isequal_float3(bsdf_a->N, bsdf_b->N)) && (bsdf_a->size == bsdf_b->size) && + (bsdf_a->smooth == bsdf_b->smooth); } ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle) { - float is; + float is; - if(angle < max_angle) - is = 1.0f; - else if(angle < (max_angle + smooth) && smooth != 0.0f) - is = (1.0f - (angle - max_angle)/smooth); - else - is = 0.0f; + if (angle < max_angle) + is = 1.0f; + else if (angle < (max_angle + smooth) && smooth != 0.0f) + is = (1.0f - (angle - max_angle) / smooth); + else + is = 0.0f; - return make_float3(is, is, is); + return make_float3(is, is, is); } ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth) { - return fminf(max_angle + smooth, M_PI_2_F); + return fminf(max_angle + smooth, M_PI_2_F); } -ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_diffuse_toon_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const ToonBsdf *bsdf = (const ToonBsdf*)sc; - float max_angle = bsdf->size*M_PI_2_F; - float smooth = bsdf->smooth*M_PI_2_F; - float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f)); + const ToonBsdf *bsdf = (const ToonBsdf *)sc; + float max_angle = bsdf->size * M_PI_2_F; + float smooth = bsdf->smooth * M_PI_2_F; + float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f)); - float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle); + float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle); - if(eval.x > 0.0f) { - float sample_angle = 
bsdf_toon_get_sample_angle(max_angle, smooth); + if (eval.x > 0.0f) { + float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); - *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle)); - return *pdf * eval; - } + *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle)); + return *pdf * eval; + } - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_diffuse_toon_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_diffuse_toon_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const ToonBsdf *bsdf = (const ToonBsdf*)sc; - float max_angle = bsdf->size*M_PI_2_F; - float smooth = bsdf->smooth*M_PI_2_F; - float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); - float angle = sample_angle*randu; + const ToonBsdf *bsdf = (const ToonBsdf *)sc; + float max_angle = bsdf->size * M_PI_2_F; + float smooth = bsdf->smooth * M_PI_2_F; + float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); + float angle = sample_angle * randu; - if(sample_angle > 0.0f) { - sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf); + if (sample_angle > 0.0f) { + sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) > 0.0f) { - *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); + if 
(dot(Ng, *omega_in) > 0.0f) { + *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); #ifdef __RAY_DIFFERENTIALS__ - // TODO: find a better approximation for the bounce - *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + // TODO: find a better approximation for the bounce + *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; #endif - } - else - *pdf = 0.0f; - } - - return LABEL_REFLECT | LABEL_DIFFUSE; + } + else + *pdf = 0.0f; + } + return LABEL_REFLECT | LABEL_DIFFUSE; } /* GLOSSY TOON */ ccl_device int bsdf_glossy_toon_setup(ToonBsdf *bsdf) { - bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID; - bsdf->size = saturate(bsdf->size); - bsdf->smooth = saturate(bsdf->smooth); + bsdf->type = CLOSURE_BSDF_GLOSSY_TOON_ID; + bsdf->size = saturate(bsdf->size); + bsdf->smooth = saturate(bsdf->smooth); - return SD_BSDF|SD_BSDF_HAS_EVAL; + return SD_BSDF | SD_BSDF_HAS_EVAL; } -ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_glossy_toon_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - const ToonBsdf *bsdf = (const ToonBsdf*)sc; - float max_angle = bsdf->size*M_PI_2_F; - float smooth = bsdf->smooth*M_PI_2_F; - float cosNI = dot(bsdf->N, omega_in); - float cosNO = dot(bsdf->N, I); + const ToonBsdf *bsdf = (const ToonBsdf *)sc; + float max_angle = bsdf->size * M_PI_2_F; + float smooth = bsdf->smooth * M_PI_2_F; + float cosNI = dot(bsdf->N, omega_in); + float cosNO = dot(bsdf->N, I); - if(cosNI > 0 && cosNO > 0) { - /* reflect the view vector */ - float3 R = (2 * cosNO) * bsdf->N - I; - float cosRI = dot(R, omega_in); + if (cosNI > 0 && cosNO > 0) { + /* reflect the view vector */ + float3 R = (2 * cosNO) * bsdf->N - I; + float cosRI = dot(R, omega_in); - float angle = 
safe_acosf(fmaxf(cosRI, 0.0f)); + float angle = safe_acosf(fmaxf(cosRI, 0.0f)); - float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle); - float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); + float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle); + float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); - *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle)); - return *pdf * eval; - } + *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle)); + return *pdf * eval; + } - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_glossy_toon_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_glossy_toon_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const ToonBsdf *bsdf = (const ToonBsdf*)sc; - float max_angle = bsdf->size*M_PI_2_F; - float smooth = bsdf->smooth*M_PI_2_F; - float cosNO = dot(bsdf->N, I); + const ToonBsdf *bsdf = (const ToonBsdf *)sc; + float max_angle = bsdf->size * M_PI_2_F; + float smooth = bsdf->smooth * M_PI_2_F; + float cosNO = dot(bsdf->N, I); - if(cosNO > 0) { - /* reflect the view vector */ - float3 R = (2 * cosNO) * bsdf->N - I; + if (cosNO > 0) { + /* reflect the view vector */ + float3 R = (2 * cosNO) * bsdf->N - I; - float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); - 
float angle = sample_angle*randu; + float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth); + float angle = sample_angle * randu; - sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf); + sample_uniform_cone(R, sample_angle, randu, randv, omega_in, pdf); - if(dot(Ng, *omega_in) > 0.0f) { - float cosNI = dot(bsdf->N, *omega_in); + if (dot(Ng, *omega_in) > 0.0f) { + float cosNI = dot(bsdf->N, *omega_in); - /* make sure the direction we chose is still in the right hemisphere */ - if(cosNI > 0) { - *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); + /* make sure the direction we chose is still in the right hemisphere */ + if (cosNI > 0) { + *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle); #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; - *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; + *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx; + *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy; #endif - } - else - *pdf = 0.0f; - } - else - *pdf = 0.0f; - } - - return LABEL_GLOSSY | LABEL_REFLECT; + } + else + *pdf = 0.0f; + } + else + *pdf = 0.0f; + } + + return LABEL_GLOSSY | LABEL_REFLECT; } CCL_NAMESPACE_END -#endif /* __BSDF_TOON_H__ */ +#endif /* __BSDF_TOON_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h index 060dff69f52..4e5513499e8 100644 --- a/intern/cycles/kernel/closure/bsdf_transparent.h +++ b/intern/cycles/kernel/closure/bsdf_transparent.h @@ -37,73 +37,91 @@ CCL_NAMESPACE_BEGIN ccl_device void bsdf_transparent_setup(ShaderData *sd, const float3 weight, int path_flag) { - /* Check cutoff weight. */ - float sample_weight = fabsf(average(weight)); - if(!(sample_weight >= CLOSURE_WEIGHT_CUTOFF)) { - return; - } + /* Check cutoff weight. 
*/ + float sample_weight = fabsf(average(weight)); + if (!(sample_weight >= CLOSURE_WEIGHT_CUTOFF)) { + return; + } - if(sd->flag & SD_TRANSPARENT) { - sd->closure_transparent_extinction += weight; + if (sd->flag & SD_TRANSPARENT) { + sd->closure_transparent_extinction += weight; - /* Add weight to existing transparent BSDF. */ - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + /* Add weight to existing transparent BSDF. */ + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { - sc->weight += weight; - sc->sample_weight += sample_weight; - break; - } - } - } - else { - sd->flag |= SD_BSDF|SD_TRANSPARENT; - sd->closure_transparent_extinction = weight; + if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { + sc->weight += weight; + sc->sample_weight += sample_weight; + break; + } + } + } + else { + sd->flag |= SD_BSDF | SD_TRANSPARENT; + sd->closure_transparent_extinction = weight; - if(path_flag & PATH_RAY_TERMINATE) { - /* In this case the number of closures is set to zero to disable - * all others, but we still want to get transparency so increase - * the number just for this. */ - sd->num_closure_left = 1; - } + if (path_flag & PATH_RAY_TERMINATE) { + /* In this case the number of closures is set to zero to disable + * all others, but we still want to get transparency so increase + * the number just for this. */ + sd->num_closure_left = 1; + } - /* Create new transparent BSDF. */ - ShaderClosure *bsdf = closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_BSDF_TRANSPARENT_ID, weight); + /* Create new transparent BSDF. 
*/ + ShaderClosure *bsdf = closure_alloc( + sd, sizeof(ShaderClosure), CLOSURE_BSDF_TRANSPARENT_ID, weight); - if(bsdf) { - bsdf->sample_weight = sample_weight; - bsdf->N = sd->N; - } - else if(path_flag & PATH_RAY_TERMINATE) { - sd->num_closure_left = 0; - } - } + if (bsdf) { + bsdf->sample_weight = sample_weight; + bsdf->N = sd->N; + } + else if (path_flag & PATH_RAY_TERMINATE) { + sd->num_closure_left = 0; + } + } } -ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_transparent_eval_reflect(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device float3 bsdf_transparent_eval_transmit(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) +ccl_device float3 bsdf_transparent_eval_transmit(const ShaderClosure *sc, + const float3 I, + const float3 omega_in, + float *pdf) { - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device int bsdf_transparent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) +ccl_device int bsdf_transparent_sample(const ShaderClosure *sc, + float3 Ng, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - // only one direction is possible - *omega_in = -I; + // only one direction is possible + *omega_in = -I; #ifdef __RAY_DIFFERENTIALS__ - *domega_in_dx = -dIdx; - *domega_in_dy = -dIdy; + *domega_in_dx = -dIdx; + *domega_in_dy = -dIdy; #endif - *pdf = 1; - *eval = make_float3(1, 1, 1); - return LABEL_TRANSMIT|LABEL_TRANSPARENT; + *pdf = 1; + *eval = make_float3(1, 1, 1); + return LABEL_TRANSMIT | 
LABEL_TRANSPARENT; } CCL_NAMESPACE_END -#endif /* __BSDF_TRANSPARENT_H__ */ +#endif /* __BSDF_TRANSPARENT_H__ */ diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h index 4f3453675c7..a9a27edd7de 100644 --- a/intern/cycles/kernel/closure/bsdf_util.h +++ b/intern/cycles/kernel/closure/bsdf_util.h @@ -35,127 +35,134 @@ CCL_NAMESPACE_BEGIN -ccl_device float fresnel_dielectric( - float eta, const float3 N, - const float3 I, float3 *R, float3 *T, +ccl_device float fresnel_dielectric(float eta, + const float3 N, + const float3 I, + float3 *R, + float3 *T, #ifdef __RAY_DIFFERENTIALS__ - const float3 dIdx, const float3 dIdy, - float3 *dRdx, float3 *dRdy, - float3 *dTdx, float3 *dTdy, + const float3 dIdx, + const float3 dIdy, + float3 *dRdx, + float3 *dRdy, + float3 *dTdx, + float3 *dTdy, #endif - bool *is_inside) + bool *is_inside) { - float cos = dot(N, I), neta; - float3 Nn; + float cos = dot(N, I), neta; + float3 Nn; - // check which side of the surface we are on - if(cos > 0) { - // we are on the outside of the surface, going in - neta = 1 / eta; - Nn = N; - *is_inside = false; - } - else { - // we are inside the surface - cos = -cos; - neta = eta; - Nn = -N; - *is_inside = true; - } + // check which side of the surface we are on + if (cos > 0) { + // we are on the outside of the surface, going in + neta = 1 / eta; + Nn = N; + *is_inside = false; + } + else { + // we are inside the surface + cos = -cos; + neta = eta; + Nn = -N; + *is_inside = true; + } - // compute reflection - *R = (2 * cos)* Nn - I; + // compute reflection + *R = (2 * cos) * Nn - I; #ifdef __RAY_DIFFERENTIALS__ - *dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx; - *dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy; + *dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx; + *dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy; #endif - float arg = 1 -(neta * neta *(1 -(cos * cos))); - if(arg < 0) { - *T = make_float3(0.0f, 0.0f, 0.0f); + float arg = 1 - (neta * neta * (1 - (cos * cos))); + if (arg < 0) 
{ + *T = make_float3(0.0f, 0.0f, 0.0f); #ifdef __RAY_DIFFERENTIALS__ - *dTdx = make_float3(0.0f, 0.0f, 0.0f); - *dTdy = make_float3(0.0f, 0.0f, 0.0f); + *dTdx = make_float3(0.0f, 0.0f, 0.0f); + *dTdy = make_float3(0.0f, 0.0f, 0.0f); #endif - return 1; // total internal reflection - } - else { - float dnp = max(sqrtf(arg), 1e-7f); - float nK = (neta * cos)- dnp; - *T = -(neta * I)+(nK * Nn); + return 1; // total internal reflection + } + else { + float dnp = max(sqrtf(arg), 1e-7f); + float nK = (neta * cos) - dnp; + *T = -(neta * I) + (nK * Nn); #ifdef __RAY_DIFFERENTIALS__ - *dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn; - *dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn; + *dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn; + *dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn; #endif - // compute Fresnel terms - float cosTheta1 = cos; // N.R - float cosTheta2 = -dot(Nn, *T); - float pPara = (cosTheta1 - eta * cosTheta2)/(cosTheta1 + eta * cosTheta2); - float pPerp = (eta * cosTheta1 - cosTheta2)/(eta * cosTheta1 + cosTheta2); - return 0.5f * (pPara * pPara + pPerp * pPerp); - } + // compute Fresnel terms + float cosTheta1 = cos; // N.R + float cosTheta2 = -dot(Nn, *T); + float pPara = (cosTheta1 - eta * cosTheta2) / (cosTheta1 + eta * cosTheta2); + float pPerp = (eta * cosTheta1 - cosTheta2) / (eta * cosTheta1 + cosTheta2); + return 0.5f * (pPara * pPara + pPerp * pPerp); + } } ccl_device float fresnel_dielectric_cos(float cosi, float eta) { - // compute fresnel reflectance without explicitly computing - // the refracted direction - float c = fabsf(cosi); - float g = eta * eta - 1 + c * c; - if(g > 0) { - g = sqrtf(g); - float A = (g - c)/(g + c); - float B = (c *(g + c)- 1)/(c *(g - c)+ 1); - return 0.5f * A * A *(1 + B * B); - } - return 1.0f; // TIR(no refracted component) + // compute fresnel reflectance without explicitly computing + 
// the refracted direction + float c = fabsf(cosi); + float g = eta * eta - 1 + c * c; + if (g > 0) { + g = sqrtf(g); + float A = (g - c) / (g + c); + float B = (c * (g + c) - 1) / (c * (g - c) + 1); + return 0.5f * A * A * (1 + B * B); + } + return 1.0f; // TIR(no refracted component) } ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k) { - float3 cosi2 = make_float3(cosi*cosi, cosi*cosi, cosi*cosi); - float3 one = make_float3(1.0f, 1.0f, 1.0f); - float3 tmp_f = eta * eta + k * k; - float3 tmp = tmp_f * cosi2; - float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) / - (tmp + (2.0f * eta * cosi) + one); - float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / - (tmp_f + (2.0f * eta * cosi) + cosi2); - return(Rparl2 + Rperp2) * 0.5f; + float3 cosi2 = make_float3(cosi * cosi, cosi * cosi, cosi * cosi); + float3 one = make_float3(1.0f, 1.0f, 1.0f); + float3 tmp_f = eta * eta + k * k; + float3 tmp = tmp_f * cosi2; + float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) / (tmp + (2.0f * eta * cosi) + one); + float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / (tmp_f + (2.0f * eta * cosi) + cosi2); + return (Rparl2 + Rperp2) * 0.5f; } ccl_device float schlick_fresnel(float u) { - float m = clamp(1.0f - u, 0.0f, 1.0f); - float m2 = m * m; - return m2 * m2 * m; // pow(m, 5) + float m = clamp(1.0f - u, 0.0f, 1.0f); + float m2 = m * m; + return m2 * m2 * m; // pow(m, 5) } ccl_device float smooth_step(float edge0, float edge1, float x) { - float result; - if(x < edge0) result = 0.0f; - else if(x >= edge1) result = 1.0f; - else { - float t = (x - edge0)/(edge1 - edge0); - result = (3.0f-2.0f*t)*(t*t); - } - return result; + float result; + if (x < edge0) + result = 0.0f; + else if (x >= edge1) + result = 1.0f; + else { + float t = (x - edge0) / (edge1 - edge0); + result = (3.0f - 2.0f * t) * (t * t); + } + return result; } /* Calculate the fresnel color which is a blend between white and the F0 color (cspec0) */ -ccl_device_forceinline float3 
interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0) { - /* Calculate the fresnel interpolation factor - * The value from fresnel_dielectric_cos(...) has to be normalized because - * the cspec0 keeps the F0 color - */ - float F0_norm = 1.0f / (1.0f - F0); - float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm; +ccl_device_forceinline float3 +interpolate_fresnel_color(float3 L, float3 H, float ior, float F0, float3 cspec0) +{ + /* Calculate the fresnel interpolation factor + * The value from fresnel_dielectric_cos(...) has to be normalized because + * the cspec0 keeps the F0 color + */ + float F0_norm = 1.0f / (1.0f - F0); + float FH = (fresnel_dielectric_cos(dot(L, H), ior) - F0) * F0_norm; - /* Blend between white and a specular color with respect to the fresnel */ - return cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; + /* Blend between white and a specular color with respect to the fresnel */ + return cspec0 * (1.0f - FH) + make_float3(1.0f, 1.0f, 1.0f) * FH; } CCL_NAMESPACE_END -#endif /* __BSDF_UTIL_H__ */ +#endif /* __BSDF_UTIL_H__ */ diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index 98c7f23c288..57804eca269 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -20,14 +20,14 @@ CCL_NAMESPACE_BEGIN typedef ccl_addr_space struct Bssrdf { - SHADER_CLOSURE_BASE; - - float3 radius; - float3 albedo; - float sharpness; - float texture_blur; - float roughness; - float channels; + SHADER_CLOSURE_BASE; + + float3 radius; + float3 albedo; + float sharpness; + float texture_blur; + float roughness; + float channels; } Bssrdf; /* Planar Truncated Gaussian @@ -41,41 +41,41 @@ typedef ccl_addr_space struct Bssrdf { ccl_device float bssrdf_gaussian_eval(const float radius, float r) { - /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm - * = 1 - exp(-Rm*Rm/(2*v)) */ - const float v = radius*radius*(0.25f*0.25f); - 
const float Rm = sqrtf(v*GAUSS_TRUNCATE); + /* integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) from 0 to Rm + * = 1 - exp(-Rm*Rm/(2*v)) */ + const float v = radius * radius * (0.25f * 0.25f); + const float Rm = sqrtf(v * GAUSS_TRUNCATE); - if(r >= Rm) - return 0.0f; + if (r >= Rm) + return 0.0f; - return expf(-r*r/(2.0f*v))/(2.0f*M_PI_F*v); + return expf(-r * r / (2.0f * v)) / (2.0f * M_PI_F * v); } ccl_device float bssrdf_gaussian_pdf(const float radius, float r) { - /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ - const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE); + /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ + const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE); - return bssrdf_gaussian_eval(radius, r) * (1.0f/(area_truncated)); + return bssrdf_gaussian_eval(radius, r) * (1.0f / (area_truncated)); } ccl_device void bssrdf_gaussian_sample(const float radius, float xi, float *r, float *h) { - /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v)) - * r = sqrt(-2*v*logf(xi)) */ - const float v = radius*radius*(0.25f*0.25f); - const float Rm = sqrtf(v*GAUSS_TRUNCATE); + /* xi = integrate (2*pi*r * exp(-r*r/(2*v)))/(2*pi*v)) = -exp(-r^2/(2*v)) + * r = sqrt(-2*v*logf(xi)) */ + const float v = radius * radius * (0.25f * 0.25f); + const float Rm = sqrtf(v * GAUSS_TRUNCATE); - /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ - const float area_truncated = 1.0f - expf(-0.5f*GAUSS_TRUNCATE); + /* 1.0 - expf(-Rm*Rm/(2*v)) simplified */ + const float area_truncated = 1.0f - expf(-0.5f * GAUSS_TRUNCATE); - /* r(xi) */ - const float r_squared = -2.0f*v*logf(1.0f - xi*area_truncated); - *r = sqrtf(r_squared); + /* r(xi) */ + const float r_squared = -2.0f * v * logf(1.0f - xi * area_truncated); + *r = sqrtf(r_squared); - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm*Rm - r_squared); + /* h^2 + r^2 = Rm^2 */ + *h = safe_sqrtf(Rm * Rm - r_squared); } /* Planar Cubic BSSRDF falloff @@ -87,97 +87,97 @@ ccl_device void bssrdf_gaussian_sample(const float 
radius, float xi, float *r, f ccl_device float bssrdf_cubic_eval(const float radius, const float sharpness, float r) { - if(sharpness == 0.0f) { - const float Rm = radius; - - if(r >= Rm) - return 0.0f; - - /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */ - const float Rm5 = (Rm*Rm) * (Rm*Rm) * Rm; - const float f = Rm - r; - const float num = f*f*f; - - return (10.0f * num) / (Rm5 * M_PI_F); - - } - else { - float Rm = radius*(1.0f + sharpness); - - if(r >= Rm) - return 0.0f; - - /* custom variation with extra sharpness, to match the previous code */ - const float y = 1.0f/(1.0f + sharpness); - float Rmy, ry, ryinv; - - if(sharpness == 1.0f) { - Rmy = sqrtf(Rm); - ry = sqrtf(r); - ryinv = (ry > 0.0f)? 1.0f/ry: 0.0f; - } - else { - Rmy = powf(Rm, y); - ry = powf(r, y); - ryinv = (r > 0.0f)? powf(r, y - 1.0f): 0.0f; - } - - const float Rmy5 = (Rmy*Rmy) * (Rmy*Rmy) * Rmy; - const float f = Rmy - ry; - const float num = f*(f*f)*(y*ryinv); - - return (10.0f * num) / (Rmy5 * M_PI_F); - } + if (sharpness == 0.0f) { + const float Rm = radius; + + if (r >= Rm) + return 0.0f; + + /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */ + const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm; + const float f = Rm - r; + const float num = f * f * f; + + return (10.0f * num) / (Rm5 * M_PI_F); + } + else { + float Rm = radius * (1.0f + sharpness); + + if (r >= Rm) + return 0.0f; + + /* custom variation with extra sharpness, to match the previous code */ + const float y = 1.0f / (1.0f + sharpness); + float Rmy, ry, ryinv; + + if (sharpness == 1.0f) { + Rmy = sqrtf(Rm); + ry = sqrtf(r); + ryinv = (ry > 0.0f) ? 1.0f / ry : 0.0f; + } + else { + Rmy = powf(Rm, y); + ry = powf(r, y); + ryinv = (r > 0.0f) ? 
powf(r, y - 1.0f) : 0.0f; + } + + const float Rmy5 = (Rmy * Rmy) * (Rmy * Rmy) * Rmy; + const float f = Rmy - ry; + const float num = f * (f * f) * (y * ryinv); + + return (10.0f * num) / (Rmy5 * M_PI_F); + } } ccl_device float bssrdf_cubic_pdf(const float radius, const float sharpness, float r) { - return bssrdf_cubic_eval(radius, sharpness, r); + return bssrdf_cubic_eval(radius, sharpness, r); } /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */ ccl_device_forceinline float bssrdf_cubic_quintic_root_find(float xi) { - /* newton-raphson iteration, usually succeeds in 2-4 iterations, except - * outside 0.02 ... 0.98 where it can go up to 10, so overall performance - * should not be too bad */ - const float tolerance = 1e-6f; - const int max_iteration_count = 10; - float x = 0.25f; - int i; + /* newton-raphson iteration, usually succeeds in 2-4 iterations, except + * outside 0.02 ... 0.98 where it can go up to 10, so overall performance + * should not be too bad */ + const float tolerance = 1e-6f; + const int max_iteration_count = 10; + float x = 0.25f; + int i; - for(i = 0; i < max_iteration_count; i++) { - float x2 = x*x; - float x3 = x2*x; - float nx = (1.0f - x); + for (i = 0; i < max_iteration_count; i++) { + float x2 = x * x; + float x3 = x2 * x; + float nx = (1.0f - x); - float f = 10.0f*x2 - 20.0f*x3 + 15.0f*x2*x2 - 4.0f*x2*x3 - xi; - float f_ = 20.0f*(x*nx)*(nx*nx); + float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi; + float f_ = 20.0f * (x * nx) * (nx * nx); - if(fabsf(f) < tolerance || f_ == 0.0f) - break; + if (fabsf(f) < tolerance || f_ == 0.0f) + break; - x = saturate(x - f/f_); - } + x = saturate(x - f / f_); + } - return x; + return x; } -ccl_device void bssrdf_cubic_sample(const float radius, const float sharpness, float xi, float *r, float *h) +ccl_device void bssrdf_cubic_sample( + const float radius, const float sharpness, float xi, float *r, float *h) { - float Rm = radius; - float r_ = 
bssrdf_cubic_quintic_root_find(xi); + float Rm = radius; + float r_ = bssrdf_cubic_quintic_root_find(xi); - if(sharpness != 0.0f) { - r_ = powf(r_, 1.0f + sharpness); - Rm *= (1.0f + sharpness); - } + if (sharpness != 0.0f) { + r_ = powf(r_, 1.0f + sharpness); + Rm *= (1.0f + sharpness); + } - r_ *= Rm; - *r = r_; + r_ *= Rm; + *r = r_; - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm*Rm - r_*r_); + /* h^2 + r^2 = Rm^2 */ + *h = safe_sqrtf(Rm * Rm - r_ * r_); } /* Approximate Reflectance Profiles @@ -188,13 +188,13 @@ ccl_device void bssrdf_cubic_sample(const float radius, const float sharpness, f * the mean free length, but still not too big so sampling is still * effective. Might need some further tweaks. */ -#define BURLEY_TRUNCATE 16.0f -#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE) +#define BURLEY_TRUNCATE 16.0f +#define BURLEY_TRUNCATE_CDF 0.9963790093708328f // cdf(BURLEY_TRUNCATE) ccl_device_inline float bssrdf_burley_fitting(float A) { - /* Diffuse surface transmission, equation (6). */ - return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f); + /* Diffuse surface transmission, equation (6). */ + return 1.9f - A + 3.5f * (A - 0.8f) * (A - 0.8f); } /* Scale mean free path length so it gives similar looking result @@ -202,45 +202,44 @@ ccl_device_inline float bssrdf_burley_fitting(float A) */ ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r) { - return 0.25f * M_1_PI_F * r; + return 0.25f * M_1_PI_F * r; } ccl_device void bssrdf_burley_setup(Bssrdf *bssrdf) { - /* Mean free path length. */ - const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius); - /* Surface albedo. */ - const float3 A = bssrdf->albedo; - const float3 s = make_float3(bssrdf_burley_fitting(A.x), - bssrdf_burley_fitting(A.y), - bssrdf_burley_fitting(A.z)); - - bssrdf->radius = l / s; + /* Mean free path length. */ + const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius); + /* Surface albedo. 
*/ + const float3 A = bssrdf->albedo; + const float3 s = make_float3( + bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z)); + + bssrdf->radius = l / s; } ccl_device float bssrdf_burley_eval(const float d, float r) { - const float Rm = BURLEY_TRUNCATE * d; - - if(r >= Rm) - return 0.0f; - - /* Burley refletance profile, equation (3). - * - * NOTES: - * - Surface albedo is already included into sc->weight, no need to - * multiply by this term here. - * - This is normalized diffuse model, so the equation is mutliplied - * by 2*pi, which also matches cdf(). - */ - float exp_r_3_d = expf(-r / (3.0f * d)); - float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d; - return (exp_r_d + exp_r_3_d) / (4.0f*d); + const float Rm = BURLEY_TRUNCATE * d; + + if (r >= Rm) + return 0.0f; + + /* Burley refletance profile, equation (3). + * + * NOTES: + * - Surface albedo is already included into sc->weight, no need to + * multiply by this term here. + * - This is normalized diffuse model, so the equation is mutliplied + * by 2*pi, which also matches cdf(). + */ + float exp_r_3_d = expf(-r / (3.0f * d)); + float exp_r_d = exp_r_3_d * exp_r_3_d * exp_r_3_d; + return (exp_r_d + exp_r_3_d) / (4.0f * d); } ccl_device float bssrdf_burley_pdf(const float d, float r) { - return bssrdf_burley_eval(d, r) * (1.0f/BURLEY_TRUNCATE_CDF); + return bssrdf_burley_eval(d, r) * (1.0f / BURLEY_TRUNCATE_CDF); } /* Find the radius for desired CDF value. @@ -249,52 +248,49 @@ ccl_device float bssrdf_burley_pdf(const float d, float r) */ ccl_device_forceinline float bssrdf_burley_root_find(float xi) { - const float tolerance = 1e-6f; - const int max_iteration_count = 10; - /* Do initial guess based on manual curve fitting, this allows us to reduce - * number of iterations to maximum 4 across the [0..1] range. We keep maximum - * number of iteration higher just to be sure we didn't miss root in some - * corner case. 
- */ - float r; - if(xi <= 0.9f) { - r = expf(xi * xi * 2.4f) - 1.0f; - } - else { - /* TODO(sergey): Some nicer curve fit is possible here. */ - r = 15.0f; - } - /* Solve against scaled radius. */ - for(int i = 0; i < max_iteration_count; i++) { - float exp_r_3 = expf(-r / 3.0f); - float exp_r = exp_r_3 * exp_r_3 * exp_r_3; - float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi; - float f_ = 0.25f * exp_r + 0.25f * exp_r_3; - - if(fabsf(f) < tolerance || f_ == 0.0f) { - break; - } - - r = r - f/f_; - if(r < 0.0f) { - r = 0.0f; - } - } - return r; + const float tolerance = 1e-6f; + const int max_iteration_count = 10; + /* Do initial guess based on manual curve fitting, this allows us to reduce + * number of iterations to maximum 4 across the [0..1] range. We keep maximum + * number of iteration higher just to be sure we didn't miss root in some + * corner case. + */ + float r; + if (xi <= 0.9f) { + r = expf(xi * xi * 2.4f) - 1.0f; + } + else { + /* TODO(sergey): Some nicer curve fit is possible here. */ + r = 15.0f; + } + /* Solve against scaled radius. 
*/ + for (int i = 0; i < max_iteration_count; i++) { + float exp_r_3 = expf(-r / 3.0f); + float exp_r = exp_r_3 * exp_r_3 * exp_r_3; + float f = 1.0f - 0.25f * exp_r - 0.75f * exp_r_3 - xi; + float f_ = 0.25f * exp_r + 0.25f * exp_r_3; + + if (fabsf(f) < tolerance || f_ == 0.0f) { + break; + } + + r = r - f / f_; + if (r < 0.0f) { + r = 0.0f; + } + } + return r; } -ccl_device void bssrdf_burley_sample(const float d, - float xi, - float *r, - float *h) +ccl_device void bssrdf_burley_sample(const float d, float xi, float *r, float *h) { - const float Rm = BURLEY_TRUNCATE * d; - const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d; + const float Rm = BURLEY_TRUNCATE * d; + const float r_ = bssrdf_burley_root_find(xi * BURLEY_TRUNCATE_CDF) * d; - *r = r_; + *r = r_; - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm*Rm - r_*r_); + /* h^2 + r^2 = Rm^2 */ + *h = safe_sqrtf(Rm * Rm - r_ * r_); } /* None BSSRDF falloff @@ -303,200 +299,195 @@ ccl_device void bssrdf_burley_sample(const float d, ccl_device float bssrdf_none_eval(const float radius, float r) { - const float Rm = radius; - return (r < Rm)? 1.0f: 0.0f; + const float Rm = radius; + return (r < Rm) ? 
1.0f : 0.0f; } ccl_device float bssrdf_none_pdf(const float radius, float r) { - /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */ - const float Rm = radius; - const float area = (M_PI_F*Rm*Rm); + /* integrate (2*pi*r)/(pi*Rm*Rm) from 0 to Rm = 1 */ + const float Rm = radius; + const float area = (M_PI_F * Rm * Rm); - return bssrdf_none_eval(radius, r) / area; + return bssrdf_none_eval(radius, r) / area; } ccl_device void bssrdf_none_sample(const float radius, float xi, float *r, float *h) { - /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2 - * r = sqrt(xi)*Rm */ - const float Rm = radius; - const float r_ = sqrtf(xi)*Rm; + /* xi = integrate (2*pi*r)/(pi*Rm*Rm) = r^2/Rm^2 + * r = sqrt(xi)*Rm */ + const float Rm = radius; + const float r_ = sqrtf(xi) * Rm; - *r = r_; + *r = r_; - /* h^2 + r^2 = Rm^2 */ - *h = safe_sqrtf(Rm*Rm - r_*r_); + /* h^2 + r^2 = Rm^2 */ + *h = safe_sqrtf(Rm * Rm - r_ * r_); } /* Generic */ ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight) { - Bssrdf *bssrdf = (Bssrdf*)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight); + Bssrdf *bssrdf = (Bssrdf *)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight); - if(bssrdf == NULL) { - return NULL; - } + if (bssrdf == NULL) { + return NULL; + } - float sample_weight = fabsf(average(weight)); - bssrdf->sample_weight = sample_weight; - return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL; + float sample_weight = fabsf(average(weight)); + bssrdf->sample_weight = sample_weight; + return (sample_weight >= CLOSURE_WEIGHT_CUTOFF) ? bssrdf : NULL; } ccl_device int bssrdf_setup(ShaderData *sd, Bssrdf *bssrdf, ClosureType type) { - int flag = 0; - int bssrdf_channels = 3; - float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f); - - /* Verify if the radii are large enough to sample without precision issues. 
*/ - if(bssrdf->radius.x < BSSRDF_MIN_RADIUS) { - diffuse_weight.x = bssrdf->weight.x; - bssrdf->weight.x = 0.0f; - bssrdf->radius.x = 0.0f; - bssrdf_channels--; - } - if(bssrdf->radius.y < BSSRDF_MIN_RADIUS) { - diffuse_weight.y = bssrdf->weight.y; - bssrdf->weight.y = 0.0f; - bssrdf->radius.y = 0.0f; - bssrdf_channels--; - } - if(bssrdf->radius.z < BSSRDF_MIN_RADIUS) { - diffuse_weight.z = bssrdf->weight.z; - bssrdf->weight.z = 0.0f; - bssrdf->radius.z = 0.0f; - bssrdf_channels--; - } - - if(bssrdf_channels < 3) { - /* Add diffuse BSDF if any radius too small. */ + int flag = 0; + int bssrdf_channels = 3; + float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f); + + /* Verify if the radii are large enough to sample without precision issues. */ + if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) { + diffuse_weight.x = bssrdf->weight.x; + bssrdf->weight.x = 0.0f; + bssrdf->radius.x = 0.0f; + bssrdf_channels--; + } + if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) { + diffuse_weight.y = bssrdf->weight.y; + bssrdf->weight.y = 0.0f; + bssrdf->radius.y = 0.0f; + bssrdf_channels--; + } + if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) { + diffuse_weight.z = bssrdf->weight.z; + bssrdf->weight.z = 0.0f; + bssrdf->radius.z = 0.0f; + bssrdf_channels--; + } + + if (bssrdf_channels < 3) { + /* Add diffuse BSDF if any radius too small. 
*/ #ifdef __PRINCIPLED__ - if(type == CLOSURE_BSSRDF_PRINCIPLED_ID || - type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) - { - float roughness = bssrdf->roughness; - float3 N = bssrdf->N; - - PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight); - - if(bsdf) { - bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID; - bsdf->N = N; - bsdf->roughness = roughness; - flag |= bsdf_principled_diffuse_setup(bsdf); - } - } - else -#endif /* __PRINCIPLED__ */ - { - DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight); - - if(bsdf) { - bsdf->type = CLOSURE_BSDF_BSSRDF_ID; - bsdf->N = bssrdf->N; - flag |= bsdf_diffuse_setup(bsdf); - } - } - } - - /* Setup BSSRDF if radius is large enough. */ - if(bssrdf_channels > 0) { - bssrdf->type = type; - bssrdf->channels = bssrdf_channels; - bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels; - bssrdf->texture_blur = saturate(bssrdf->texture_blur); - bssrdf->sharpness = saturate(bssrdf->sharpness); - - if(type == CLOSURE_BSSRDF_BURLEY_ID || - type == CLOSURE_BSSRDF_PRINCIPLED_ID || - type == CLOSURE_BSSRDF_RANDOM_WALK_ID || - type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) - { - bssrdf_burley_setup(bssrdf); - } - - flag |= SD_BSSRDF; - } - else { - bssrdf->type = type; - bssrdf->sample_weight = 0.0f; - } - - return flag; + if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) { + float roughness = bssrdf->roughness; + float3 N = bssrdf->N; + + PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc( + sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight); + + if (bsdf) { + bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID; + bsdf->N = N; + bsdf->roughness = roughness; + flag |= bsdf_principled_diffuse_setup(bsdf); + } + } + else +#endif /* __PRINCIPLED__ */ + { + DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), diffuse_weight); + + if (bsdf) { + bsdf->type = 
CLOSURE_BSDF_BSSRDF_ID; + bsdf->N = bssrdf->N; + flag |= bsdf_diffuse_setup(bsdf); + } + } + } + + /* Setup BSSRDF if radius is large enough. */ + if (bssrdf_channels > 0) { + bssrdf->type = type; + bssrdf->channels = bssrdf_channels; + bssrdf->sample_weight = fabsf(average(bssrdf->weight)) * bssrdf->channels; + bssrdf->texture_blur = saturate(bssrdf->texture_blur); + bssrdf->sharpness = saturate(bssrdf->sharpness); + + if (type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID || + type == CLOSURE_BSSRDF_RANDOM_WALK_ID || + type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) { + bssrdf_burley_setup(bssrdf); + } + + flag |= SD_BSSRDF; + } + else { + bssrdf->type = type; + bssrdf->sample_weight = 0.0f; + } + + return flag; } ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float *h) { - const Bssrdf *bssrdf = (const Bssrdf*)sc; - float radius; - - /* Sample color channel and reuse random number. Only a subset of channels - * may be used if their radius was too small to handle as BSSRDF. */ - xi *= bssrdf->channels; - - if(xi < 1.0f) { - radius = (bssrdf->radius.x > 0.0f)? bssrdf->radius.x: - (bssrdf->radius.y > 0.0f)? bssrdf->radius.y: - bssrdf->radius.z; - } - else if(xi < 2.0f) { - xi -= 1.0f; - radius = (bssrdf->radius.x > 0.0f)? bssrdf->radius.y: - bssrdf->radius.z; - } - else { - xi -= 2.0f; - radius = bssrdf->radius.z; - } - - /* Sample BSSRDF. */ - if(bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) { - bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h); - } - else if(bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID){ - bssrdf_gaussian_sample(radius, xi, r, h); - } - else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/ - bssrdf_burley_sample(radius, xi, r, h); - } + const Bssrdf *bssrdf = (const Bssrdf *)sc; + float radius; + + /* Sample color channel and reuse random number. Only a subset of channels + * may be used if their radius was too small to handle as BSSRDF. 
*/ + xi *= bssrdf->channels; + + if (xi < 1.0f) { + radius = (bssrdf->radius.x > 0.0f) ? + bssrdf->radius.x : + (bssrdf->radius.y > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z; + } + else if (xi < 2.0f) { + xi -= 1.0f; + radius = (bssrdf->radius.x > 0.0f) ? bssrdf->radius.y : bssrdf->radius.z; + } + else { + xi -= 2.0f; + radius = bssrdf->radius.z; + } + + /* Sample BSSRDF. */ + if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) { + bssrdf_cubic_sample(radius, bssrdf->sharpness, xi, r, h); + } + else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) { + bssrdf_gaussian_sample(radius, xi, r, h); + } + else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/ + bssrdf_burley_sample(radius, xi, r, h); + } } ccl_device float bssrdf_channel_pdf(const Bssrdf *bssrdf, float radius, float r) { - if(radius == 0.0f) { - return 0.0f; - } - else if(bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) { - return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r); - } - else if(bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) { - return bssrdf_gaussian_pdf(radius, r); - } - else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/ - return bssrdf_burley_pdf(radius, r); - } + if (radius == 0.0f) { + return 0.0f; + } + else if (bssrdf->type == CLOSURE_BSSRDF_CUBIC_ID) { + return bssrdf_cubic_pdf(radius, bssrdf->sharpness, r); + } + else if (bssrdf->type == CLOSURE_BSSRDF_GAUSSIAN_ID) { + return bssrdf_gaussian_pdf(radius, r); + } + else { /*if(bssrdf->type == CLOSURE_BSSRDF_BURLEY_ID || bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID)*/ + return bssrdf_burley_pdf(radius, r); + } } ccl_device_forceinline float3 bssrdf_eval(const ShaderClosure *sc, float r) { - const Bssrdf *bssrdf = (const Bssrdf*)sc; + const Bssrdf *bssrdf = (const Bssrdf *)sc; - return make_float3( - bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r), - bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r), - bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r)); + 
return make_float3(bssrdf_channel_pdf(bssrdf, bssrdf->radius.x, r), + bssrdf_channel_pdf(bssrdf, bssrdf->radius.y, r), + bssrdf_channel_pdf(bssrdf, bssrdf->radius.z, r)); } ccl_device_forceinline float bssrdf_pdf(const ShaderClosure *sc, float r) { - const Bssrdf *bssrdf = (const Bssrdf*)sc; - float3 pdf = bssrdf_eval(sc, r); + const Bssrdf *bssrdf = (const Bssrdf *)sc; + float3 pdf = bssrdf_eval(sc, r); - return (pdf.x + pdf.y + pdf.z) / bssrdf->channels; + return (pdf.x + pdf.y + pdf.z) / bssrdf->channels; } CCL_NAMESPACE_END -#endif /* __KERNEL_BSSRDF_H__ */ +#endif /* __KERNEL_BSSRDF_H__ */ diff --git a/intern/cycles/kernel/closure/emissive.h b/intern/cycles/kernel/closure/emissive.h index a7f4a2a7327..911382e6865 100644 --- a/intern/cycles/kernel/closure/emissive.h +++ b/intern/cycles/kernel/closure/emissive.h @@ -36,26 +36,26 @@ CCL_NAMESPACE_BEGIN ccl_device void background_setup(ShaderData *sd, const float3 weight) { - if(sd->flag & SD_EMISSION) { - sd->closure_emission_background += weight; - } - else { - sd->flag |= SD_EMISSION; - sd->closure_emission_background = weight; - } + if (sd->flag & SD_EMISSION) { + sd->closure_emission_background += weight; + } + else { + sd->flag |= SD_EMISSION; + sd->closure_emission_background = weight; + } } /* EMISSION CLOSURE */ ccl_device void emission_setup(ShaderData *sd, const float3 weight) { - if(sd->flag & SD_EMISSION) { - sd->closure_emission_background += weight; - } - else { - sd->flag |= SD_EMISSION; - sd->closure_emission_background = weight; - } + if (sd->flag & SD_EMISSION) { + sd->closure_emission_background += weight; + } + else { + sd->flag |= SD_EMISSION; + sd->closure_emission_background = weight; + } } /* return the probability distribution function in the direction I, @@ -63,21 +63,21 @@ ccl_device void emission_setup(ShaderData *sd, const float3 weight) * the PDF computed by sample(). 
*/ ccl_device float emissive_pdf(const float3 Ng, const float3 I) { - float cosNO = fabsf(dot(Ng, I)); - return (cosNO > 0.0f)? 1.0f: 0.0f; + float cosNO = fabsf(dot(Ng, I)); + return (cosNO > 0.0f) ? 1.0f : 0.0f; } -ccl_device void emissive_sample(const float3 Ng, float randu, float randv, - float3 *omega_out, float *pdf) +ccl_device void emissive_sample( + const float3 Ng, float randu, float randv, float3 *omega_out, float *pdf) { - /* todo: not implemented and used yet */ + /* todo: not implemented and used yet */ } ccl_device float3 emissive_simple_eval(const float3 Ng, const float3 I) { - float res = emissive_pdf(Ng, I); + float res = emissive_pdf(Ng, I); - return make_float3(res, res, res); + return make_float3(res, res, res); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h index 872d06c936a..473bc0e8a82 100644 --- a/intern/cycles/kernel/closure/volume.h +++ b/intern/cycles/kernel/closure/volume.h @@ -23,21 +23,21 @@ CCL_NAMESPACE_BEGIN ccl_device void volume_extinction_setup(ShaderData *sd, float3 weight) { - if(sd->flag & SD_EXTINCTION) { - sd->closure_transparent_extinction += weight; - } - else { - sd->flag |= SD_EXTINCTION; - sd->closure_transparent_extinction = weight; - } + if (sd->flag & SD_EXTINCTION) { + sd->closure_transparent_extinction += weight; + } + else { + sd->flag |= SD_EXTINCTION; + sd->closure_transparent_extinction = weight; + } } /* HENYEY-GREENSTEIN CLOSURE */ typedef ccl_addr_space struct HenyeyGreensteinVolume { - SHADER_CLOSURE_BASE; + SHADER_CLOSURE_BASE; - float g; + float g; } HenyeyGreensteinVolume; /* Given cosine between rays, return probability density that a photon bounces @@ -45,119 +45,152 @@ typedef ccl_addr_space struct HenyeyGreensteinVolume { * uniform sphere. g=0 uniform diffuse-like, g=1 close to sharp single ray. 
*/ ccl_device float single_peaked_henyey_greenstein(float cos_theta, float g) { - return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) * (M_1_PI_F * 0.25f); + return ((1.0f - g * g) / safe_powf(1.0f + g * g - 2.0f * g * cos_theta, 1.5f)) * + (M_1_PI_F * 0.25f); }; ccl_device int volume_henyey_greenstein_setup(HenyeyGreensteinVolume *volume) { - volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID; + volume->type = CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID; - /* clamp anisotropy to avoid delta function */ - volume->g = signf(volume->g) * min(fabsf(volume->g), 1.0f - 1e-3f); + /* clamp anisotropy to avoid delta function */ + volume->g = signf(volume->g) * min(fabsf(volume->g), 1.0f - 1e-3f); - return SD_SCATTER; + return SD_SCATTER; } ccl_device bool volume_henyey_greenstein_merge(const ShaderClosure *a, const ShaderClosure *b) { - const HenyeyGreensteinVolume *volume_a = (const HenyeyGreensteinVolume*)a; - const HenyeyGreensteinVolume *volume_b = (const HenyeyGreensteinVolume*)b; + const HenyeyGreensteinVolume *volume_a = (const HenyeyGreensteinVolume *)a; + const HenyeyGreensteinVolume *volume_b = (const HenyeyGreensteinVolume *)b; - return (volume_a->g == volume_b->g); + return (volume_a->g == volume_b->g); } -ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, const float3 I, float3 omega_in, float *pdf) +ccl_device float3 volume_henyey_greenstein_eval_phase(const ShaderClosure *sc, + const float3 I, + float3 omega_in, + float *pdf) { - const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc; - float g = volume->g; - - /* note that I points towards the viewer */ - if(fabsf(g) < 1e-3f) { - *pdf = M_1_PI_F * 0.25f; - } - else { - float cos_theta = dot(-I, omega_in); - *pdf = single_peaked_henyey_greenstein(cos_theta, g); - } - - return make_float3(*pdf, *pdf, *pdf); + const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume *)sc; + float g = volume->g; + + /* note that I points towards the 
viewer */ + if (fabsf(g) < 1e-3f) { + *pdf = M_1_PI_F * 0.25f; + } + else { + float cos_theta = dot(-I, omega_in); + *pdf = single_peaked_henyey_greenstein(cos_theta, g); + } + + return make_float3(*pdf, *pdf, *pdf); } -ccl_device float3 henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf) +ccl_device float3 +henyey_greenstrein_sample(float3 D, float g, float randu, float randv, float *pdf) { - /* match pdf for small g */ - float cos_theta; - bool isotropic = fabsf(g) < 1e-3f; - - if(isotropic) { - cos_theta = (1.0f - 2.0f * randu); - if(pdf) { - *pdf = M_1_PI_F * 0.25f; - } - } - else { - float k = (1.0f - g * g) / (1.0f - g + 2.0f * g * randu); - cos_theta = (1.0f + g * g - k * k) / (2.0f * g); - if(pdf) { - *pdf = single_peaked_henyey_greenstein(cos_theta, g); - } - } - - float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); - float phi = M_2PI_F * randv; - float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta); - - float3 T, B; - make_orthonormals(D, &T, &B); - dir = dir.x * T + dir.y * B + dir.z * D; - - return dir; + /* match pdf for small g */ + float cos_theta; + bool isotropic = fabsf(g) < 1e-3f; + + if (isotropic) { + cos_theta = (1.0f - 2.0f * randu); + if (pdf) { + *pdf = M_1_PI_F * 0.25f; + } + } + else { + float k = (1.0f - g * g) / (1.0f - g + 2.0f * g * randu); + cos_theta = (1.0f + g * g - k * k) / (2.0f * g); + if (pdf) { + *pdf = single_peaked_henyey_greenstein(cos_theta, g); + } + } + + float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta); + float phi = M_2PI_F * randv; + float3 dir = make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cos_theta); + + float3 T, B; + make_orthonormals(D, &T, &B); + dir = dir.x * T + dir.y * B + dir.z * D; + + return dir; } -ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, - float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float 
*pdf) +ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, + float3 I, + float3 dIdx, + float3 dIdy, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + float3 *domega_in_dx, + float3 *domega_in_dy, + float *pdf) { - const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume*)sc; - float g = volume->g; + const HenyeyGreensteinVolume *volume = (const HenyeyGreensteinVolume *)sc; + float g = volume->g; - /* note that I points towards the viewer and so is used negated */ - *omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf); - *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */ + /* note that I points towards the viewer and so is used negated */ + *omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf); + *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */ #ifdef __RAY_DIFFERENTIALS__ - /* todo: implement ray differential estimation */ - *domega_in_dx = make_float3(0.0f, 0.0f, 0.0f); - *domega_in_dy = make_float3(0.0f, 0.0f, 0.0f); + /* todo: implement ray differential estimation */ + *domega_in_dx = make_float3(0.0f, 0.0f, 0.0f); + *domega_in_dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return LABEL_VOLUME_SCATTER; + return LABEL_VOLUME_SCATTER; } /* VOLUME CLOSURE */ -ccl_device float3 volume_phase_eval(const ShaderData *sd, const ShaderClosure *sc, float3 omega_in, float *pdf) +ccl_device float3 volume_phase_eval(const ShaderData *sd, + const ShaderClosure *sc, + float3 omega_in, + float *pdf) { - kernel_assert(sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID); + kernel_assert(sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID); - return volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); + return volume_henyey_greenstein_eval_phase(sc, sd->I, omega_in, pdf); } -ccl_device int volume_phase_sample(const ShaderData *sd, const ShaderClosure *sc, float randu, - float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf) +ccl_device 
int volume_phase_sample(const ShaderData *sd, + const ShaderClosure *sc, + float randu, + float randv, + float3 *eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { - int label; - - switch(sc->type) { - case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: - label = volume_henyey_greenstein_sample(sc, sd->I, sd->dI.dx, sd->dI.dy, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf); - break; - default: - *eval = make_float3(0.0f, 0.0f, 0.0f); - label = LABEL_NONE; - break; - } - - return label; + int label; + + switch (sc->type) { + case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID: + label = volume_henyey_greenstein_sample(sc, + sd->I, + sd->dI.dx, + sd->dI.dy, + randu, + randv, + eval, + omega_in, + &domega_in->dx, + &domega_in->dy, + pdf); + break; + default: + *eval = make_float3(0.0f, 0.0f, 0.0f); + label = LABEL_NONE; + break; + } + + return label; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/filter/filter.h b/intern/cycles/kernel/filter/filter.h index 4209d69ee73..b067e53a8bf 100644 --- a/intern/cycles/kernel/filter/filter.h +++ b/intern/cycles/kernel/filter/filter.h @@ -25,8 +25,8 @@ CCL_NAMESPACE_BEGIN -#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z -#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name) +#define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z +#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name) #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name) #define KERNEL_ARCH cpu @@ -49,4 +49,4 @@ CCL_NAMESPACE_BEGIN CCL_NAMESPACE_END -#endif /* __FILTER_H__ */ +#endif /* __FILTER_H__ */ diff --git a/intern/cycles/kernel/filter/filter_defines.h b/intern/cycles/kernel/filter/filter_defines.h index cb04aac35f4..0e51eeef92f 100644 --- a/intern/cycles/kernel/filter/filter_defines.h +++ b/intern/cycles/kernel/filter/filter_defines.h @@ -18,59 +18,56 @@ #define __FILTER_DEFINES_H__ #define DENOISE_FEATURES 11 -#define TRANSFORM_SIZE (DENOISE_FEATURES*DENOISE_FEATURES) 
-#define XTWX_SIZE (((DENOISE_FEATURES+1)*(DENOISE_FEATURES+2))/2) -#define XTWY_SIZE (DENOISE_FEATURES+1) +#define TRANSFORM_SIZE (DENOISE_FEATURES * DENOISE_FEATURES) +#define XTWX_SIZE (((DENOISE_FEATURES + 1) * (DENOISE_FEATURES + 2)) / 2) +#define XTWY_SIZE (DENOISE_FEATURES + 1) #define DENOISE_MAX_FRAMES 16 typedef struct TileInfo { - int offsets[9]; - int strides[9]; - int x[4]; - int y[4]; - int from_render; - int frames[DENOISE_MAX_FRAMES]; - int num_frames; - /* TODO(lukas): CUDA doesn't have uint64_t... */ + int offsets[9]; + int strides[9]; + int x[4]; + int y[4]; + int from_render; + int frames[DENOISE_MAX_FRAMES]; + int num_frames; + /* TODO(lukas): CUDA doesn't have uint64_t... */ #ifdef __KERNEL_OPENCL__ - ccl_global float *buffers[9]; + ccl_global float *buffers[9]; #else - long long int buffers[9]; + long long int buffers[9]; #endif } TileInfo; #ifdef __KERNEL_OPENCL__ -# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info, \ - ccl_global float *tile_buffer_1, \ - ccl_global float *tile_buffer_2, \ - ccl_global float *tile_buffer_3, \ - ccl_global float *tile_buffer_4, \ - ccl_global float *tile_buffer_5, \ - ccl_global float *tile_buffer_6, \ - ccl_global float *tile_buffer_7, \ - ccl_global float *tile_buffer_8, \ - ccl_global float *tile_buffer_9 -# define CCL_FILTER_TILE_INFO_ARG tile_info, \ - tile_buffer_1, tile_buffer_2, tile_buffer_3, \ - tile_buffer_4, tile_buffer_5, tile_buffer_6, \ - tile_buffer_7, tile_buffer_8, tile_buffer_9 -# define ccl_get_tile_buffer(id) (id == 0 ? tile_buffer_1 \ - : id == 1 ? tile_buffer_2 \ - : id == 2 ? tile_buffer_3 \ - : id == 3 ? tile_buffer_4 \ - : id == 4 ? tile_buffer_5 \ - : id == 5 ? tile_buffer_6 \ - : id == 6 ? tile_buffer_7 \ - : id == 7 ? 
tile_buffer_8 \ - : tile_buffer_9) +# define CCL_FILTER_TILE_INFO \ + ccl_global TileInfo *tile_info, ccl_global float *tile_buffer_1, \ + ccl_global float *tile_buffer_2, ccl_global float *tile_buffer_3, \ + ccl_global float *tile_buffer_4, ccl_global float *tile_buffer_5, \ + ccl_global float *tile_buffer_6, ccl_global float *tile_buffer_7, \ + ccl_global float *tile_buffer_8, ccl_global float *tile_buffer_9 +# define CCL_FILTER_TILE_INFO_ARG \ + tile_info, tile_buffer_1, tile_buffer_2, tile_buffer_3, tile_buffer_4, tile_buffer_5, \ + tile_buffer_6, tile_buffer_7, tile_buffer_8, tile_buffer_9 +# define ccl_get_tile_buffer(id) \ + (id == 0 ? tile_buffer_1 : \ + id == 1 ? \ + tile_buffer_2 : \ + id == 2 ? \ + tile_buffer_3 : \ + id == 3 ? tile_buffer_4 : \ + id == 4 ? tile_buffer_5 : \ + id == 5 ? tile_buffer_6 : \ + id == 6 ? tile_buffer_7 : \ + id == 7 ? tile_buffer_8 : tile_buffer_9) #else # ifdef __KERNEL_CUDA__ -# define CCL_FILTER_TILE_INFO ccl_global TileInfo* tile_info +# define CCL_FILTER_TILE_INFO ccl_global TileInfo *tile_info # else -# define CCL_FILTER_TILE_INFO TileInfo* tile_info +# define CCL_FILTER_TILE_INFO TileInfo *tile_info # endif # define ccl_get_tile_buffer(id) (tile_info->buffers[id]) #endif -#endif /* __FILTER_DEFINES_H__*/ +#endif /* __FILTER_DEFINES_H__*/ diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h index e1ea6487aa9..809ccfe8be6 100644 --- a/intern/cycles/kernel/filter/filter_features.h +++ b/intern/cycles/kernel/filter/filter_features.h @@ -14,22 +14,25 @@ * limitations under the License. */ - CCL_NAMESPACE_BEGIN +CCL_NAMESPACE_BEGIN #define ccl_get_feature(buffer, pass) (buffer)[(pass)*pass_stride] /* Loop over the pixels in the range [low.x, high.x) x [low.y, high.y).+ * pixel_buffer always points to the current pixel in the first pass. * Repeat the loop for every secondary frame if there are any. 
*/ -#define FOR_PIXEL_WINDOW for(int frame = 0; frame < tile_info->num_frames; frame++) { \ - pixel.z = tile_info->frames[frame]; \ - pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x) + frame*frame_stride; \ - for(pixel.y = low.y; pixel.y < high.y; pixel.y++) { \ - for(pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++) { +#define FOR_PIXEL_WINDOW \ + for (int frame = 0; frame < tile_info->num_frames; frame++) { \ + pixel.z = tile_info->frames[frame]; \ + pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \ + frame * frame_stride; \ + for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \ + for (pixel.x = low.x; pixel.x < high.x; pixel.x++, pixel_buffer++) { -#define END_FOR_PIXEL_WINDOW } \ - pixel_buffer += buffer_w - (high.x - low.x); \ - } \ - } +#define END_FOR_PIXEL_WINDOW \ + } \ + pixel_buffer += buffer_w - (high.x - low.x); \ + } \ + } ccl_device_inline void filter_get_features(int3 pixel, const ccl_global float *ccl_restrict buffer, @@ -38,24 +41,24 @@ ccl_device_inline void filter_get_features(int3 pixel, const float *ccl_restrict mean, int pass_stride) { - features[0] = pixel.x; - features[1] = pixel.y; - features[2] = fabsf(ccl_get_feature(buffer, 0)); - features[3] = ccl_get_feature(buffer, 1); - features[4] = ccl_get_feature(buffer, 2); - features[5] = ccl_get_feature(buffer, 3); - features[6] = ccl_get_feature(buffer, 4); - features[7] = ccl_get_feature(buffer, 5); - features[8] = ccl_get_feature(buffer, 6); - features[9] = ccl_get_feature(buffer, 7); - if(use_time) { - features[10] = pixel.z; - } - if(mean) { - for(int i = 0; i < (use_time? 
11 : 10); i++) { - features[i] -= mean[i]; - } - } + features[0] = pixel.x; + features[1] = pixel.y; + features[2] = fabsf(ccl_get_feature(buffer, 0)); + features[3] = ccl_get_feature(buffer, 1); + features[4] = ccl_get_feature(buffer, 2); + features[5] = ccl_get_feature(buffer, 3); + features[6] = ccl_get_feature(buffer, 4); + features[7] = ccl_get_feature(buffer, 5); + features[8] = ccl_get_feature(buffer, 6); + features[9] = ccl_get_feature(buffer, 7); + if (use_time) { + features[10] = pixel.z; + } + if (mean) { + for (int i = 0; i < (use_time ? 11 : 10); i++) { + features[i] -= mean[i]; + } + } } ccl_device_inline void filter_get_feature_scales(int3 pixel, @@ -65,38 +68,39 @@ ccl_device_inline void filter_get_feature_scales(int3 pixel, const float *ccl_restrict mean, int pass_stride) { - scales[0] = fabsf(pixel.x - mean[0]); - scales[1] = fabsf(pixel.y - mean[1]); - scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]); - scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3], - ccl_get_feature(buffer, 2) - mean[4], - ccl_get_feature(buffer, 3) - mean[5])); - scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]); - scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7], - ccl_get_feature(buffer, 6) - mean[8], - ccl_get_feature(buffer, 7) - mean[9])); - if(use_time) { - scales[6] = fabsf(pixel.z - mean[10]); - } + scales[0] = fabsf(pixel.x - mean[0]); + scales[1] = fabsf(pixel.y - mean[1]); + scales[2] = fabsf(fabsf(ccl_get_feature(buffer, 0)) - mean[2]); + scales[3] = len_squared(make_float3(ccl_get_feature(buffer, 1) - mean[3], + ccl_get_feature(buffer, 2) - mean[4], + ccl_get_feature(buffer, 3) - mean[5])); + scales[4] = fabsf(ccl_get_feature(buffer, 4) - mean[6]); + scales[5] = len_squared(make_float3(ccl_get_feature(buffer, 5) - mean[7], + ccl_get_feature(buffer, 6) - mean[8], + ccl_get_feature(buffer, 7) - mean[9])); + if (use_time) { + scales[6] = fabsf(pixel.z - mean[10]); + } } ccl_device_inline void 
filter_calculate_scale(float *scale, bool use_time) { - scale[0] = 1.0f/max(scale[0], 0.01f); - scale[1] = 1.0f/max(scale[1], 0.01f); - scale[2] = 1.0f/max(scale[2], 0.01f); - if(use_time) { - scale[10] = 1.0f/max(scale[6], 0.01f); - } - scale[6] = 1.0f/max(scale[4], 0.01f); - scale[7] = scale[8] = scale[9] = 1.0f/max(sqrtf(scale[5]), 0.01f); - scale[3] = scale[4] = scale[5] = 1.0f/max(sqrtf(scale[3]), 0.01f); + scale[0] = 1.0f / max(scale[0], 0.01f); + scale[1] = 1.0f / max(scale[1], 0.01f); + scale[2] = 1.0f / max(scale[2], 0.01f); + if (use_time) { + scale[10] = 1.0f / max(scale[6], 0.01f); + } + scale[6] = 1.0f / max(scale[4], 0.01f); + scale[7] = scale[8] = scale[9] = 1.0f / max(sqrtf(scale[5]), 0.01f); + scale[3] = scale[4] = scale[5] = 1.0f / max(sqrtf(scale[3]), 0.01f); } ccl_device_inline float3 filter_get_color(const ccl_global float *ccl_restrict buffer, int pass_stride) { - return make_float3(ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10)); + return make_float3( + ccl_get_feature(buffer, 8), ccl_get_feature(buffer, 9), ccl_get_feature(buffer, 10)); } ccl_device_inline void design_row_add(float *design_row, @@ -107,42 +111,44 @@ ccl_device_inline void design_row_add(float *design_row, float feature, int transform_row_stride) { - for(int i = 0; i < rank; i++) { - design_row[1+i] += transform[(row*transform_row_stride + i)*stride]*feature; - } + for (int i = 0; i < rank; i++) { + design_row[1 + i] += transform[(row * transform_row_stride + i) * stride] * feature; + } } /* Fill the design row. 
*/ -ccl_device_inline void filter_get_design_row_transform(int3 p_pixel, - const ccl_global float *ccl_restrict p_buffer, - int3 q_pixel, - const ccl_global float *ccl_restrict q_buffer, - int pass_stride, - int rank, - float *design_row, - const ccl_global float *ccl_restrict transform, - int stride, - bool use_time) +ccl_device_inline void filter_get_design_row_transform( + int3 p_pixel, + const ccl_global float *ccl_restrict p_buffer, + int3 q_pixel, + const ccl_global float *ccl_restrict q_buffer, + int pass_stride, + int rank, + float *design_row, + const ccl_global float *ccl_restrict transform, + int stride, + bool use_time) { - int num_features = use_time? 11 : 10; + int num_features = use_time ? 11 : 10; - design_row[0] = 1.0f; - math_vector_zero(design_row+1, rank); + design_row[0] = 1.0f; + math_vector_zero(design_row + 1, rank); -#define DESIGN_ROW_ADD(I, F) design_row_add(design_row, rank, transform, stride, I, F, num_features); - DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x); - DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y); - DESIGN_ROW_ADD(2, fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0))); - DESIGN_ROW_ADD(3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1)); - DESIGN_ROW_ADD(4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2)); - DESIGN_ROW_ADD(5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3)); - DESIGN_ROW_ADD(6, ccl_get_feature(q_buffer, 4) - ccl_get_feature(p_buffer, 4)); - DESIGN_ROW_ADD(7, ccl_get_feature(q_buffer, 5) - ccl_get_feature(p_buffer, 5)); - DESIGN_ROW_ADD(8, ccl_get_feature(q_buffer, 6) - ccl_get_feature(p_buffer, 6)); - DESIGN_ROW_ADD(9, ccl_get_feature(q_buffer, 7) - ccl_get_feature(p_buffer, 7)); - if(use_time) { - DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z) - } +#define DESIGN_ROW_ADD(I, F) \ + design_row_add(design_row, rank, transform, stride, I, F, num_features); + DESIGN_ROW_ADD(0, q_pixel.x - p_pixel.x); + DESIGN_ROW_ADD(1, q_pixel.y - p_pixel.y); + DESIGN_ROW_ADD(2, 
fabsf(ccl_get_feature(q_buffer, 0)) - fabsf(ccl_get_feature(p_buffer, 0))); + DESIGN_ROW_ADD(3, ccl_get_feature(q_buffer, 1) - ccl_get_feature(p_buffer, 1)); + DESIGN_ROW_ADD(4, ccl_get_feature(q_buffer, 2) - ccl_get_feature(p_buffer, 2)); + DESIGN_ROW_ADD(5, ccl_get_feature(q_buffer, 3) - ccl_get_feature(p_buffer, 3)); + DESIGN_ROW_ADD(6, ccl_get_feature(q_buffer, 4) - ccl_get_feature(p_buffer, 4)); + DESIGN_ROW_ADD(7, ccl_get_feature(q_buffer, 5) - ccl_get_feature(p_buffer, 5)); + DESIGN_ROW_ADD(8, ccl_get_feature(q_buffer, 6) - ccl_get_feature(p_buffer, 6)); + DESIGN_ROW_ADD(9, ccl_get_feature(q_buffer, 7) - ccl_get_feature(p_buffer, 7)); + if (use_time) { + DESIGN_ROW_ADD(10, q_pixel.z - p_pixel.z) + } #undef DESIGN_ROW_ADD } diff --git a/intern/cycles/kernel/filter/filter_features_sse.h b/intern/cycles/kernel/filter/filter_features_sse.h index 5dd001ffb93..1e0d6e93453 100644 --- a/intern/cycles/kernel/filter/filter_features_sse.h +++ b/intern/cycles/kernel/filter/filter_features_sse.h @@ -22,22 +22,27 @@ CCL_NAMESPACE_BEGIN * pixel_buffer always points to the first of the 4 current pixel in the first pass. * x4 and y4 contain the coordinates of the four pixels, active_pixels contains a mask that's set for all pixels within the window. * Repeat the loop for every secondary frame if there are any. 
*/ -#define FOR_PIXEL_WINDOW_SSE for(int frame = 0; frame < tile_info->num_frames; frame++) { \ - pixel.z = tile_info->frames[frame]; \ - pixel_buffer = buffer + (low.y - rect.y)*buffer_w + (low.x - rect.x) + frame*frame_stride; \ - float4 t4 = make_float4(pixel.z); \ - for(pixel.y = low.y; pixel.y < high.y; pixel.y++) { \ - float4 y4 = make_float4(pixel.y); \ - for(pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \ - float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \ - int4 active_pixels = x4 < make_float4(high.x); +#define FOR_PIXEL_WINDOW_SSE \ + for (int frame = 0; frame < tile_info->num_frames; frame++) { \ + pixel.z = tile_info->frames[frame]; \ + pixel_buffer = buffer + (low.y - rect.y) * buffer_w + (low.x - rect.x) + \ + frame * frame_stride; \ + float4 t4 = make_float4(pixel.z); \ + for (pixel.y = low.y; pixel.y < high.y; pixel.y++) { \ + float4 y4 = make_float4(pixel.y); \ + for (pixel.x = low.x; pixel.x < high.x; pixel.x += 4, pixel_buffer += 4) { \ + float4 x4 = make_float4(pixel.x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); \ + int4 active_pixels = x4 < make_float4(high.x); -#define END_FOR_PIXEL_WINDOW_SSE } \ - pixel_buffer += buffer_w - (high.x - low.x); \ - } \ - } +#define END_FOR_PIXEL_WINDOW_SSE \ + } \ + pixel_buffer += buffer_w - (high.x - low.x); \ + } \ + } -ccl_device_inline void filter_get_features_sse(float4 x, float4 y, float4 t, +ccl_device_inline void filter_get_features_sse(float4 x, + float4 y, + float4 t, int4 active_pixels, const float *ccl_restrict buffer, float4 *features, @@ -45,33 +50,35 @@ ccl_device_inline void filter_get_features_sse(float4 x, float4 y, float4 t, const float4 *ccl_restrict mean, int pass_stride) { - int num_features = use_time? 11 : 10; + int num_features = use_time ? 
11 : 10; - features[0] = x; - features[1] = y; - features[2] = fabs(ccl_get_feature_sse(0)); - features[3] = ccl_get_feature_sse(1); - features[4] = ccl_get_feature_sse(2); - features[5] = ccl_get_feature_sse(3); - features[6] = ccl_get_feature_sse(4); - features[7] = ccl_get_feature_sse(5); - features[8] = ccl_get_feature_sse(6); - features[9] = ccl_get_feature_sse(7); - if(use_time) { - features[10] = t; - } + features[0] = x; + features[1] = y; + features[2] = fabs(ccl_get_feature_sse(0)); + features[3] = ccl_get_feature_sse(1); + features[4] = ccl_get_feature_sse(2); + features[5] = ccl_get_feature_sse(3); + features[6] = ccl_get_feature_sse(4); + features[7] = ccl_get_feature_sse(5); + features[8] = ccl_get_feature_sse(6); + features[9] = ccl_get_feature_sse(7); + if (use_time) { + features[10] = t; + } - if(mean) { - for(int i = 0; i < num_features; i++) { - features[i] = features[i] - mean[i]; - } - } - for(int i = 0; i < num_features; i++) { - features[i] = mask(active_pixels, features[i]); - } + if (mean) { + for (int i = 0; i < num_features; i++) { + features[i] = features[i] - mean[i]; + } + } + for (int i = 0; i < num_features; i++) { + features[i] = mask(active_pixels, features[i]); + } } -ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y, float4 t, +ccl_device_inline void filter_get_feature_scales_sse(float4 x, + float4 y, + float4 t, int4 active_pixels, const float *ccl_restrict buffer, float4 *scales, @@ -79,36 +86,34 @@ ccl_device_inline void filter_get_feature_scales_sse(float4 x, float4 y, float4 const float4 *ccl_restrict mean, int pass_stride) { - scales[0] = fabs(x - mean[0]); - scales[1] = fabs(y - mean[1]); - scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]); - scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) + - sqr(ccl_get_feature_sse(2) - mean[4]) + - sqr(ccl_get_feature_sse(3) - mean[5]); - scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]); - scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) + - 
sqr(ccl_get_feature_sse(6) - mean[8]) + - sqr(ccl_get_feature_sse(7) - mean[9]); - if(use_time) { - scales[6] = fabs(t - mean[10]); - } + scales[0] = fabs(x - mean[0]); + scales[1] = fabs(y - mean[1]); + scales[2] = fabs(fabs(ccl_get_feature_sse(0)) - mean[2]); + scales[3] = sqr(ccl_get_feature_sse(1) - mean[3]) + sqr(ccl_get_feature_sse(2) - mean[4]) + + sqr(ccl_get_feature_sse(3) - mean[5]); + scales[4] = fabs(ccl_get_feature_sse(4) - mean[6]); + scales[5] = sqr(ccl_get_feature_sse(5) - mean[7]) + sqr(ccl_get_feature_sse(6) - mean[8]) + + sqr(ccl_get_feature_sse(7) - mean[9]); + if (use_time) { + scales[6] = fabs(t - mean[10]); + } - for(int i = 0; i < (use_time? 7 : 6); i++) - scales[i] = mask(active_pixels, scales[i]); + for (int i = 0; i < (use_time ? 7 : 6); i++) + scales[i] = mask(active_pixels, scales[i]); } ccl_device_inline void filter_calculate_scale_sse(float4 *scale, bool use_time) { - scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f))); - scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f))); - scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f))); - if(use_time) { - scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f)));; - } - scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f))); - scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f))); - scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f))); + scale[0] = rcp(max(reduce_max(scale[0]), make_float4(0.01f))); + scale[1] = rcp(max(reduce_max(scale[1]), make_float4(0.01f))); + scale[2] = rcp(max(reduce_max(scale[2]), make_float4(0.01f))); + if (use_time) { + scale[10] = rcp(max(reduce_max(scale[6]), make_float4(0.01f))); + ; + } + scale[6] = rcp(max(reduce_max(scale[4]), make_float4(0.01f))); + scale[7] = scale[8] = scale[9] = rcp(max(reduce_max(sqrt(scale[5])), make_float4(0.01f))); + scale[3] = scale[4] = scale[5] = rcp(max(reduce_max(sqrt(scale[3])), make_float4(0.01f))); } - 
CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/filter/filter_nlm_cpu.h b/intern/cycles/kernel/filter/filter_nlm_cpu.h index 9eb3c603a4a..a94266a8786 100644 --- a/intern/cycles/kernel/filter/filter_nlm_cpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_cpu.h @@ -16,10 +16,11 @@ CCL_NAMESPACE_BEGIN -#define load4_a(buf, ofs) (*((float4*) ((buf) + (ofs)))) -#define load4_u(buf, ofs) load_float4((buf)+(ofs)) +#define load4_a(buf, ofs) (*((float4 *)((buf) + (ofs)))) +#define load4_u(buf, ofs) load_float4((buf) + (ofs)) -ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy, +ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, + int dy, const float *ccl_restrict weight_image, const float *ccl_restrict variance_image, const float *ccl_restrict scale_image, @@ -31,122 +32,117 @@ ccl_device_inline void kernel_filter_nlm_calc_difference(int dx, int dy, float a, float k_2) { - /* Strides need to be aligned to 16 bytes. */ - kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0); - - int aligned_lowx = rect.x & (~3); - const int numChannels = (channel_offset > 0)? 3 : 1; - const float4 channel_fac = make_float4(1.0f / numChannels); - - for(int y = rect.y; y < rect.w; y++) { - int idx_p = y*stride + aligned_lowx; - int idx_q = (y+dy)*stride + aligned_lowx + dx + frame_offset; - for(int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) { - float4 diff = make_float4(0.0f); - float4 scale_fac; - if(scale_image) { - scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q), - make_float4(0.25f), make_float4(4.0f)); - } - else { - scale_fac = make_float4(1.0f); - } - for(int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) { - /* idx_p is guaranteed to be aligned, but idx_q isn't. 
*/ - float4 color_p = load4_a(weight_image, idx_p + chan_ofs); - float4 color_q = scale_fac*load4_u(weight_image, idx_q + chan_ofs); - float4 cdiff = color_p - color_q; - float4 var_p = load4_a(variance_image, idx_p + chan_ofs); - float4 var_q = sqr(scale_fac)*load4_u(variance_image, idx_q + chan_ofs); - diff += (cdiff*cdiff - a*(var_p + min(var_p, var_q))) / (make_float4(1e-8f) + k_2*(var_p+var_q)); - } - load4_a(difference_image, idx_p) = diff*channel_fac; - } - } + /* Strides need to be aligned to 16 bytes. */ + kernel_assert((stride % 4) == 0 && (channel_offset % 4) == 0); + + int aligned_lowx = rect.x & (~3); + const int numChannels = (channel_offset > 0) ? 3 : 1; + const float4 channel_fac = make_float4(1.0f / numChannels); + + for (int y = rect.y; y < rect.w; y++) { + int idx_p = y * stride + aligned_lowx; + int idx_q = (y + dy) * stride + aligned_lowx + dx + frame_offset; + for (int x = aligned_lowx; x < rect.z; x += 4, idx_p += 4, idx_q += 4) { + float4 diff = make_float4(0.0f); + float4 scale_fac; + if (scale_image) { + scale_fac = clamp(load4_a(scale_image, idx_p) / load4_u(scale_image, idx_q), + make_float4(0.25f), + make_float4(4.0f)); + } + else { + scale_fac = make_float4(1.0f); + } + for (int c = 0, chan_ofs = 0; c < numChannels; c++, chan_ofs += channel_offset) { + /* idx_p is guaranteed to be aligned, but idx_q isn't. 
*/ + float4 color_p = load4_a(weight_image, idx_p + chan_ofs); + float4 color_q = scale_fac * load4_u(weight_image, idx_q + chan_ofs); + float4 cdiff = color_p - color_q; + float4 var_p = load4_a(variance_image, idx_p + chan_ofs); + float4 var_q = sqr(scale_fac) * load4_u(variance_image, idx_q + chan_ofs); + diff += (cdiff * cdiff - a * (var_p + min(var_p, var_q))) / + (make_float4(1e-8f) + k_2 * (var_p + var_q)); + } + load4_a(difference_image, idx_p) = diff * channel_fac; + } + } } -ccl_device_inline void kernel_filter_nlm_blur(const float *ccl_restrict difference_image, - float *out_image, - int4 rect, - int stride, - int f) +ccl_device_inline void kernel_filter_nlm_blur( + const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f) { - int aligned_lowx = round_down(rect.x, 4); - for(int y = rect.y; y < rect.w; y++) { - const int low = max(rect.y, y-f); - const int high = min(rect.w, y+f+1); - for(int x = aligned_lowx; x < rect.z; x += 4) { - load4_a(out_image, y*stride + x) = make_float4(0.0f); - } - for(int y1 = low; y1 < high; y1++) { - for(int x = aligned_lowx; x < rect.z; x += 4) { - load4_a(out_image, y*stride + x) += load4_a(difference_image, y1*stride + x); - } - } - float fac = 1.0f/(high - low); - for(int x = aligned_lowx; x < rect.z; x += 4) { - load4_a(out_image, y*stride + x) *= fac; - } - } + int aligned_lowx = round_down(rect.x, 4); + for (int y = rect.y; y < rect.w; y++) { + const int low = max(rect.y, y - f); + const int high = min(rect.w, y + f + 1); + for (int x = aligned_lowx; x < rect.z; x += 4) { + load4_a(out_image, y * stride + x) = make_float4(0.0f); + } + for (int y1 = low; y1 < high; y1++) { + for (int x = aligned_lowx; x < rect.z; x += 4) { + load4_a(out_image, y * stride + x) += load4_a(difference_image, y1 * stride + x); + } + } + float fac = 1.0f / (high - low); + for (int x = aligned_lowx; x < rect.z; x += 4) { + load4_a(out_image, y * stride + x) *= fac; + } + } } -ccl_device_inline void 
nlm_blur_horizontal(const float *ccl_restrict difference_image, - float *out_image, - int4 rect, - int stride, - int f) +ccl_device_inline void nlm_blur_horizontal( + const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f) { - int aligned_lowx = round_down(rect.x, 4); - for(int y = rect.y; y < rect.w; y++) { - for(int x = aligned_lowx; x < rect.z; x += 4) { - load4_a(out_image, y*stride + x) = make_float4(0.0f); - } - } - - for(int dx = -f; dx <= f; dx++) { - aligned_lowx = round_down(rect.x - min(0, dx), 4); - int highx = rect.z - max(0, dx); - int4 lowx4 = make_int4(rect.x - min(0, dx)); - int4 highx4 = make_int4(rect.z - max(0, dx)); - for(int y = rect.y; y < rect.w; y++) { - for(int x = aligned_lowx; x < highx; x += 4) { - int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3); - int4 active = (x4 >= lowx4) & (x4 < highx4); - - float4 diff = load4_u(difference_image, y*stride + x + dx); - load4_a(out_image, y*stride + x) += mask(active, diff); - } - } - } - - aligned_lowx = round_down(rect.x, 4); - for(int y = rect.y; y < rect.w; y++) { - for(int x = aligned_lowx; x < rect.z; x += 4) { - float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); - float4 low = max(make_float4(rect.x), x4 - make_float4(f)); - float4 high = min(make_float4(rect.z), x4 + make_float4(f+1)); - load4_a(out_image, y*stride + x) *= rcp(high - low); - } - } + int aligned_lowx = round_down(rect.x, 4); + for (int y = rect.y; y < rect.w; y++) { + for (int x = aligned_lowx; x < rect.z; x += 4) { + load4_a(out_image, y * stride + x) = make_float4(0.0f); + } + } + + for (int dx = -f; dx <= f; dx++) { + aligned_lowx = round_down(rect.x - min(0, dx), 4); + int highx = rect.z - max(0, dx); + int4 lowx4 = make_int4(rect.x - min(0, dx)); + int4 highx4 = make_int4(rect.z - max(0, dx)); + for (int y = rect.y; y < rect.w; y++) { + for (int x = aligned_lowx; x < highx; x += 4) { + int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3); + int4 active = (x4 >= lowx4) & (x4 < 
highx4); + + float4 diff = load4_u(difference_image, y * stride + x + dx); + load4_a(out_image, y * stride + x) += mask(active, diff); + } + } + } + + aligned_lowx = round_down(rect.x, 4); + for (int y = rect.y; y < rect.w; y++) { + for (int x = aligned_lowx; x < rect.z; x += 4) { + float4 x4 = make_float4(x) + make_float4(0.0f, 1.0f, 2.0f, 3.0f); + float4 low = max(make_float4(rect.x), x4 - make_float4(f)); + float4 high = min(make_float4(rect.z), x4 + make_float4(f + 1)); + load4_a(out_image, y * stride + x) *= rcp(high - low); + } + } } -ccl_device_inline void kernel_filter_nlm_calc_weight(const float *ccl_restrict difference_image, - float *out_image, - int4 rect, - int stride, - int f) +ccl_device_inline void kernel_filter_nlm_calc_weight( + const float *ccl_restrict difference_image, float *out_image, int4 rect, int stride, int f) { - nlm_blur_horizontal(difference_image, out_image, rect, stride, f); - - int aligned_lowx = round_down(rect.x, 4); - for(int y = rect.y; y < rect.w; y++) { - for(int x = aligned_lowx; x < rect.z; x += 4) { - load4_a(out_image, y*stride + x) = fast_expf4(-max(load4_a(out_image, y*stride + x), make_float4(0.0f))); - } - } + nlm_blur_horizontal(difference_image, out_image, rect, stride, f); + + int aligned_lowx = round_down(rect.x, 4); + for (int y = rect.y; y < rect.w; y++) { + for (int x = aligned_lowx; x < rect.z; x += 4) { + load4_a(out_image, y * stride + x) = fast_expf4( + -max(load4_a(out_image, y * stride + x), make_float4(0.0f))); + } + } } -ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, +ccl_device_inline void kernel_filter_nlm_update_output(int dx, + int dy, const float *ccl_restrict difference_image, const float *ccl_restrict image, float *temp_image, @@ -157,33 +153,36 @@ ccl_device_inline void kernel_filter_nlm_update_output(int dx, int dy, int stride, int f) { - nlm_blur_horizontal(difference_image, temp_image, rect, stride, f); + nlm_blur_horizontal(difference_image, temp_image, rect, stride, 
f); - int aligned_lowx = round_down(rect.x, 4); - for(int y = rect.y; y < rect.w; y++) { - for(int x = aligned_lowx; x < rect.z; x += 4) { - int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3); - int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z)); + int aligned_lowx = round_down(rect.x, 4); + for (int y = rect.y; y < rect.w; y++) { + for (int x = aligned_lowx; x < rect.z; x += 4) { + int4 x4 = make_int4(x) + make_int4(0, 1, 2, 3); + int4 active = (x4 >= make_int4(rect.x)) & (x4 < make_int4(rect.z)); - int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx); + int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx); - float4 weight = load4_a(temp_image, idx_p); - load4_a(accum_image, idx_p) += mask(active, weight); + float4 weight = load4_a(temp_image, idx_p); + load4_a(accum_image, idx_p) += mask(active, weight); - float4 val = load4_u(image, idx_q); - if(channel_offset) { - val += load4_u(image, idx_q + channel_offset); - val += load4_u(image, idx_q + 2*channel_offset); - val *= 1.0f/3.0f; - } + float4 val = load4_u(image, idx_q); + if (channel_offset) { + val += load4_u(image, idx_q + channel_offset); + val += load4_u(image, idx_q + 2 * channel_offset); + val *= 1.0f / 3.0f; + } - load4_a(out_image, idx_p) += mask(active, weight*val); - } - } + load4_a(out_image, idx_p) += mask(active, weight * val); + } + } } -ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t, - const float *ccl_restrict difference_image, +ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, + int dy, + int t, + const float *ccl_restrict + difference_image, const float *ccl_restrict buffer, float *transform, int *rank, @@ -191,40 +190,49 @@ ccl_device_inline void kernel_filter_nlm_construct_gramian(int dx, int dy, int t float3 *XtWY, int4 rect, int4 filter_window, - int stride, int f, + int stride, + int f, int pass_stride, int frame_offset, bool use_time) { - int4 clip_area = rect_clip(rect, filter_window); - /* fy and fy are in 
filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. */ - for(int y = clip_area.y; y < clip_area.w; y++) { - for(int x = clip_area.x; x < clip_area.z; x++) { - const int low = max(rect.x, x-f); - const int high = min(rect.z, x+f+1); - float sum = 0.0f; - for(int x1 = low; x1 < high; x1++) { - sum += difference_image[y*stride + x1]; - } - float weight = sum * (1.0f/(high - low)); - - int storage_ofs = coord_to_local_index(filter_window, x, y); - float *l_transform = transform + storage_ofs*TRANSFORM_SIZE; - float *l_XtWX = XtWX + storage_ofs*XTWX_SIZE; - float3 *l_XtWY = XtWY + storage_ofs*XTWY_SIZE; - int *l_rank = rank + storage_ofs; - - kernel_filter_construct_gramian(x, y, 1, - dx, dy, t, - stride, - pass_stride, - frame_offset, - use_time, - buffer, - l_transform, l_rank, - weight, l_XtWX, l_XtWY, 0); - } - } + int4 clip_area = rect_clip(rect, filter_window); + /* fy and fy are in filter-window-relative coordinates, while x and y are in feature-window-relative coordinates. 
*/ + for (int y = clip_area.y; y < clip_area.w; y++) { + for (int x = clip_area.x; x < clip_area.z; x++) { + const int low = max(rect.x, x - f); + const int high = min(rect.z, x + f + 1); + float sum = 0.0f; + for (int x1 = low; x1 < high; x1++) { + sum += difference_image[y * stride + x1]; + } + float weight = sum * (1.0f / (high - low)); + + int storage_ofs = coord_to_local_index(filter_window, x, y); + float *l_transform = transform + storage_ofs * TRANSFORM_SIZE; + float *l_XtWX = XtWX + storage_ofs * XTWX_SIZE; + float3 *l_XtWY = XtWY + storage_ofs * XTWY_SIZE; + int *l_rank = rank + storage_ofs; + + kernel_filter_construct_gramian(x, + y, + 1, + dx, + dy, + t, + stride, + pass_stride, + frame_offset, + use_time, + buffer, + l_transform, + l_rank, + weight, + l_XtWX, + l_XtWY, + 0); + } + } } ccl_device_inline void kernel_filter_nlm_normalize(float *out_image, @@ -232,11 +240,11 @@ ccl_device_inline void kernel_filter_nlm_normalize(float *out_image, int4 rect, int w) { - for(int y = rect.y; y < rect.w; y++) { - for(int x = rect.x; x < rect.z; x++) { - out_image[y*w+x] /= accum_image[y*w+x]; - } - } + for (int y = rect.y; y < rect.w; y++) { + for (int x = rect.x; x < rect.z; x++) { + out_image[y * w + x] /= accum_image[y * w + x]; + } + } } #undef load4_a diff --git a/intern/cycles/kernel/filter/filter_nlm_gpu.h b/intern/cycles/kernel/filter/filter_nlm_gpu.h index 12636393243..650c743f34f 100644 --- a/intern/cycles/kernel/filter/filter_nlm_gpu.h +++ b/intern/cycles/kernel/filter/filter_nlm_gpu.h @@ -24,203 +24,232 @@ CCL_NAMESPACE_BEGIN * Window is the rect that should be processed. * co is filled with (x, y, dx, dy). */ -ccl_device_inline bool get_nlm_coords_window(int w, int h, int r, int stride, - int4 *rect, int4 *co, int *ofs, - int4 window) +ccl_device_inline bool get_nlm_coords_window( + int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs, int4 window) { - /* Determine the pixel offset that this thread should apply. 
*/ - int s = 2*r+1; - int si = ccl_global_id(1); - int sx = si % s; - int sy = si / s; - if(sy >= s) { - return false; - } - - /* Pixels still need to lie inside the denoising buffer after applying the offset, - * so determine the area for which this is the case. */ - int dx = sx - r; - int dy = sy - r; - - *rect = make_int4(max(0, -dx), max(0, -dy), - w - max(0, dx), h - max(0, dy)); - - /* Find the intersection of the area that we want to process (window) and the area - * that can be processed (rect) to get the final area for this offset. */ - int4 clip_area = rect_clip(window, *rect); - - /* If the radius is larger than one of the sides of the window, - * there will be shifts for which there is no usable pixel at all. */ - if(!rect_is_valid(clip_area)) { - return false; - } - - /* Map the linear thread index to pixels inside the clip area. */ - int x, y; - if(!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) { - return false; - } - - *co = make_int4(x, y, dx, dy); - - *ofs = (sy*s + sx) * stride; - - return true; + /* Determine the pixel offset that this thread should apply. */ + int s = 2 * r + 1; + int si = ccl_global_id(1); + int sx = si % s; + int sy = si / s; + if (sy >= s) { + return false; + } + + /* Pixels still need to lie inside the denoising buffer after applying the offset, + * so determine the area for which this is the case. */ + int dx = sx - r; + int dy = sy - r; + + *rect = make_int4(max(0, -dx), max(0, -dy), w - max(0, dx), h - max(0, dy)); + + /* Find the intersection of the area that we want to process (window) and the area + * that can be processed (rect) to get the final area for this offset. */ + int4 clip_area = rect_clip(window, *rect); + + /* If the radius is larger than one of the sides of the window, + * there will be shifts for which there is no usable pixel at all. */ + if (!rect_is_valid(clip_area)) { + return false; + } + + /* Map the linear thread index to pixels inside the clip area. 
*/ + int x, y; + if (!local_index_to_coord(clip_area, ccl_global_id(0), &x, &y)) { + return false; + } + + *co = make_int4(x, y, dx, dy); + + *ofs = (sy * s + sx) * stride; + + return true; } -ccl_device_inline bool get_nlm_coords(int w, int h, int r, int stride, - int4 *rect, int4 *co, int *ofs) +ccl_device_inline bool get_nlm_coords( + int w, int h, int r, int stride, int4 *rect, int4 *co, int *ofs) { - return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h)); + return get_nlm_coords_window(w, h, r, stride, rect, co, ofs, make_int4(0, 0, w, h)); } -ccl_device_inline void kernel_filter_nlm_calc_difference(int x, int y, - int dx, int dy, - const ccl_global float *ccl_restrict weight_image, - const ccl_global float *ccl_restrict variance_image, - const ccl_global float *ccl_restrict scale_image, - ccl_global float *difference_image, - int4 rect, int stride, - int channel_offset, - int frame_offset, - float a, float k_2) +ccl_device_inline void kernel_filter_nlm_calc_difference( + int x, + int y, + int dx, + int dy, + const ccl_global float *ccl_restrict weight_image, + const ccl_global float *ccl_restrict variance_image, + const ccl_global float *ccl_restrict scale_image, + ccl_global float *difference_image, + int4 rect, + int stride, + int channel_offset, + int frame_offset, + float a, + float k_2) { - int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx) + frame_offset; - int numChannels = channel_offset? 
3 : 1; - - float diff = 0.0f; - float scale_fac = 1.0f; - if(scale_image) { - scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f); - } - - for(int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) { - float cdiff = weight_image[idx_p] - scale_fac*weight_image[idx_q]; - float pvar = variance_image[idx_p]; - float qvar = sqr(scale_fac)*variance_image[idx_q]; - diff += (cdiff*cdiff - a*(pvar + min(pvar, qvar))) / (1e-8f + k_2*(pvar+qvar)); - } - if(numChannels > 1) { - diff *= 1.0f/numChannels; - } - difference_image[y*stride + x] = diff; + int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx) + frame_offset; + int numChannels = channel_offset ? 3 : 1; + + float diff = 0.0f; + float scale_fac = 1.0f; + if (scale_image) { + scale_fac = clamp(scale_image[idx_p] / scale_image[idx_q], 0.25f, 4.0f); + } + + for (int c = 0; c < numChannels; c++, idx_p += channel_offset, idx_q += channel_offset) { + float cdiff = weight_image[idx_p] - scale_fac * weight_image[idx_q]; + float pvar = variance_image[idx_p]; + float qvar = sqr(scale_fac) * variance_image[idx_q]; + diff += (cdiff * cdiff - a * (pvar + min(pvar, qvar))) / (1e-8f + k_2 * (pvar + qvar)); + } + if (numChannels > 1) { + diff *= 1.0f / numChannels; + } + difference_image[y * stride + x] = diff; } -ccl_device_inline void kernel_filter_nlm_blur(int x, int y, - const ccl_global float *ccl_restrict difference_image, +ccl_device_inline void kernel_filter_nlm_blur(int x, + int y, + const ccl_global float *ccl_restrict + difference_image, ccl_global float *out_image, - int4 rect, int stride, int f) + int4 rect, + int stride, + int f) { - float sum = 0.0f; - const int low = max(rect.y, y-f); - const int high = min(rect.w, y+f+1); - for(int y1 = low; y1 < high; y1++) { - sum += difference_image[y1*stride + x]; - } - sum *= 1.0f/(high-low); - out_image[y*stride + x] = sum; + float sum = 0.0f; + const int low = max(rect.y, y - f); + const int high = min(rect.w, y + f + 
1); + for (int y1 = low; y1 < high; y1++) { + sum += difference_image[y1 * stride + x]; + } + sum *= 1.0f / (high - low); + out_image[y * stride + x] = sum; } -ccl_device_inline void kernel_filter_nlm_calc_weight(int x, int y, - const ccl_global float *ccl_restrict difference_image, +ccl_device_inline void kernel_filter_nlm_calc_weight(int x, + int y, + const ccl_global float *ccl_restrict + difference_image, ccl_global float *out_image, - int4 rect, int stride, int f) + int4 rect, + int stride, + int f) { - float sum = 0.0f; - const int low = max(rect.x, x-f); - const int high = min(rect.z, x+f+1); - for(int x1 = low; x1 < high; x1++) { - sum += difference_image[y*stride + x1]; - } - sum *= 1.0f/(high-low); - out_image[y*stride + x] = fast_expf(-max(sum, 0.0f)); + float sum = 0.0f; + const int low = max(rect.x, x - f); + const int high = min(rect.z, x + f + 1); + for (int x1 = low; x1 < high; x1++) { + sum += difference_image[y * stride + x1]; + } + sum *= 1.0f / (high - low); + out_image[y * stride + x] = fast_expf(-max(sum, 0.0f)); } -ccl_device_inline void kernel_filter_nlm_update_output(int x, int y, - int dx, int dy, - const ccl_global float *ccl_restrict difference_image, +ccl_device_inline void kernel_filter_nlm_update_output(int x, + int y, + int dx, + int dy, + const ccl_global float *ccl_restrict + difference_image, const ccl_global float *ccl_restrict image, ccl_global float *out_image, ccl_global float *accum_image, - int4 rect, int channel_offset, - int stride, int f) + int4 rect, + int channel_offset, + int stride, + int f) { - float sum = 0.0f; - const int low = max(rect.x, x-f); - const int high = min(rect.z, x+f+1); - for(int x1 = low; x1 < high; x1++) { - sum += difference_image[y*stride + x1]; - } - sum *= 1.0f/(high-low); - - int idx_p = y*stride + x, idx_q = (y+dy)*stride + (x+dx); - if(out_image) { - atomic_add_and_fetch_float(accum_image + idx_p, sum); - - float val = image[idx_q]; - if(channel_offset) { - val += image[idx_q + 
channel_offset]; - val += image[idx_q + 2*channel_offset]; - val *= 1.0f/3.0f; - } - atomic_add_and_fetch_float(out_image + idx_p, sum*val); - } - else { - accum_image[idx_p] = sum; - } + float sum = 0.0f; + const int low = max(rect.x, x - f); + const int high = min(rect.z, x + f + 1); + for (int x1 = low; x1 < high; x1++) { + sum += difference_image[y * stride + x1]; + } + sum *= 1.0f / (high - low); + + int idx_p = y * stride + x, idx_q = (y + dy) * stride + (x + dx); + if (out_image) { + atomic_add_and_fetch_float(accum_image + idx_p, sum); + + float val = image[idx_q]; + if (channel_offset) { + val += image[idx_q + channel_offset]; + val += image[idx_q + 2 * channel_offset]; + val *= 1.0f / 3.0f; + } + atomic_add_and_fetch_float(out_image + idx_p, sum * val); + } + else { + accum_image[idx_p] = sum; + } } -ccl_device_inline void kernel_filter_nlm_construct_gramian(int x, int y, - int dx, int dy, int t, - const ccl_global float *ccl_restrict difference_image, - const ccl_global float *ccl_restrict buffer, - const ccl_global float *ccl_restrict transform, - ccl_global int *rank, - ccl_global float *XtWX, - ccl_global float3 *XtWY, - int4 rect, - int4 filter_window, - int stride, int f, - int pass_stride, - int frame_offset, - bool use_time, - int localIdx) +ccl_device_inline void kernel_filter_nlm_construct_gramian( + int x, + int y, + int dx, + int dy, + int t, + const ccl_global float *ccl_restrict difference_image, + const ccl_global float *ccl_restrict buffer, + const ccl_global float *ccl_restrict transform, + ccl_global int *rank, + ccl_global float *XtWX, + ccl_global float3 *XtWY, + int4 rect, + int4 filter_window, + int stride, + int f, + int pass_stride, + int frame_offset, + bool use_time, + int localIdx) { - const int low = max(rect.x, x-f); - const int high = min(rect.z, x+f+1); - float sum = 0.0f; - for(int x1 = low; x1 < high; x1++) { - sum += difference_image[y*stride + x1]; - } - float weight = sum * (1.0f/(high - low)); - - /* Reconstruction 
data is only stored for pixels inside the filter window, - * so compute the pixels's index in there. */ - int storage_ofs = coord_to_local_index(filter_window, x, y); - transform += storage_ofs; - rank += storage_ofs; - XtWX += storage_ofs; - XtWY += storage_ofs; - - kernel_filter_construct_gramian(x, y, - rect_size(filter_window), - dx, dy, t, - stride, - pass_stride, - frame_offset, - use_time, - buffer, - transform, rank, - weight, XtWX, XtWY, - localIdx); + const int low = max(rect.x, x - f); + const int high = min(rect.z, x + f + 1); + float sum = 0.0f; + for (int x1 = low; x1 < high; x1++) { + sum += difference_image[y * stride + x1]; + } + float weight = sum * (1.0f / (high - low)); + + /* Reconstruction data is only stored for pixels inside the filter window, + * so compute the pixels's index in there. */ + int storage_ofs = coord_to_local_index(filter_window, x, y); + transform += storage_ofs; + rank += storage_ofs; + XtWX += storage_ofs; + XtWY += storage_ofs; + + kernel_filter_construct_gramian(x, + y, + rect_size(filter_window), + dx, + dy, + t, + stride, + pass_stride, + frame_offset, + use_time, + buffer, + transform, + rank, + weight, + XtWX, + XtWY, + localIdx); } -ccl_device_inline void kernel_filter_nlm_normalize(int x, int y, +ccl_device_inline void kernel_filter_nlm_normalize(int x, + int y, ccl_global float *out_image, - const ccl_global float *ccl_restrict accum_image, + const ccl_global float *ccl_restrict + accum_image, int stride) { - out_image[y*stride + x] /= accum_image[y*stride + x]; + out_image[y * stride + x] /= accum_image[y * stride + x]; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/filter/filter_prefilter.h b/intern/cycles/kernel/filter/filter_prefilter.h index e24f4feb28d..8211311313d 100644 --- a/intern/cycles/kernel/filter/filter_prefilter.h +++ b/intern/cycles/kernel/filter/filter_prefilter.h @@ -27,7 +27,8 @@ CCL_NAMESPACE_BEGIN */ ccl_device void kernel_filter_divide_shadow(int sample, CCL_FILTER_TILE_INFO, - int x, 
int y, + int x, + int y, ccl_global float *unfilteredA, ccl_global float *unfilteredB, ccl_global float *sampleVariance, @@ -37,37 +38,39 @@ ccl_device void kernel_filter_divide_shadow(int sample, int buffer_pass_stride, int buffer_denoising_offset) { - int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2); - int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 1: 2); - int tile = ytile*3+xtile; + int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2); + int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2); + int tile = ytile * 3 + xtile; - int offset = tile_info->offsets[tile]; - int stride = tile_info->strides[tile]; - const ccl_global float *ccl_restrict center_buffer = (ccl_global float*) ccl_get_tile_buffer(tile); - center_buffer += (y*stride + x + offset)*buffer_pass_stride; - center_buffer += buffer_denoising_offset + 14; + int offset = tile_info->offsets[tile]; + int stride = tile_info->strides[tile]; + const ccl_global float *ccl_restrict center_buffer = (ccl_global float *)ccl_get_tile_buffer( + tile); + center_buffer += (y * stride + x + offset) * buffer_pass_stride; + center_buffer += buffer_denoising_offset + 14; - int buffer_w = align_up(rect.z - rect.x, 4); - int idx = (y-rect.y)*buffer_w + (x - rect.x); - unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f); - unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f); + int buffer_w = align_up(rect.z - rect.x, 4); + int idx = (y - rect.y) * buffer_w + (x - rect.x); + unfilteredA[idx] = center_buffer[1] / max(center_buffer[0], 1e-7f); + unfilteredB[idx] = center_buffer[4] / max(center_buffer[3], 1e-7f); - float varA = center_buffer[2]; - float varB = center_buffer[5]; - int odd_sample = (sample+1)/2; - int even_sample = sample/2; + float varA = center_buffer[2]; + float varB = center_buffer[5]; + int odd_sample = (sample + 1) / 2; + int even_sample = sample / 2; - /* Approximate variance as E[x^2] - 1/N * (E[x])^2, 
since online variance - * update does not work efficiently with atomics in the kernel. */ - varA = max(0.0f, varA - unfilteredA[idx]*unfilteredA[idx]*odd_sample); - varB = max(0.0f, varB - unfilteredB[idx]*unfilteredB[idx]*even_sample); + /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance + * update does not work efficiently with atomics in the kernel. */ + varA = max(0.0f, varA - unfilteredA[idx] * unfilteredA[idx] * odd_sample); + varB = max(0.0f, varB - unfilteredB[idx] * unfilteredB[idx] * even_sample); - varA /= max(odd_sample - 1, 1); - varB /= max(even_sample - 1, 1); + varA /= max(odd_sample - 1, 1); + varB /= max(even_sample - 1, 1); - sampleVariance[idx] = 0.5f*(varA + varB) / sample; - sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample*sample); - bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * (unfilteredA[idx] - unfilteredB[idx]); + sampleVariance[idx] = 0.5f * (varA + varB) / sample; + sampleVarianceV[idx] = 0.5f * (varA - varB) * (varA - varB) / (sample * sample); + bufferVariance[idx] = 0.5f * (unfilteredA[idx] - unfilteredB[idx]) * + (unfilteredA[idx] - unfilteredB[idx]); } /* Load a regular feature from the render buffers into the denoise buffer. @@ -80,55 +83,65 @@ ccl_device void kernel_filter_divide_shadow(int sample, */ ccl_device void kernel_filter_get_feature(int sample, CCL_FILTER_TILE_INFO, - int m_offset, int v_offset, - int x, int y, + int m_offset, + int v_offset, + int x, + int y, ccl_global float *mean, ccl_global float *variance, float scale, - int4 rect, int buffer_pass_stride, + int4 rect, + int buffer_pass_stride, int buffer_denoising_offset) { - int xtile = (x < tile_info->x[1])? 0: ((x < tile_info->x[2])? 1: 2); - int ytile = (y < tile_info->y[1])? 0: ((y < tile_info->y[2])? 
1: 2); - int tile = ytile*3+xtile; - ccl_global float *center_buffer = ((ccl_global float*) ccl_get_tile_buffer(tile)) + (tile_info->offsets[tile] + y*tile_info->strides[tile] + x)*buffer_pass_stride + buffer_denoising_offset; + int xtile = (x < tile_info->x[1]) ? 0 : ((x < tile_info->x[2]) ? 1 : 2); + int ytile = (y < tile_info->y[1]) ? 0 : ((y < tile_info->y[2]) ? 1 : 2); + int tile = ytile * 3 + xtile; + ccl_global float *center_buffer = ((ccl_global float *)ccl_get_tile_buffer(tile)) + + (tile_info->offsets[tile] + y * tile_info->strides[tile] + x) * + buffer_pass_stride + + buffer_denoising_offset; - int buffer_w = align_up(rect.z - rect.x, 4); - int idx = (y-rect.y)*buffer_w + (x - rect.x); + int buffer_w = align_up(rect.z - rect.x, 4); + int idx = (y - rect.y) * buffer_w + (x - rect.x); - float val = scale * center_buffer[m_offset]; - mean[idx] = val; + float val = scale * center_buffer[m_offset]; + mean[idx] = val; - if(v_offset >= 0) { - if(sample > 1) { - /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance - * update does not work efficiently with atomics in the kernel. */ - variance[idx] = max(0.0f, (center_buffer[v_offset] - val*val*sample) / (sample * (sample-1))); - } - else { - /* Can't compute variance with single sample, just set it very high. */ - variance[idx] = 1e10f; - } - } + if (v_offset >= 0) { + if (sample > 1) { + /* Approximate variance as E[x^2] - 1/N * (E[x])^2, since online variance + * update does not work efficiently with atomics in the kernel. */ + variance[idx] = max( + 0.0f, (center_buffer[v_offset] - val * val * sample) / (sample * (sample - 1))); + } + else { + /* Can't compute variance with single sample, just set it very high. 
*/ + variance[idx] = 1e10f; + } + } } ccl_device void kernel_filter_write_feature(int sample, - int x, int y, + int x, + int y, int4 buffer_params, ccl_global float *from, ccl_global float *buffer, int out_offset, int4 rect) { - ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z; + ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) * + buffer_params.z; - int buffer_w = align_up(rect.z - rect.x, 4); - int idx = (y-rect.y)*buffer_w + (x - rect.x); + int buffer_w = align_up(rect.z - rect.x, 4); + int idx = (y - rect.y) * buffer_w + (x - rect.x); - combined_buffer[out_offset] = from[idx]; + combined_buffer[out_offset] = from[idx]; } -ccl_device void kernel_filter_detect_outliers(int x, int y, +ccl_device void kernel_filter_detect_outliers(int x, + int y, ccl_global float *image, ccl_global float *variance, ccl_global float *depth, @@ -136,123 +149,131 @@ ccl_device void kernel_filter_detect_outliers(int x, int y, int4 rect, int pass_stride) { - int buffer_w = align_up(rect.z - rect.x, 4); + int buffer_w = align_up(rect.z - rect.x, 4); - int n = 0; - float values[25]; - float pixel_variance, max_variance = 0.0f; - for(int y1 = max(y-2, rect.y); y1 < min(y+3, rect.w); y1++) { - for(int x1 = max(x-2, rect.x); x1 < min(x+3, rect.z); x1++) { - int idx = (y1-rect.y)*buffer_w + (x1-rect.x); - float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]); - color = max(color, make_float3(0.0f, 0.0f, 0.0f)); - float L = average(color); + int n = 0; + float values[25]; + float pixel_variance, max_variance = 0.0f; + for (int y1 = max(y - 2, rect.y); y1 < min(y + 3, rect.w); y1++) { + for (int x1 = max(x - 2, rect.x); x1 < min(x + 3, rect.z); x1++) { + int idx = (y1 - rect.y) * buffer_w + (x1 - rect.x); + float3 color = make_float3( + image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]); + color = max(color, make_float3(0.0f, 0.0f, 0.0f)); + float L = 
average(color); - /* Find the position of L. */ - int i; - for(i = 0; i < n; i++) { - if(values[i] > L) break; - } - /* Make space for L by shifting all following values to the right. */ - for(int j = n; j > i; j--) { - values[j] = values[j-1]; - } - /* Insert L. */ - values[i] = L; - n++; + /* Find the position of L. */ + int i; + for (i = 0; i < n; i++) { + if (values[i] > L) + break; + } + /* Make space for L by shifting all following values to the right. */ + for (int j = n; j > i; j--) { + values[j] = values[j - 1]; + } + /* Insert L. */ + values[i] = L; + n++; - float3 pixel_var = make_float3(variance[idx], variance[idx+pass_stride], variance[idx+2*pass_stride]); - float var = average(pixel_var); - if((x1 == x) && (y1 == y)) { - pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f)? -1.0f : var; - } - else { - max_variance = max(max_variance, var); - } - } - } + float3 pixel_var = make_float3( + variance[idx], variance[idx + pass_stride], variance[idx + 2 * pass_stride]); + float var = average(pixel_var); + if ((x1 == x) && (y1 == y)) { + pixel_variance = (pixel_var.x < 0.0f || pixel_var.y < 0.0f || pixel_var.z < 0.0f) ? -1.0f : + var; + } + else { + max_variance = max(max_variance, var); + } + } + } - max_variance += 1e-4f; + max_variance += 1e-4f; - int idx = (y-rect.y)*buffer_w + (x-rect.x); - float3 color = make_float3(image[idx], image[idx+pass_stride], image[idx+2*pass_stride]); - color = max(color, make_float3(0.0f, 0.0f, 0.0f)); - float L = average(color); + int idx = (y - rect.y) * buffer_w + (x - rect.x); + float3 color = make_float3(image[idx], image[idx + pass_stride], image[idx + 2 * pass_stride]); + color = max(color, make_float3(0.0f, 0.0f, 0.0f)); + float L = average(color); - float ref = 2.0f*values[(int)(n*0.75f)]; + float ref = 2.0f * values[(int)(n * 0.75f)]; - /* Slightly offset values to avoid false positives in (almost) black areas. 
*/ - max_variance += 1e-5f; - ref -= 1e-5f; + /* Slightly offset values to avoid false positives in (almost) black areas. */ + max_variance += 1e-5f; + ref -= 1e-5f; - if(L > ref) { - /* The pixel appears to be an outlier. - * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel - * should actually be at the reference value: - * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier. - * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight. - */ + if (L > ref) { + /* The pixel appears to be an outlier. + * However, it may just be a legitimate highlight. Therefore, it is checked how likely it is that the pixel + * should actually be at the reference value: + * If the reference is within the 3-sigma interval, the pixel is assumed to be a statistical outlier. + * Otherwise, it is very unlikely that the pixel should be darker, which indicates a legitimate highlight. + */ - if(pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) { - depth[idx] = -depth[idx]; - color *= ref/L; - variance[idx] = variance[idx + pass_stride] = variance[idx + 2*pass_stride] = max_variance; - } - else { - float stddev = sqrtf(pixel_variance); - if(L - 3*stddev < ref) { - /* The pixel is an outlier, so negate the depth value to mark it as one. - * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. 
*/ - depth[idx] = -depth[idx]; - float fac = ref/L; - color *= fac; - variance[idx ] *= fac*fac; - variance[idx + pass_stride] *= fac*fac; - variance[idx+2*pass_stride] *= fac*fac; - } - } - } - out[idx ] = color.x; - out[idx + pass_stride] = color.y; - out[idx+2*pass_stride] = color.z; + if (pixel_variance < 0.0f || pixel_variance > 9.0f * max_variance) { + depth[idx] = -depth[idx]; + color *= ref / L; + variance[idx] = variance[idx + pass_stride] = variance[idx + 2 * pass_stride] = max_variance; + } + else { + float stddev = sqrtf(pixel_variance); + if (L - 3 * stddev < ref) { + /* The pixel is an outlier, so negate the depth value to mark it as one. + * Also, scale its brightness down to the outlier threshold to avoid trouble with the NLM weights. */ + depth[idx] = -depth[idx]; + float fac = ref / L; + color *= fac; + variance[idx] *= fac * fac; + variance[idx + pass_stride] *= fac * fac; + variance[idx + 2 * pass_stride] *= fac * fac; + } + } + } + out[idx] = color.x; + out[idx + pass_stride] = color.y; + out[idx + 2 * pass_stride] = color.z; } /* Combine A/B buffers. * Calculates the combined mean and the buffer variance. 
*/ -ccl_device void kernel_filter_combine_halves(int x, int y, +ccl_device void kernel_filter_combine_halves(int x, + int y, ccl_global float *mean, ccl_global float *variance, ccl_global float *a, ccl_global float *b, - int4 rect, int r) + int4 rect, + int r) { - int buffer_w = align_up(rect.z - rect.x, 4); - int idx = (y-rect.y)*buffer_w + (x - rect.x); + int buffer_w = align_up(rect.z - rect.x, 4); + int idx = (y - rect.y) * buffer_w + (x - rect.x); - if(mean) mean[idx] = 0.5f * (a[idx]+b[idx]); - if(variance) { - if(r == 0) variance[idx] = 0.25f * (a[idx]-b[idx])*(a[idx]-b[idx]); - else { - variance[idx] = 0.0f; - float values[25]; - int numValues = 0; - for(int py = max(y-r, rect.y); py < min(y+r+1, rect.w); py++) { - for(int px = max(x-r, rect.x); px < min(x+r+1, rect.z); px++) { - int pidx = (py-rect.y)*buffer_w + (px-rect.x); - values[numValues++] = 0.25f * (a[pidx]-b[pidx])*(a[pidx]-b[pidx]); - } - } - /* Insertion-sort the variances (fast enough for 25 elements). */ - for(int i = 1; i < numValues; i++) { - float v = values[i]; - int j; - for(j = i-1; j >= 0 && values[j] > v; j--) - values[j+1] = values[j]; - values[j+1] = v; - } - variance[idx] = values[(7*numValues)/8]; - } - } + if (mean) + mean[idx] = 0.5f * (a[idx] + b[idx]); + if (variance) { + if (r == 0) + variance[idx] = 0.25f * (a[idx] - b[idx]) * (a[idx] - b[idx]); + else { + variance[idx] = 0.0f; + float values[25]; + int numValues = 0; + for (int py = max(y - r, rect.y); py < min(y + r + 1, rect.w); py++) { + for (int px = max(x - r, rect.x); px < min(x + r + 1, rect.z); px++) { + int pidx = (py - rect.y) * buffer_w + (px - rect.x); + values[numValues++] = 0.25f * (a[pidx] - b[pidx]) * (a[pidx] - b[pidx]); + } + } + /* Insertion-sort the variances (fast enough for 25 elements). 
*/ + for (int i = 1; i < numValues; i++) { + float v = values[i]; + int j; + for (j = i - 1; j >= 0 && values[j] > v; j--) + values[j + 1] = values[j]; + values[j + 1] = v; + } + variance[idx] = values[(7 * numValues) / 8]; + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/filter/filter_reconstruction.h b/intern/cycles/kernel/filter/filter_reconstruction.h index ceda8f71f98..850f20584da 100644 --- a/intern/cycles/kernel/filter/filter_reconstruction.h +++ b/intern/cycles/kernel/filter/filter_reconstruction.h @@ -16,63 +16,75 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline void kernel_filter_construct_gramian(int x, int y, +ccl_device_inline void kernel_filter_construct_gramian(int x, + int y, int storage_stride, - int dx, int dy, int t, + int dx, + int dy, + int t, int buffer_stride, int pass_stride, int frame_offset, bool use_time, const ccl_global float *ccl_restrict buffer, - const ccl_global float *ccl_restrict transform, + const ccl_global float *ccl_restrict + transform, ccl_global int *rank, float weight, ccl_global float *XtWX, ccl_global float3 *XtWY, int localIdx) { - if(weight < 1e-3f) { - return; - } + if (weight < 1e-3f) { + return; + } - int p_offset = y * buffer_stride + x; - int q_offset = (y+dy) * buffer_stride + (x+dx) + frame_offset; + int p_offset = y * buffer_stride + x; + int q_offset = (y + dy) * buffer_stride + (x + dx) + frame_offset; #ifdef __KERNEL_GPU__ - const int stride = storage_stride; + const int stride = storage_stride; #else - const int stride = 1; - (void) storage_stride; + const int stride = 1; + (void)storage_stride; #endif #ifdef __KERNEL_CUDA__ - ccl_local float shared_design_row[(DENOISE_FEATURES+1)*CCL_MAX_LOCAL_SIZE]; - ccl_local_param float *design_row = shared_design_row + localIdx*(DENOISE_FEATURES+1); + ccl_local float shared_design_row[(DENOISE_FEATURES + 1) * CCL_MAX_LOCAL_SIZE]; + ccl_local_param float *design_row = shared_design_row + localIdx * (DENOISE_FEATURES + 1); #else - float 
design_row[DENOISE_FEATURES+1]; + float design_row[DENOISE_FEATURES + 1]; #endif - float3 q_color = filter_get_color(buffer + q_offset, pass_stride); + float3 q_color = filter_get_color(buffer + q_offset, pass_stride); - /* If the pixel was flagged as an outlier during prefiltering, skip it. */ - if(ccl_get_feature(buffer + q_offset, 0) < 0.0f) { - return; - } + /* If the pixel was flagged as an outlier during prefiltering, skip it. */ + if (ccl_get_feature(buffer + q_offset, 0) < 0.0f) { + return; + } - filter_get_design_row_transform(make_int3(x, y, t), buffer + p_offset, - make_int3(x+dx, y+dy, t), buffer + q_offset, - pass_stride, *rank, design_row, transform, stride, use_time); + filter_get_design_row_transform(make_int3(x, y, t), + buffer + p_offset, + make_int3(x + dx, y + dy, t), + buffer + q_offset, + pass_stride, + *rank, + design_row, + transform, + stride, + use_time); #ifdef __KERNEL_GPU__ - math_trimatrix_add_gramian_strided(XtWX, (*rank)+1, design_row, weight, stride); - math_vec3_add_strided(XtWY, (*rank)+1, design_row, weight * q_color, stride); + math_trimatrix_add_gramian_strided(XtWX, (*rank) + 1, design_row, weight, stride); + math_vec3_add_strided(XtWY, (*rank) + 1, design_row, weight * q_color, stride); #else - math_trimatrix_add_gramian(XtWX, (*rank)+1, design_row, weight); - math_vec3_add(XtWY, (*rank)+1, design_row, weight * q_color); + math_trimatrix_add_gramian(XtWX, (*rank) + 1, design_row, weight); + math_vec3_add(XtWY, (*rank) + 1, design_row, weight * q_color); #endif } -ccl_device_inline void kernel_filter_finalize(int x, int y, +ccl_device_inline void kernel_filter_finalize(int x, + int y, ccl_global float *buffer, ccl_global int *rank, int storage_stride, @@ -82,47 +94,47 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int sample) { #ifdef __KERNEL_GPU__ - const int stride = storage_stride; + const int stride = storage_stride; #else - const int stride = 1; - (void) storage_stride; + const int stride = 1; + 
(void)storage_stride; #endif - if(XtWX[0] < 1e-3f) { - /* There is not enough information to determine a denoised result. - * As a fallback, keep the original value of the pixel. */ - return; - } - - /* The weighted average of pixel colors (essentially, the NLM-filtered image). - * In case the solution of the linear model fails due to numerical issues or - * returns non-sensical negative values, fall back to this value. */ - float3 mean_color = XtWY[0]/XtWX[0]; - - math_trimatrix_vec3_solve(XtWX, XtWY, (*rank)+1, stride); - - float3 final_color = XtWY[0]; - if(!isfinite3_safe(final_color) || - (final_color.x < -0.01f || final_color.y < -0.01f || final_color.z < -0.01f)) - { - final_color = mean_color; - } - - /* Clamp pixel value to positive values. */ - final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f)); - - ccl_global float *combined_buffer = buffer + (y*buffer_params.y + x + buffer_params.x)*buffer_params.z; - if(buffer_params.w >= 0) { - final_color *= sample; - if(buffer_params.w > 0) { - final_color.x += combined_buffer[buffer_params.w+0]; - final_color.y += combined_buffer[buffer_params.w+1]; - final_color.z += combined_buffer[buffer_params.w+2]; - } - } - combined_buffer[0] = final_color.x; - combined_buffer[1] = final_color.y; - combined_buffer[2] = final_color.z; + if (XtWX[0] < 1e-3f) { + /* There is not enough information to determine a denoised result. + * As a fallback, keep the original value of the pixel. */ + return; + } + + /* The weighted average of pixel colors (essentially, the NLM-filtered image). + * In case the solution of the linear model fails due to numerical issues or + * returns non-sensical negative values, fall back to this value. 
*/ + float3 mean_color = XtWY[0] / XtWX[0]; + + math_trimatrix_vec3_solve(XtWX, XtWY, (*rank) + 1, stride); + + float3 final_color = XtWY[0]; + if (!isfinite3_safe(final_color) || + (final_color.x < -0.01f || final_color.y < -0.01f || final_color.z < -0.01f)) { + final_color = mean_color; + } + + /* Clamp pixel value to positive values. */ + final_color = max(final_color, make_float3(0.0f, 0.0f, 0.0f)); + + ccl_global float *combined_buffer = buffer + (y * buffer_params.y + x + buffer_params.x) * + buffer_params.z; + if (buffer_params.w >= 0) { + final_color *= sample; + if (buffer_params.w > 0) { + final_color.x += combined_buffer[buffer_params.w + 0]; + final_color.y += combined_buffer[buffer_params.w + 1]; + final_color.z += combined_buffer[buffer_params.w + 2]; + } + } + combined_buffer[0] = final_color.x; + combined_buffer[1] = final_color.y; + combined_buffer[2] = final_color.z; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h index 94e27bb02fd..69e3c7c458d 100644 --- a/intern/cycles/kernel/filter/filter_transform.h +++ b/intern/cycles/kernel/filter/filter_transform.h @@ -18,92 +18,101 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer, CCL_FILTER_TILE_INFO, - int x, int y, int4 rect, - int pass_stride, int frame_stride, + int x, + int y, + int4 rect, + int pass_stride, + int frame_stride, bool use_time, - float *transform, int *rank, - int radius, float pca_threshold) + float *transform, + int *rank, + int radius, + float pca_threshold) { - int buffer_w = align_up(rect.z - rect.x, 4); - - float features[DENOISE_FEATURES]; - - const float *ccl_restrict pixel_buffer; - int3 pixel; - - int num_features = use_time? 11 : 10; - - /* === Calculate denoising window. 
=== */ - int2 low = make_int2(max(rect.x, x - radius), - max(rect.y, y - radius)); - int2 high = make_int2(min(rect.z, x + radius + 1), - min(rect.w, y + radius + 1)); - int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames; - - /* === Shift feature passes to have mean 0. === */ - float feature_means[DENOISE_FEATURES]; - math_vector_zero(feature_means, num_features); - FOR_PIXEL_WINDOW { - filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride); - math_vector_add(feature_means, features, num_features); - } END_FOR_PIXEL_WINDOW - - math_vector_scale(feature_means, 1.0f / num_pixels, num_features); - - /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ - float feature_scale[DENOISE_FEATURES]; - math_vector_zero(feature_scale, num_features); - - FOR_PIXEL_WINDOW { - filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); - math_vector_max(feature_scale, features, num_features); - } END_FOR_PIXEL_WINDOW - - filter_calculate_scale(feature_scale, use_time); - - /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space - * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ - float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES]; - math_matrix_zero(feature_matrix, num_features); - FOR_PIXEL_WINDOW { - filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); - math_vector_mul(features, feature_scale, num_features); - math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f); - } END_FOR_PIXEL_WINDOW - - math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1); - *rank = 0; - /* Prevent overfitting when a small window is used. 
*/ - int max_rank = min(num_features, num_pixels/3); - if(pca_threshold < 0.0f) { - float threshold_energy = 0.0f; - for(int i = 0; i < num_features; i++) { - threshold_energy += feature_matrix[i*num_features+i]; - } - threshold_energy *= 1.0f - (-pca_threshold); - - float reduced_energy = 0.0f; - for(int i = 0; i < max_rank; i++, (*rank)++) { - if(i >= 2 && reduced_energy >= threshold_energy) - break; - float s = feature_matrix[i*num_features+i]; - reduced_energy += s; - } - } - else { - for(int i = 0; i < max_rank; i++, (*rank)++) { - float s = feature_matrix[i*num_features+i]; - if(i >= 2 && sqrtf(s) < pca_threshold) - break; - } - } - - /* Bake the feature scaling into the transformation matrix. */ - for(int i = 0; i < (*rank); i++) { - math_vector_mul(transform + i*num_features, feature_scale, num_features); - } - math_matrix_transpose(transform, num_features, 1); + int buffer_w = align_up(rect.z - rect.x, 4); + + float features[DENOISE_FEATURES]; + + const float *ccl_restrict pixel_buffer; + int3 pixel; + + int num_features = use_time ? 11 : 10; + + /* === Calculate denoising window. === */ + int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius)); + int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1)); + int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames; + + /* === Shift feature passes to have mean 0. === */ + float feature_means[DENOISE_FEATURES]; + math_vector_zero(feature_means, num_features); + FOR_PIXEL_WINDOW + { + filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride); + math_vector_add(feature_means, features, num_features); + } + END_FOR_PIXEL_WINDOW + + math_vector_scale(feature_means, 1.0f / num_pixels, num_features); + + /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. 
=== */ + float feature_scale[DENOISE_FEATURES]; + math_vector_zero(feature_scale, num_features); + + FOR_PIXEL_WINDOW + { + filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); + math_vector_max(feature_scale, features, num_features); + } + END_FOR_PIXEL_WINDOW + + filter_calculate_scale(feature_scale, use_time); + + /* === Generate the feature transformation. === + * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space + * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ + float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES]; + math_matrix_zero(feature_matrix, num_features); + FOR_PIXEL_WINDOW + { + filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); + math_vector_mul(features, feature_scale, num_features); + math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f); + } + END_FOR_PIXEL_WINDOW + + math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1); + *rank = 0; + /* Prevent overfitting when a small window is used. */ + int max_rank = min(num_features, num_pixels / 3); + if (pca_threshold < 0.0f) { + float threshold_energy = 0.0f; + for (int i = 0; i < num_features; i++) { + threshold_energy += feature_matrix[i * num_features + i]; + } + threshold_energy *= 1.0f - (-pca_threshold); + + float reduced_energy = 0.0f; + for (int i = 0; i < max_rank; i++, (*rank)++) { + if (i >= 2 && reduced_energy >= threshold_energy) + break; + float s = feature_matrix[i * num_features + i]; + reduced_energy += s; + } + } + else { + for (int i = 0; i < max_rank; i++, (*rank)++) { + float s = feature_matrix[i * num_features + i]; + if (i >= 2 && sqrtf(s) < pca_threshold) + break; + } + } + + /* Bake the feature scaling into the transformation matrix. 
*/ + for (int i = 0; i < (*rank); i++) { + math_vector_mul(transform + i * num_features, feature_scale, num_features); + } + math_matrix_transpose(transform, num_features, 1); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h index ed8ddcb49b1..89cddfd927f 100644 --- a/intern/cycles/kernel/filter/filter_transform_gpu.h +++ b/intern/cycles/kernel/filter/filter_transform_gpu.h @@ -18,106 +18,110 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_restrict buffer, CCL_FILTER_TILE_INFO, - int x, int y, int4 rect, - int pass_stride, int frame_stride, + int x, + int y, + int4 rect, + int pass_stride, + int frame_stride, bool use_time, ccl_global float *transform, ccl_global int *rank, - int radius, float pca_threshold, - int transform_stride, int localIdx) + int radius, + float pca_threshold, + int transform_stride, + int localIdx) { - int buffer_w = align_up(rect.z - rect.x, 4); + int buffer_w = align_up(rect.z - rect.x, 4); #ifdef __KERNEL_CUDA__ - ccl_local float shared_features[DENOISE_FEATURES*CCL_MAX_LOCAL_SIZE]; - ccl_local_param float *features = shared_features + localIdx*DENOISE_FEATURES; + ccl_local float shared_features[DENOISE_FEATURES * CCL_MAX_LOCAL_SIZE]; + ccl_local_param float *features = shared_features + localIdx * DENOISE_FEATURES; #else - float features[DENOISE_FEATURES]; + float features[DENOISE_FEATURES]; #endif - int num_features = use_time? 11 : 10; - - /* === Calculate denoising window. === */ - int2 low = make_int2(max(rect.x, x - radius), - max(rect.y, y - radius)); - int2 high = make_int2(min(rect.z, x + radius + 1), - min(rect.w, y + radius + 1)); - int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames; - const ccl_global float *ccl_restrict pixel_buffer; - int3 pixel; - - - - - /* === Shift feature passes to have mean 0. 
=== */ - float feature_means[DENOISE_FEATURES]; - math_vector_zero(feature_means, num_features); - FOR_PIXEL_WINDOW { - filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride); - math_vector_add(feature_means, features, num_features); - } END_FOR_PIXEL_WINDOW - - math_vector_scale(feature_means, 1.0f / num_pixels, num_features); - - /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ - float feature_scale[DENOISE_FEATURES]; - math_vector_zero(feature_scale, num_features); - - FOR_PIXEL_WINDOW { - filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); - math_vector_max(feature_scale, features, num_features); - } END_FOR_PIXEL_WINDOW - - filter_calculate_scale(feature_scale, use_time); - - - - /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space - * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ - float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES]; - math_matrix_zero(feature_matrix, num_features); - FOR_PIXEL_WINDOW { - filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); - math_vector_mul(features, feature_scale, num_features); - math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f); - } END_FOR_PIXEL_WINDOW - - math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride); - *rank = 0; - /* Prevent overfitting when a small window is used. 
*/ - int max_rank = min(num_features, num_pixels/3); - if(pca_threshold < 0.0f) { - float threshold_energy = 0.0f; - for(int i = 0; i < num_features; i++) { - threshold_energy += feature_matrix[i*num_features+i]; - } - threshold_energy *= 1.0f - (-pca_threshold); - - float reduced_energy = 0.0f; - for(int i = 0; i < max_rank; i++, (*rank)++) { - if(i >= 2 && reduced_energy >= threshold_energy) - break; - float s = feature_matrix[i*num_features+i]; - reduced_energy += s; - } - } - else { - for(int i = 0; i < max_rank; i++, (*rank)++) { - float s = feature_matrix[i*num_features+i]; - if(i >= 2 && sqrtf(s) < pca_threshold) - break; - } - } - - math_matrix_transpose(transform, num_features, transform_stride); - - /* Bake the feature scaling into the transformation matrix. */ - for(int i = 0; i < num_features; i++) { - for(int j = 0; j < (*rank); j++) { - transform[(i*num_features + j)*transform_stride] *= feature_scale[i]; - } - } + int num_features = use_time ? 11 : 10; + + /* === Calculate denoising window. === */ + int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius)); + int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1)); + int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames; + const ccl_global float *ccl_restrict pixel_buffer; + int3 pixel; + + /* === Shift feature passes to have mean 0. === */ + float feature_means[DENOISE_FEATURES]; + math_vector_zero(feature_means, num_features); + FOR_PIXEL_WINDOW + { + filter_get_features(pixel, pixel_buffer, features, use_time, NULL, pass_stride); + math_vector_add(feature_means, features, num_features); + } + END_FOR_PIXEL_WINDOW + + math_vector_scale(feature_means, 1.0f / num_pixels, num_features); + + /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. 
=== */ + float feature_scale[DENOISE_FEATURES]; + math_vector_zero(feature_scale, num_features); + + FOR_PIXEL_WINDOW + { + filter_get_feature_scales(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); + math_vector_max(feature_scale, features, num_features); + } + END_FOR_PIXEL_WINDOW + + filter_calculate_scale(feature_scale, use_time); + + /* === Generate the feature transformation. === + * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space + * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ + float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES]; + math_matrix_zero(feature_matrix, num_features); + FOR_PIXEL_WINDOW + { + filter_get_features(pixel, pixel_buffer, features, use_time, feature_means, pass_stride); + math_vector_mul(features, feature_scale, num_features); + math_matrix_add_gramian(feature_matrix, num_features, features, 1.0f); + } + END_FOR_PIXEL_WINDOW + + math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, transform_stride); + *rank = 0; + /* Prevent overfitting when a small window is used. */ + int max_rank = min(num_features, num_pixels / 3); + if (pca_threshold < 0.0f) { + float threshold_energy = 0.0f; + for (int i = 0; i < num_features; i++) { + threshold_energy += feature_matrix[i * num_features + i]; + } + threshold_energy *= 1.0f - (-pca_threshold); + + float reduced_energy = 0.0f; + for (int i = 0; i < max_rank; i++, (*rank)++) { + if (i >= 2 && reduced_energy >= threshold_energy) + break; + float s = feature_matrix[i * num_features + i]; + reduced_energy += s; + } + } + else { + for (int i = 0; i < max_rank; i++, (*rank)++) { + float s = feature_matrix[i * num_features + i]; + if (i >= 2 && sqrtf(s) < pca_threshold) + break; + } + } + + math_matrix_transpose(transform, num_features, transform_stride); + + /* Bake the feature scaling into the transformation matrix. 
*/ + for (int i = 0; i < num_features; i++) { + for (int j = 0; j < (*rank); j++) { + transform[(i * num_features + j) * transform_stride] *= feature_scale[i]; + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h index 10bd3e477e9..22397b292db 100644 --- a/intern/cycles/kernel/filter/filter_transform_sse.h +++ b/intern/cycles/kernel/filter/filter_transform_sse.h @@ -18,98 +18,110 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buffer, CCL_FILTER_TILE_INFO, - int x, int y, int4 rect, - int pass_stride, int frame_stride, + int x, + int y, + int4 rect, + int pass_stride, + int frame_stride, bool use_time, - float *transform, int *rank, - int radius, float pca_threshold) + float *transform, + int *rank, + int radius, + float pca_threshold) { - int buffer_w = align_up(rect.z - rect.x, 4); - - float4 features[DENOISE_FEATURES]; - const float *ccl_restrict pixel_buffer; - int3 pixel; - - int num_features = use_time? 11 : 10; - - /* === Calculate denoising window. === */ - int2 low = make_int2(max(rect.x, x - radius), - max(rect.y, y - radius)); - int2 high = make_int2(min(rect.z, x + radius + 1), - min(rect.w, y + radius + 1)); - int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames; - - /* === Shift feature passes to have mean 0. 
=== */ - float4 feature_means[DENOISE_FEATURES]; - math_vector_zero_sse(feature_means, num_features); - FOR_PIXEL_WINDOW_SSE { - filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride); - math_vector_add_sse(feature_means, num_features, features); - } END_FOR_PIXEL_WINDOW_SSE - - float4 pixel_scale = make_float4(1.0f / num_pixels); - for(int i = 0; i < num_features; i++) { - feature_means[i] = reduce_add(feature_means[i]) * pixel_scale; - } - - /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ - float4 feature_scale[DENOISE_FEATURES]; - math_vector_zero_sse(feature_scale, num_features); - FOR_PIXEL_WINDOW_SSE { - filter_get_feature_scales_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride); - math_vector_max_sse(feature_scale, features, num_features); - } END_FOR_PIXEL_WINDOW_SSE - - filter_calculate_scale_sse(feature_scale, use_time); - - /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space - * which generally has fewer dimensions. This mainly helps to prevent overfitting. */ - float4 feature_matrix_sse[DENOISE_FEATURES*DENOISE_FEATURES]; - math_matrix_zero_sse(feature_matrix_sse, num_features); - FOR_PIXEL_WINDOW_SSE { - filter_get_features_sse(x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride); - math_vector_mul_sse(features, num_features, feature_scale); - math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f)); - } END_FOR_PIXEL_WINDOW_SSE - - float feature_matrix[DENOISE_FEATURES*DENOISE_FEATURES]; - math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse); - - math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1); - - *rank = 0; - /* Prevent overfitting when a small window is used. 
*/ - int max_rank = min(num_features, num_pixels/3); - if(pca_threshold < 0.0f) { - float threshold_energy = 0.0f; - for(int i = 0; i < num_features; i++) { - threshold_energy += feature_matrix[i*num_features+i]; - } - threshold_energy *= 1.0f - (-pca_threshold); - - float reduced_energy = 0.0f; - for(int i = 0; i < max_rank; i++, (*rank)++) { - if(i >= 2 && reduced_energy >= threshold_energy) - break; - float s = feature_matrix[i*num_features+i]; - reduced_energy += s; - } - } - else { - for(int i = 0; i < max_rank; i++, (*rank)++) { - float s = feature_matrix[i*num_features+i]; - if(i >= 2 && sqrtf(s) < pca_threshold) - break; - } - } - - math_matrix_transpose(transform, num_features, 1); - - /* Bake the feature scaling into the transformation matrix. */ - for(int i = 0; i < num_features; i++) { - math_vector_scale(transform + i*num_features, feature_scale[i][0], *rank); - } + int buffer_w = align_up(rect.z - rect.x, 4); + + float4 features[DENOISE_FEATURES]; + const float *ccl_restrict pixel_buffer; + int3 pixel; + + int num_features = use_time ? 11 : 10; + + /* === Calculate denoising window. === */ + int2 low = make_int2(max(rect.x, x - radius), max(rect.y, y - radius)); + int2 high = make_int2(min(rect.z, x + radius + 1), min(rect.w, y + radius + 1)); + int num_pixels = (high.y - low.y) * (high.x - low.x) * tile_info->num_frames; + + /* === Shift feature passes to have mean 0. 
=== */ + float4 feature_means[DENOISE_FEATURES]; + math_vector_zero_sse(feature_means, num_features); + FOR_PIXEL_WINDOW_SSE + { + filter_get_features_sse( + x4, y4, t4, active_pixels, pixel_buffer, features, use_time, NULL, pass_stride); + math_vector_add_sse(feature_means, num_features, features); + } + END_FOR_PIXEL_WINDOW_SSE + + float4 pixel_scale = make_float4(1.0f / num_pixels); + for (int i = 0; i < num_features; i++) { + feature_means[i] = reduce_add(feature_means[i]) * pixel_scale; + } + + /* === Scale the shifted feature passes to a range of [-1; 1], will be baked into the transform later. === */ + float4 feature_scale[DENOISE_FEATURES]; + math_vector_zero_sse(feature_scale, num_features); + FOR_PIXEL_WINDOW_SSE + { + filter_get_feature_scales_sse( + x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride); + math_vector_max_sse(feature_scale, features, num_features); + } + END_FOR_PIXEL_WINDOW_SSE + + filter_calculate_scale_sse(feature_scale, use_time); + + /* === Generate the feature transformation. === + * This transformation maps the num_features-dimentional feature space to a reduced feature (r-feature) space + * which generally has fewer dimensions. This mainly helps to prevent overfitting. 
*/ + float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES]; + math_matrix_zero_sse(feature_matrix_sse, num_features); + FOR_PIXEL_WINDOW_SSE + { + filter_get_features_sse( + x4, y4, t4, active_pixels, pixel_buffer, features, use_time, feature_means, pass_stride); + math_vector_mul_sse(features, num_features, feature_scale); + math_matrix_add_gramian_sse(feature_matrix_sse, num_features, features, make_float4(1.0f)); + } + END_FOR_PIXEL_WINDOW_SSE + + float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES]; + math_matrix_hsum(feature_matrix, num_features, feature_matrix_sse); + + math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1); + + *rank = 0; + /* Prevent overfitting when a small window is used. */ + int max_rank = min(num_features, num_pixels / 3); + if (pca_threshold < 0.0f) { + float threshold_energy = 0.0f; + for (int i = 0; i < num_features; i++) { + threshold_energy += feature_matrix[i * num_features + i]; + } + threshold_energy *= 1.0f - (-pca_threshold); + + float reduced_energy = 0.0f; + for (int i = 0; i < max_rank; i++, (*rank)++) { + if (i >= 2 && reduced_energy >= threshold_energy) + break; + float s = feature_matrix[i * num_features + i]; + reduced_energy += s; + } + } + else { + for (int i = 0; i < max_rank; i++, (*rank)++) { + float s = feature_matrix[i * num_features + i]; + if (i >= 2 && sqrtf(s) < pca_threshold) + break; + } + } + + math_matrix_transpose(transform, num_features, 1); + + /* Bake the feature scaling into the transformation matrix. 
*/ + for (int i = 0; i < num_features; i++) { + math_vector_scale(transform + i * num_features, feature_scale[i][0], *rank); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index e991f3d685a..456608bfa22 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -30,81 +30,83 @@ ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData * ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderData *sd) { #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) { - return ATTR_PRIM_CURVE; - } - else + if (sd->type & PRIMITIVE_ALL_CURVE) { + return ATTR_PRIM_CURVE; + } + else #endif - if(subd_triangle_patch(kg, sd) != ~0) { - return ATTR_PRIM_SUBD; - } - else { - return ATTR_PRIM_TRIANGLE; - } + if (subd_triangle_patch(kg, sd) != ~0) { + return ATTR_PRIM_SUBD; + } + else { + return ATTR_PRIM_TRIANGLE; + } } ccl_device_inline AttributeDescriptor attribute_not_found() { - const AttributeDescriptor desc = {ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND}; - return desc; + const AttributeDescriptor desc = { + ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND}; + return desc; } /* Find attribute based on ID */ ccl_device_inline uint object_attribute_map_offset(KernelGlobals *kg, int object) { - return kernel_tex_fetch(__objects, object).attribute_map_offset; + return kernel_tex_fetch(__objects, object).attribute_map_offset; } -ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id) +ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, + const ShaderData *sd, + uint id) { - if(sd->object == OBJECT_NONE) { - return attribute_not_found(); - } - - /* for SVM, find attribute by unique id */ - uint attr_offset = object_attribute_map_offset(kg, sd->object); - attr_offset += attribute_primitive_type(kg, sd); - uint4 attr_map 
= kernel_tex_fetch(__attributes_map, attr_offset); - - while(attr_map.x != id) { - if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - return attribute_not_found(); - } - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - AttributeDescriptor desc; - desc.element = (AttributeElement)attr_map.y; - - if(sd->prim == PRIM_NONE && - desc.element != ATTR_ELEMENT_MESH && - desc.element != ATTR_ELEMENT_VOXEL && - desc.element != ATTR_ELEMENT_OBJECT) - { - return attribute_not_found(); - } - - /* return result */ - desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; - desc.type = (NodeAttributeType)(attr_map.w & 0xff); - desc.flags = (AttributeFlag)(attr_map.w >> 8); - - return desc; + if (sd->object == OBJECT_NONE) { + return attribute_not_found(); + } + + /* for SVM, find attribute by unique id */ + uint attr_offset = object_attribute_map_offset(kg, sd->object); + attr_offset += attribute_primitive_type(kg, sd); + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while (attr_map.x != id) { + if (UNLIKELY(attr_map.x == ATTR_STD_NONE)) { + return attribute_not_found(); + } + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + AttributeDescriptor desc; + desc.element = (AttributeElement)attr_map.y; + + if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH && + desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) { + return attribute_not_found(); + } + + /* return result */ + desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + desc.type = (NodeAttributeType)(attr_map.w & 0xff); + desc.flags = (AttributeFlag)(attr_map.w >> 8); + + return desc; } /* Transform matrix attribute on meshes */ -ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) +ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) { - Transform tfm; + Transform tfm; - tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); - tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); - tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); + tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); + tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); + tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); - return tfm; + return tfm; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 9b60cf6d56b..e0aacb434eb 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -27,169 +27,199 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3) { - float fc = 0.71f; - float data[4]; - float t2 = t * t; - data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc; - data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t; - data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc; - data[3] = 3.0f * fc * t2 - 2.0f * fc * t; - return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; + float fc = 0.71f; + float data[4]; + float t2 = t * t; + data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc; + data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t; + data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc; + data[3] = 3.0f * fc * t2 - 2.0f * fc * t; + return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * 
p3; } ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3) { - float data[4]; - float fc = 0.71f; - float t2 = t * t; - float t3 = t2 * t; - data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t; - data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f; - data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t; - data[3] = fc * t3 - fc * t2; - return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; + float data[4]; + float fc = 0.71f; + float t2 = t * t; + float t3 = t2 * t; + data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t; + data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f; + data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t; + data[3] = fc * t3 - fc * t2; + return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; } /* Reading attributes on various curve elements */ -ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) +ccl_device float curve_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(desc.element == ATTR_ELEMENT_CURVE) { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; -#endif - - return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); - } - else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); - if(dy) *dy = 0.0f; -#endif - - return (1.0f - sd->u)*f0 + sd->u*f1; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; -#endif - - return 0.0f; - } + if (desc.element == 
ATTR_ELEMENT_CURVE) { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; +# endif + + return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); + } + else if (desc.element == ATTR_ELEMENT_CURVE_KEY || + desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = 0.0f; +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; +# endif + + return 0.0f; + } } -ccl_device float2 curve_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy) +ccl_device float2 curve_attribute_float2(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float2 *dx, + float2 *dy) { - if(desc.element == ATTR_ELEMENT_CURVE) { - /* idea: we can't derive any useful differentials here, but for tiled - * mipmap image caching it would be useful to avoid reading the highest - * detail level always. maybe a derivative based on the hair density - * could be computed somehow? 
*/ -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); -#endif - - return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); - } - else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); - if(dy) *dy = make_float2(0.0f, 0.0f); -#endif - - return (1.0f - sd->u)*f0 + sd->u*f1; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); -#endif - - return make_float2(0.0f, 0.0f); - } + if (desc.element == ATTR_ELEMENT_CURVE) { + /* idea: we can't derive any useful differentials here, but for tiled + * mipmap image caching it would be useful to avoid reading the highest + * detail level always. maybe a derivative based on the hair density + * could be computed somehow? 
*/ +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); + } + else if (desc.element == ATTR_ELEMENT_CURVE_KEY || + desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + k0); + float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + k1); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); +# endif + + return make_float2(0.0f, 0.0f); + } } -ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) +ccl_device float3 curve_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, + float3 *dy) { - if(desc.element == ATTR_ELEMENT_CURVE) { - /* idea: we can't derive any useful differentials here, but for tiled - * mipmap image caching it would be useful to avoid reading the highest - * detail level always. maybe a derivative based on the hair density - * could be computed somehow? 
*/ -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); - } - else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return (1.0f - sd->u)*f0 + sd->u*f1; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return make_float3(0.0f, 0.0f, 0.0f); - } + if (desc.element == ATTR_ELEMENT_CURVE) { + /* idea: we can't derive any useful differentials here, but for tiled + * mipmap image caching it would be useful to avoid reading the highest + * detail level always. maybe a derivative based on the hair density + * could be computed somehow? 
*/ +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); + } + else if (desc.element == ATTR_ELEMENT_CURVE_KEY || + desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = sd->du.dx * (f1 - f0); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + return (1.0f - sd->u) * f0 + sd->u * f1; + } + else { +# ifdef __RAY_DIFFERENTIALS__ + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); +# endif + + return make_float3(0.0f, 0.0f, 0.0f); + } } /* Curve thickness */ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) { - float r = 0.0f; + float r = 0.0f; - if(sd->type & PRIMITIVE_ALL_CURVE) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; + if (sd->type & PRIMITIVE_ALL_CURVE) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; - float4 P_curve[2]; + float4 P_curve[2]; - if(sd->type & PRIMITIVE_CURVE) { - P_curve[0]= kernel_tex_fetch(__curve_keys, k0); - P_curve[1]= kernel_tex_fetch(__curve_keys, k1); - } - else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); - } + if (sd->type & PRIMITIVE_CURVE) { + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + 
motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + } - r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; - } + r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; + } - return r*2.0f; + return r * 2.0f; } /* Curve location for motion pass, linear interpolation between keys and @@ -197,89 +227,98 @@ ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; - float4 P_curve[2]; + float4 P_curve[2]; - P_curve[0]= kernel_tex_fetch(__curve_keys, k0); - P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); - return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); + return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); } /* Curve tangent normal */ ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) { - float3 tgN = make_float3(0.0f,0.0f,0.0f); + float3 tgN = make_float3(0.0f, 0.0f, 0.0f); - if(sd->type & PRIMITIVE_ALL_CURVE) { + if (sd->type & PRIMITIVE_ALL_CURVE) { - tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu))); - tgN = normalize(tgN); + tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu, -sd->I) / len_squared(sd->dPdu))); + tgN = normalize(tgN); - /* need to find suitable scaled gd for corrected normal */ -#if 0 - tgN = normalize(tgN - gd * sd->dPdu); -#endif - } + /* need to find suitable scaled gd for corrected normal */ +# if 0 + tgN = normalize(tgN - gd * sd->dPdu); +# endif + } - return tgN; + return tgN; } /* Curve 
bounds utility function */ -ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3) +ccl_device_inline void curvebounds(float *lower, + float *upper, + float *extremta, + float *extrema, + float *extremtb, + float *extremb, + float p0, + float p1, + float p2, + float p3) { - float halfdiscroot = (p2 * p2 - 3 * p3 * p1); - float ta = -1.0f; - float tb = -1.0f; - - *extremta = -1.0f; - *extremtb = -1.0f; - *upper = p0; - *lower = (p0 + p1) + (p2 + p3); - *extrema = *upper; - *extremb = *lower; - - if(*lower >= *upper) { - *upper = *lower; - *lower = p0; - } - - if(halfdiscroot >= 0) { - float inv3p3 = (1.0f/3.0f)/p3; - halfdiscroot = sqrtf(halfdiscroot); - ta = (-p2 - halfdiscroot) * inv3p3; - tb = (-p2 + halfdiscroot) * inv3p3; - } - - float t2; - float t3; - - if(ta > 0.0f && ta < 1.0f) { - t2 = ta * ta; - t3 = t2 * ta; - *extremta = ta; - *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; - - *upper = fmaxf(*extrema, *upper); - *lower = fminf(*extrema, *lower); - } - - if(tb > 0.0f && tb < 1.0f) { - t2 = tb * tb; - t3 = t2 * tb; - *extremtb = tb; - *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; - - *upper = fmaxf(*extremb, *upper); - *lower = fminf(*extremb, *lower); - } + float halfdiscroot = (p2 * p2 - 3 * p3 * p1); + float ta = -1.0f; + float tb = -1.0f; + + *extremta = -1.0f; + *extremtb = -1.0f; + *upper = p0; + *lower = (p0 + p1) + (p2 + p3); + *extrema = *upper; + *extremb = *lower; + + if (*lower >= *upper) { + *upper = *lower; + *lower = p0; + } + + if (halfdiscroot >= 0) { + float inv3p3 = (1.0f / 3.0f) / p3; + halfdiscroot = sqrtf(halfdiscroot); + ta = (-p2 - halfdiscroot) * inv3p3; + tb = (-p2 + halfdiscroot) * inv3p3; + } + + float t2; + float t3; + + if (ta > 0.0f && ta < 1.0f) { + t2 = ta * ta; + t3 = t2 * ta; + *extremta = ta; + *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; + + *upper = fmaxf(*extrema, *upper); + *lower = fminf(*extrema, *lower); 
+ } + + if (tb > 0.0f && tb < 1.0f) { + t2 = tb * tb; + t3 = t2 * tb; + *extremtb = tb; + *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; + + *upper = fmaxf(*extremb, *upper); + *lower = fminf(*extremb, *lower); + } } -#endif /* __HAIR__ */ +#endif /* __HAIR__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_curve_intersect.h b/intern/cycles/kernel/geom/geom_curve_intersect.h index 5cf8713e3a8..5fd277c2f99 100644 --- a/intern/cycles/kernel/geom/geom_curve_intersect.h +++ b/intern/cycles/kernel/geom/geom_curve_intersect.h @@ -18,484 +18,534 @@ CCL_NAMESPACE_BEGIN #ifdef __HAIR__ -#ifdef __KERNEL_SSE2__ +# ifdef __KERNEL_SSE2__ ccl_device_inline ssef transform_point_T3(const ssef t[3], const ssef &a) { - return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2])); + return madd(shuffle<0>(a), t[0], madd(shuffle<1>(a), t[1], shuffle<2>(a) * t[2])); } -#endif +# endif /* On CPU pass P and dir by reference to aligned vector. */ -ccl_device_forceinline bool cardinal_curve_intersect( - KernelGlobals *kg, - Intersection *isect, - const float3 ccl_ref P, - const float3 ccl_ref dir, - uint visibility, - int object, - int curveAddr, - float time, - int type, - uint *lcg_state, - float difl, - float extmax) +ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, + Intersection *isect, + const float3 ccl_ref P, + const float3 ccl_ref dir, + uint visibility, + int object, + int curveAddr, + float time, + int type, + uint *lcg_state, + float difl, + float extmax) { - const bool is_curve_primitive = (type & PRIMITIVE_CURVE); - - if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); - if(time < prim_time.x || time > prim_time.y) { - return false; - } - } - - int segment = PRIMITIVE_UNPACK_SEGMENT(type); - float epsilon = 0.0f; - float r_st, r_en; - - int depth = kernel_data.curve.subdivisions; - int flags = kernel_data.curve.curveflags; - int prim = 
kernel_tex_fetch(__prim_index, curveAddr); - -#ifdef __KERNEL_SSE2__ - ssef vdir = load4f(dir); - ssef vcurve_coef[4]; - const float3 *curve_coef = (float3 *)vcurve_coef; - - { - ssef dtmp = vdir * vdir; - ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp)); - ssef rd_ss = load1f_first(1.0f) / d_ss; - - ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]); - int2 &v00 = (int2 &)v00vec; - - int k0 = v00.x + segment; - int k1 = k0 + 1; - int ka = max(k0 - 1, v00.x); - int kb = min(k1 + 1, v00.x + v00.y - 1); - -#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && (!defined(_MSC_VER) || _MSC_VER > 1800) - avxf P_curve_0_1, P_curve_2_3; - if(is_curve_primitive) { - P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x); - P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x); - } - else { - int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object; - motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3); - } -#else /* __KERNEL_AVX2__ */ - ssef P_curve[4]; - - if(is_curve_primitive) { - P_curve[0] = load4f(&kg->__curve_keys.data[ka].x); - P_curve[1] = load4f(&kg->__curve_keys.data[k0].x); - P_curve[2] = load4f(&kg->__curve_keys.data[k1].x); - P_curve[3] = load4f(&kg->__curve_keys.data[kb].x); - } - else { - int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; - motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve); - } -#endif /* __KERNEL_AVX2__ */ - - ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss)); - ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn; - ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy; - ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz); - ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)); - - ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0); - ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0); - ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); - -#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && (!defined(_MSC_VER) || _MSC_VER > 1800) - const avxf vPP = _mm256_broadcast_ps(&P.m128); - const avxf htfm00 = avxf(htfm0.m128, htfm0.m128); - const avxf htfm11 = avxf(htfm1.m128, htfm1.m128); - const avxf htfm22 = avxf(htfm2.m128, htfm2.m128); - - const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP), - htfm00, - madd(shuffle<1>(P_curve_0_1 - vPP), - htfm11, - shuffle<2>(P_curve_0_1 - vPP) * htfm22)); - const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP), - htfm00, - madd(shuffle<1>(P_curve_2_3 - vPP), - htfm11, - shuffle<2>(P_curve_2_3 - vPP)*htfm22)); - - const ssef p0 = _mm256_castps256_ps128(p01); - const ssef p1 = _mm256_extractf128_ps(p01, 1); - const ssef p2 = _mm256_castps256_ps128(p23); - const ssef p3 = _mm256_extractf128_ps(p23, 1); - - const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1); - r_st = ((float4 &)P_curve_1).w; - const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3); - r_en = ((float4 &)P_curve_2).w; -#else /* __KERNEL_AVX2__ */ - ssef htfm[] = { htfm0, htfm1, htfm2 }; - ssef vP = load4f(P); - ssef p0 = transform_point_T3(htfm, P_curve[0] - vP); - ssef p1 = transform_point_T3(htfm, P_curve[1] - vP); - ssef p2 = transform_point_T3(htfm, P_curve[2] - vP); - ssef p3 = transform_point_T3(htfm, P_curve[3] - vP); - - r_st = 
((float4 &)P_curve[1]).w; - r_en = ((float4 &)P_curve[2]).w; -#endif /* __KERNEL_AVX2__ */ - - float fc = 0.71f; - ssef vfc = ssef(fc); - ssef vfcxp3 = vfc * p3; - - vcurve_coef[0] = p1; - vcurve_coef[1] = vfc * (p2 - p0); - vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3))); - vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3)); - - } -#else - float3 curve_coef[4]; - - /* curve Intersection check */ - /* obtain curve parameters */ - { - /* ray transform created - this should be created at beginning of intersection loop */ - Transform htfm; - float d = sqrtf(dir.x * dir.x + dir.z * dir.z); - htfm = make_transform( - dir.z / d, 0, -dir.x /d, 0, - -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0, - dir.x, dir.y, dir.z, 0); - - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; - int k1 = k0 + 1; - - int ka = max(k0 - 1,__float_as_int(v00.x)); - int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); - - float4 P_curve[4]; - - if(is_curve_primitive) { - P_curve[0] = kernel_tex_fetch(__curve_keys, ka); - P_curve[1] = kernel_tex_fetch(__curve_keys, k0); - P_curve[2] = kernel_tex_fetch(__curve_keys, k1); - P_curve[3] = kernel_tex_fetch(__curve_keys, kb); - } - else { - int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; - motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve); - } - - float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P); - float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P); - float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P); - float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P); - - float fc = 0.71f; - curve_coef[0] = p1; - curve_coef[1] = -fc*p0 + fc*p2; - curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3; - curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3; - r_st = P_curve[1].w; - r_en = P_curve[2].w; - } -#endif - - float r_curr = max(r_st, r_en); - - if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING)) - epsilon = 2 * r_curr; - - /* find bounds - this is slow for cubic curves */ - float upper, lower; - - float zextrem[4]; - curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z); - if(lower - r_curr > isect->t || upper + r_curr < epsilon) - return false; - - /* minimum width extension */ - float mw_extension = min(difl * fabsf(upper), extmax); - float r_ext = mw_extension + r_curr; - - float xextrem[4]; - curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); - if(lower > r_ext || upper < -r_ext) - return false; - - float yextrem[4]; - curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); - if(lower > r_ext || upper < -r_ext) - return false; - - /* setup recurrent loop */ - int level = 1 << depth; - int tree = 0; - float resol = 1.0f / (float)level; - bool hit = false; - - /* begin loop */ - while(!(tree >> (depth))) { - const float i_st = tree * resol; - const float i_en 
= i_st + (level * resol); - -#ifdef __KERNEL_SSE2__ - ssef vi_st = ssef(i_st), vi_en = ssef(i_en); - ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]); - ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]); - - ssef vbmin = min(vp_st, vp_en); - ssef vbmax = max(vp_st, vp_en); - - float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax; - float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z; - float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z; - float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en; -#else - float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0]; - float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0]; - - float bminx = min(p_st.x, p_en.x); - float bmaxx = max(p_st.x, p_en.x); - float bminy = min(p_st.y, p_en.y); - float bmaxy = max(p_st.y, p_en.y); - float bminz = min(p_st.z, p_en.z); - float bmaxz = max(p_st.z, p_en.z); -#endif - - if(xextrem[0] >= i_st && xextrem[0] <= i_en) { - bminx = min(bminx,xextrem[1]); - bmaxx = max(bmaxx,xextrem[1]); - } - if(xextrem[2] >= i_st && xextrem[2] <= i_en) { - bminx = min(bminx,xextrem[3]); - bmaxx = max(bmaxx,xextrem[3]); - } - if(yextrem[0] >= i_st && yextrem[0] <= i_en) { - bminy = min(bminy,yextrem[1]); - bmaxy = max(bmaxy,yextrem[1]); - } - if(yextrem[2] >= i_st && yextrem[2] <= i_en) { - bminy = min(bminy,yextrem[3]); - bmaxy = max(bmaxy,yextrem[3]); - } - if(zextrem[0] >= i_st && zextrem[0] <= i_en) { - bminz = min(bminz,zextrem[1]); - bmaxz = max(bmaxz,zextrem[1]); - } - if(zextrem[2] >= i_st && zextrem[2] <= i_en) { - bminz = min(bminz,zextrem[3]); - bmaxz = max(bmaxz,zextrem[3]); - } - - float r1 = r_st + (r_en - r_st) * i_st; - float r2 = r_st + (r_en - r_st) * i_en; - r_curr = max(r1, r2); - - mw_extension = min(difl * fabsf(bmaxz), extmax); - float r_ext = mw_extension + 
r_curr; - float coverage = 1.0f; - - if(bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { - /* the bounding box does not overlap the square centered at O */ - tree += level; - level = tree & -tree; - } - else if(level == 1) { - - /* the maximum recursion depth is reached. - * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. - * dP* is reversed if necessary.*/ - float t = isect->t; - float u = 0.0f; - float gd = 0.0f; - - if(flags & CURVE_KN_RIBBONS) { - float3 tg = (p_en - p_st); -#ifdef __KERNEL_SSE__ - const float3 tg_sq = tg * tg; - float w = tg_sq.x + tg_sq.y; -#else - float w = tg.x * tg.x + tg.y * tg.y; -#endif - if(w == 0) { - tree++; - level = tree & -tree; - continue; - } -#ifdef __KERNEL_SSE__ - const float3 p_sttg = p_st * tg; - w = -(p_sttg.x + p_sttg.y) / w; -#else - w = -(p_st.x * tg.x + p_st.y * tg.y) / w; -#endif - w = saturate(w); - - /* compute u on the curve segment */ - u = i_st * (1 - w) + i_en * w; - r_curr = r_st + (r_en - r_st) * u; - /* compare x-y distances */ - float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0]; - - float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if(dot(tg, dp_st)< 0) - dp_st *= -1; - if(dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { - tree++; - level = tree & -tree; - continue; - } - float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if(dot(tg, dp_en) < 0) - dp_en *= -1; - if(dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { - tree++; - level = tree & -tree; - continue; - } - - /* compute coverage */ - float r_ext = r_curr; - coverage = 1.0f; - if(difl != 0.0f) { - mw_extension = min(difl * fabsf(bmaxz), extmax); - r_ext = mw_extension + r_curr; -#ifdef __KERNEL_SSE__ - const float3 p_curr_sq = p_curr * p_curr; - const float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128))); - float d = dxxx.x; -#else - float d = sqrtf(p_curr.x * 
p_curr.x + p_curr.y * p_curr.y); -#endif - float d0 = d - r_curr; - float d1 = d + r_curr; - float inv_mw_extension = 1.0f/mw_extension; - if(d0 >= 0) - coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f; - else // inside - coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f; - } - - if(p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { - tree++; - level = tree & -tree; - continue; - } - - t = p_curr.z; - - /* stochastic fade from minimum width */ - if(difl != 0.0f && lcg_state) { - if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) - return hit; - } - } - else { - float l = len(p_en - p_st); - /* minimum width extension */ - float or1 = r1; - float or2 = r2; - - if(difl != 0.0f) { - mw_extension = min(len(p_st - P) * difl, extmax); - or1 = r1 < mw_extension ? mw_extension : r1; - mw_extension = min(len(p_en - P) * difl, extmax); - or2 = r2 < mw_extension ? 
mw_extension : r2; - } - /* --- */ - float invl = 1.0f/l; - float3 tg = (p_en - p_st) * invl; - gd = (or2 - or1) * invl; - float difz = -dot(p_st,tg); - float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); - float invcyla = 1.0f/cyla; - float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1))); - float tcentre = -halfb*invcyla; - float zcentre = difz + (tg.z * tcentre); - float3 tdif = - p_st; - tdif.z += tcentre; - float tdifz = dot(tdif,tg); - float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); - float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; - float td = tb*tb - 4*cyla*tc; - if(td < 0.0f) { - tree++; - level = tree & -tree; - continue; - } - - float rootd = sqrtf(td); - float correction = (-tb - rootd) * 0.5f * invcyla; - t = tcentre + correction; - - float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if(dot(tg, dp_st)< 0) - dp_st *= -1; - float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if(dot(tg, dp_en) < 0) - dp_en *= -1; - - if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { - correction = (-tb + rootd) * 0.5f * invcyla; - t = tcentre + correction; - } - - if(dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { - tree++; - level = tree & -tree; - continue; - } - - float w = (zcentre + (tg.z * correction)) * invl; - w = saturate(w); - /* compute u on the curve segment */ - u = i_st * (1 - w) + i_en * w; - - /* stochastic fade from minimum width */ - if(difl != 0.0f && lcg_state) { - r_curr = r1 + (r2 - r1) * w; - r_ext = or1 + (or2 - or1) * w; - coverage = r_curr/r_ext; - - if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) - return hit; - } - } - /* we found a new intersection */ - -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. 
we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) -#endif - { - /* record intersection */ - isect->t = t; - isect->u = u; - isect->v = gd; - isect->prim = curveAddr; - isect->object = object; - isect->type = type; - hit = true; - } - - tree++; - level = tree & -tree; - } - else { - /* split the curve into two curves and process */ - level = level >> 1; - } - } - - return hit; + const bool is_curve_primitive = (type & PRIMITIVE_CURVE); + + if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); + if (time < prim_time.x || time > prim_time.y) { + return false; + } + } + + int segment = PRIMITIVE_UNPACK_SEGMENT(type); + float epsilon = 0.0f; + float r_st, r_en; + + int depth = kernel_data.curve.subdivisions; + int flags = kernel_data.curve.curveflags; + int prim = kernel_tex_fetch(__prim_index, curveAddr); + +# ifdef __KERNEL_SSE2__ + ssef vdir = load4f(dir); + ssef vcurve_coef[4]; + const float3 *curve_coef = (float3 *)vcurve_coef; + + { + ssef dtmp = vdir * vdir; + ssef d_ss = mm_sqrt(dtmp + shuffle<2>(dtmp)); + ssef rd_ss = load1f_first(1.0f) / d_ss; + + ssei v00vec = load4i((ssei *)&kg->__curves.data[prim]); + int2 &v00 = (int2 &)v00vec; + + int k0 = v00.x + segment; + int k1 = k0 + 1; + int ka = max(k0 - 1, v00.x); + int kb = min(k1 + 1, v00.x + v00.y - 1); + +# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \ + (!defined(_MSC_VER) || _MSC_VER > 1800) + avxf P_curve_0_1, P_curve_2_3; + if (is_curve_primitive) { + P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x); + P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x); + } + else { + int fobject = (object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_cardinal_curve_keys_avx( + kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1, &P_curve_2_3); + } +# else /* __KERNEL_AVX2__ */ + ssef P_curve[4]; + + if (is_curve_primitive) { + P_curve[0] = load4f(&kg->__curve_keys.data[ka].x); + P_curve[1] = load4f(&kg->__curve_keys.data[k0].x); + P_curve[2] = load4f(&kg->__curve_keys.data[k1].x); + P_curve[3] = load4f(&kg->__curve_keys.data[kb].x); + } + else { + int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4 *)&P_curve); + } +# endif /* __KERNEL_AVX2__ */ + + ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss)); + ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn; + ssef mul_yz = shuffle<1, 2, 1, 2>(vdir) * mul_zxxy; + ssef mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz); + ssef vdir0 = vdir & cast(ssei(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)); + + ssef htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0); + ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0); + ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); + +# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) && \ + (!defined(_MSC_VER) || _MSC_VER > 1800) + const avxf vPP = _mm256_broadcast_ps(&P.m128); + const avxf htfm00 = avxf(htfm0.m128, htfm0.m128); + const avxf htfm11 = avxf(htfm1.m128, htfm1.m128); + const avxf htfm22 = avxf(htfm2.m128, htfm2.m128); + + const avxf p01 = madd( + shuffle<0>(P_curve_0_1 - vPP), + htfm00, + madd(shuffle<1>(P_curve_0_1 - vPP), htfm11, shuffle<2>(P_curve_0_1 - vPP) * htfm22)); + const avxf p23 = madd( + shuffle<0>(P_curve_2_3 - vPP), + htfm00, + madd(shuffle<1>(P_curve_2_3 - vPP), htfm11, shuffle<2>(P_curve_2_3 - vPP) * htfm22)); + + const ssef p0 = _mm256_castps256_ps128(p01); + const ssef p1 = _mm256_extractf128_ps(p01, 1); + const ssef p2 = _mm256_castps256_ps128(p23); + const ssef p3 = _mm256_extractf128_ps(p23, 1); + + 
const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1); + r_st = ((float4 &)P_curve_1).w; + const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3); + r_en = ((float4 &)P_curve_2).w; +# else /* __KERNEL_AVX2__ */ + ssef htfm[] = {htfm0, htfm1, htfm2}; + ssef vP = load4f(P); + ssef p0 = transform_point_T3(htfm, P_curve[0] - vP); + ssef p1 = transform_point_T3(htfm, P_curve[1] - vP); + ssef p2 = transform_point_T3(htfm, P_curve[2] - vP); + ssef p3 = transform_point_T3(htfm, P_curve[3] - vP); + + r_st = ((float4 &)P_curve[1]).w; + r_en = ((float4 &)P_curve[2]).w; +# endif /* __KERNEL_AVX2__ */ + + float fc = 0.71f; + ssef vfc = ssef(fc); + ssef vfcxp3 = vfc * p3; + + vcurve_coef[0] = p1; + vcurve_coef[1] = vfc * (p2 - p0); + vcurve_coef[2] = madd( + ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3))); + vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3)); + } +# else + float3 curve_coef[4]; + + /* curve Intersection check */ + /* obtain curve parameters */ + { + /* ray transform created - this should be created at beginning of intersection loop */ + Transform htfm; + float d = sqrtf(dir.x * dir.x + dir.z * dir.z); + htfm = make_transform(dir.z / d, + 0, + -dir.x / d, + 0, + -dir.x * dir.y / d, + d, + -dir.y * dir.z / d, + 0, + dir.x, + dir.y, + dir.z, + 0); + + float4 v00 = kernel_tex_fetch(__curves, prim); + + int k0 = __float_as_int(v00.x) + segment; + int k1 = k0 + 1; + + int ka = max(k0 - 1, __float_as_int(v00.x)); + int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + + float4 P_curve[4]; + + if (is_curve_primitive) { + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + } + else { + int fobject = (object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve); + } + + float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P); + float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P); + float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P); + float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P); + + float fc = 0.71f; + curve_coef[0] = p1; + curve_coef[1] = -fc * p0 + fc * p2; + curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3; + curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3; + r_st = P_curve[1].w; + r_en = P_curve[2].w; + } +# endif + + float r_curr = max(r_st, r_en); + + if ((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING)) + epsilon = 2 * r_curr; + + /* find bounds - this is slow for cubic curves */ + float upper, lower; + + float zextrem[4]; + curvebounds(&lower, + &upper, + &zextrem[0], + &zextrem[1], + &zextrem[2], + &zextrem[3], + curve_coef[0].z, + curve_coef[1].z, + curve_coef[2].z, + curve_coef[3].z); + if (lower - r_curr > isect->t || upper + r_curr < epsilon) + return false; + + /* minimum width extension */ + float mw_extension = min(difl * fabsf(upper), extmax); + float r_ext = mw_extension + r_curr; + + float xextrem[4]; + curvebounds(&lower, + &upper, + &xextrem[0], + &xextrem[1], + &xextrem[2], + &xextrem[3], + curve_coef[0].x, + curve_coef[1].x, + curve_coef[2].x, + curve_coef[3].x); + if (lower > r_ext || upper < -r_ext) + return false; + + float yextrem[4]; + curvebounds(&lower, + &upper, + &yextrem[0], + &yextrem[1], + &yextrem[2], + &yextrem[3], + curve_coef[0].y, + curve_coef[1].y, + curve_coef[2].y, + curve_coef[3].y); + if (lower > r_ext || upper < -r_ext) + return false; + + /* setup recurrent loop */ + int level = 1 << depth; + int tree = 0; + float resol = 1.0f / (float)level; + bool hit = false; + + /* begin loop */ + while (!(tree >> 
(depth))) { + const float i_st = tree * resol; + const float i_en = i_st + (level * resol); + +# ifdef __KERNEL_SSE2__ + ssef vi_st = ssef(i_st), vi_en = ssef(i_en); + ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), + vi_st, + vcurve_coef[0]); + ssef vp_en = madd(madd(madd(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), + vi_en, + vcurve_coef[0]); + + ssef vbmin = min(vp_st, vp_en); + ssef vbmax = max(vp_st, vp_en); + + float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax; + float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z; + float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z; + float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en; +# else + float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + + curve_coef[0]; + float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + + curve_coef[0]; + + float bminx = min(p_st.x, p_en.x); + float bmaxx = max(p_st.x, p_en.x); + float bminy = min(p_st.y, p_en.y); + float bmaxy = max(p_st.y, p_en.y); + float bminz = min(p_st.z, p_en.z); + float bmaxz = max(p_st.z, p_en.z); +# endif + + if (xextrem[0] >= i_st && xextrem[0] <= i_en) { + bminx = min(bminx, xextrem[1]); + bmaxx = max(bmaxx, xextrem[1]); + } + if (xextrem[2] >= i_st && xextrem[2] <= i_en) { + bminx = min(bminx, xextrem[3]); + bmaxx = max(bmaxx, xextrem[3]); + } + if (yextrem[0] >= i_st && yextrem[0] <= i_en) { + bminy = min(bminy, yextrem[1]); + bmaxy = max(bmaxy, yextrem[1]); + } + if (yextrem[2] >= i_st && yextrem[2] <= i_en) { + bminy = min(bminy, yextrem[3]); + bmaxy = max(bmaxy, yextrem[3]); + } + if (zextrem[0] >= i_st && zextrem[0] <= i_en) { + bminz = min(bminz, zextrem[1]); + bmaxz = max(bmaxz, zextrem[1]); + } + if (zextrem[2] >= i_st && zextrem[2] <= i_en) { + bminz = min(bminz, zextrem[3]); + bmaxz = max(bmaxz, zextrem[3]); + } + + float r1 = r_st + (r_en - r_st) * i_st; + float r2 = r_st + (r_en - r_st) * i_en; + r_curr 
= max(r1, r2); + + mw_extension = min(difl * fabsf(bmaxz), extmax); + float r_ext = mw_extension + r_curr; + float coverage = 1.0f; + + if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext || bmaxx < -r_ext || + bminy > r_ext || bmaxy < -r_ext) { + /* the bounding box does not overlap the square centered at O */ + tree += level; + level = tree & -tree; + } + else if (level == 1) { + + /* the maximum recursion depth is reached. + * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. + * dP* is reversed if necessary.*/ + float t = isect->t; + float u = 0.0f; + float gd = 0.0f; + + if (flags & CURVE_KN_RIBBONS) { + float3 tg = (p_en - p_st); +# ifdef __KERNEL_SSE__ + const float3 tg_sq = tg * tg; + float w = tg_sq.x + tg_sq.y; +# else + float w = tg.x * tg.x + tg.y * tg.y; +# endif + if (w == 0) { + tree++; + level = tree & -tree; + continue; + } +# ifdef __KERNEL_SSE__ + const float3 p_sttg = p_st * tg; + w = -(p_sttg.x + p_sttg.y) / w; +# else + w = -(p_st.x * tg.x + p_st.y * tg.y) / w; +# endif + w = saturate(w); + + /* compute u on the curve segment */ + u = i_st * (1 - w) + i_en * w; + r_curr = r_st + (r_en - r_st) * u; + /* compare x-y distances */ + float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + + curve_coef[0]; + + float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; + if (dot(tg, dp_st) < 0) + dp_st *= -1; + if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { + tree++; + level = tree & -tree; + continue; + } + float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; + if (dot(tg, dp_en) < 0) + dp_en *= -1; + if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { + tree++; + level = tree & -tree; + continue; + } + + /* compute coverage */ + float r_ext = r_curr; + coverage = 1.0f; + if (difl != 0.0f) { + mw_extension = min(difl * fabsf(bmaxz), extmax); + r_ext = mw_extension + r_curr; +# ifdef __KERNEL_SSE__ + const float3 p_curr_sq = p_curr * p_curr; + const 
float3 dxxx(_mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128))); + float d = dxxx.x; +# else + float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); +# endif + float d0 = d - r_curr; + float d1 = d + r_curr; + float inv_mw_extension = 1.0f / mw_extension; + if (d0 >= 0) + coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * + 0.5f; + else // inside + coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * + 0.5f; + } + + if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || + isect->t < p_curr.z) { + tree++; + level = tree & -tree; + continue; + } + + t = p_curr.z; + + /* stochastic fade from minimum width */ + if (difl != 0.0f && lcg_state) { + if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) + return hit; + } + } + else { + float l = len(p_en - p_st); + /* minimum width extension */ + float or1 = r1; + float or2 = r2; + + if (difl != 0.0f) { + mw_extension = min(len(p_st - P) * difl, extmax); + or1 = r1 < mw_extension ? mw_extension : r1; + mw_extension = min(len(p_en - P) * difl, extmax); + or2 = r2 < mw_extension ? 
mw_extension : r2; + } + /* --- */ + float invl = 1.0f / l; + float3 tg = (p_en - p_st) * invl; + gd = (or2 - or1) * invl; + float difz = -dot(p_st, tg); + float cyla = 1.0f - (tg.z * tg.z * (1 + gd * gd)); + float invcyla = 1.0f / cyla; + float halfb = (-p_st.z - tg.z * (difz + gd * (difz * gd + or1))); + float tcentre = -halfb * invcyla; + float zcentre = difz + (tg.z * tcentre); + float3 tdif = -p_st; + tdif.z += tcentre; + float tdifz = dot(tdif, tg); + float tb = 2 * (tdif.z - tg.z * (tdifz + gd * (tdifz * gd + or1))); + float tc = dot(tdif, tdif) - tdifz * tdifz * (1 + gd * gd) - or1 * or1 - + 2 * or1 * tdifz * gd; + float td = tb * tb - 4 * cyla * tc; + if (td < 0.0f) { + tree++; + level = tree & -tree; + continue; + } + + float rootd = sqrtf(td); + float correction = (-tb - rootd) * 0.5f * invcyla; + t = tcentre + correction; + + float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; + if (dot(tg, dp_st) < 0) + dp_st *= -1; + float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; + if (dot(tg, dp_en) < 0) + dp_en *= -1; + + if (flags & CURVE_KN_BACKFACING && + (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || + isect->t < t || t <= 0.0f)) { + correction = (-tb + rootd) * 0.5f * invcyla; + t = tcentre + correction; + } + + if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || + isect->t < t || t <= 0.0f) { + tree++; + level = tree & -tree; + continue; + } + + float w = (zcentre + (tg.z * correction)) * invl; + w = saturate(w); + /* compute u on the curve segment */ + u = i_st * (1 - w) + i_en * w; + + /* stochastic fade from minimum width */ + if (difl != 0.0f && lcg_state) { + r_curr = r1 + (r2 - r1) * w; + r_ext = or1 + (or2 - or1) * w; + coverage = r_curr / r_ext; + + if (coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) + return hit; + } + } + /* we found a new intersection */ + +# ifdef __VISIBILITY_FLAG__ + /* visibility flag 
test. we do it here under the assumption + * that most triangles are culled by node flags */ + if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) +# endif + { + /* record intersection */ + isect->t = t; + isect->u = u; + isect->v = gd; + isect->prim = curveAddr; + isect->object = object; + isect->type = type; + hit = true; + } + + tree++; + level = tree & -tree; + } + else { + /* split the curve into two curves and process */ + level = level >> 1; + } + } + + return hit; } ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, @@ -511,245 +561,247 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, float difl, float extmax) { - /* define few macros to minimize code duplication for SSE */ -#ifndef __KERNEL_SSE2__ -# define len3_squared(x) len_squared(x) -# define len3(x) len(x) -# define dot3(x, y) dot(x, y) -#endif - - const bool is_curve_primitive = (type & PRIMITIVE_CURVE); - - if(!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { - const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); - if(time < prim_time.x || time > prim_time.y) { - return false; - } - } - - int segment = PRIMITIVE_UNPACK_SEGMENT(type); - /* curve Intersection check */ - int flags = kernel_data.curve.curveflags; - - int prim = kernel_tex_fetch(__prim_index, curveAddr); - float4 v00 = kernel_tex_fetch(__curves, prim); - - int cnum = __float_as_int(v00.x); - int k0 = cnum + segment; - int k1 = k0 + 1; - -#ifndef __KERNEL_SSE2__ - float4 P_curve[2]; - - if(is_curve_primitive) { - P_curve[0] = kernel_tex_fetch(__curve_keys, k0); - P_curve[1] = kernel_tex_fetch(__curve_keys, k1); - } - else { - int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; - motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve); - } - - float or1 = P_curve[0].w; - float or2 = P_curve[1].w; - float3 p1 = float4_to_float3(P_curve[0]); - float3 p2 = float4_to_float3(P_curve[1]); - - /* minimum width extension */ - float r1 = or1; - float r2 = or2; - float3 dif = P - p1; - float3 dif_second = P - p2; - if(difl != 0.0f) { - float pixelsize = min(len3(dif) * difl, extmax); - r1 = or1 < pixelsize ? pixelsize : or1; - pixelsize = min(len3(dif_second) * difl, extmax); - r2 = or2 < pixelsize ? pixelsize : or2; - } - /* --- */ - - float3 p21_diff = p2 - p1; - float3 sphere_dif1 = (dif + dif_second) * 0.5f; - float3 dir = direction; - float sphere_b_tmp = dot3(dir, sphere_dif1); - float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; -#else - ssef P_curve[2]; - - if(is_curve_primitive) { - P_curve[0] = load4f(&kg->__curve_keys.data[k0].x); - P_curve[1] = load4f(&kg->__curve_keys.data[k1].x); - } - else { - int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; - motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve); - } - - const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); - - ssef r12 = or12; - const ssef vP = load4f(P); - const ssef dif = vP - P_curve[0]; - const ssef dif_second = vP - P_curve[1]; - if(difl != 0.0f) { - const ssef len1_sq = len3_squared_splat(dif); - const ssef len2_sq = len3_squared_splat(dif_second); - const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); - const ssef pixelsize12 = min(len12 * difl, ssef(extmax)); - r12 = max(or12, pixelsize12); - } - float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12)); - float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12)); - - const ssef p21_diff = P_curve[1] - P_curve[0]; - const ssef sphere_dif1 = (dif + dif_second) * 0.5f; - const ssef dir = load4f(direction); - const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1); - const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1); -#endif - - float mr = max(r1, r2); - float l = len3(p21_diff); - float invl = 1.0f / l; - float sp_r = mr + 0.5f * l; - - float sphere_b = dot3(dir, sphere_dif2); - float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r; - - if(sdisc < 0.0f) - return false; - - /* obtain parameters and test midpoint distance for suitable modes */ -#ifndef __KERNEL_SSE2__ - float3 tg = p21_diff * invl; -#else - const ssef tg = p21_diff * invl; -#endif - float gd = (r2 - r1) * invl; - - float dirz = dot3(dir, tg); - float difz = dot3(dif, tg); - - float a = 1.0f - (dirz*dirz*(1 + gd*gd)); - - float halfb = dot3(dir, dif) - dirz*(difz + gd*(difz*gd + r1)); - - float tcentre = -halfb/a; - float zcentre = difz + (dirz * tcentre); - - if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE)) - return false; - if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION)) - return false; - - /* test minimum separation */ -#ifndef 
__KERNEL_SSE2__ - float3 cprod = cross(tg, dir); - float cprod2sq = len3_squared(cross(tg, dif)); -#else - const ssef cprod = cross(tg, dir); - float cprod2sq = len3_squared(cross_zxy(tg, dif)); -#endif - float cprodsq = len3_squared(cprod); - float distscaled = dot3(cprod, dif); - - if(cprodsq == 0) - distscaled = cprod2sq; - else - distscaled = (distscaled*distscaled)/cprodsq; - - if(distscaled > mr*mr) - return false; - - /* calculate true intersection */ -#ifndef __KERNEL_SSE2__ - float3 tdif = dif + tcentre * dir; -#else - const ssef tdif = madd(ssef(tcentre), dir, dif); -#endif - float tdifz = dot3(tdif, tg); - float tdifma = tdifz*gd + r1; - float tb = 2*(dot3(dir, tdif) - dirz*(tdifz + gd*tdifma)); - float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma; - float td = tb*tb - 4*a*tc; - - if(td < 0.0f) - return false; - - float rootd = 0.0f; - float correction = 0.0f; - if(flags & CURVE_KN_ACCURATE) { - rootd = sqrtf(td); - correction = ((-tb - rootd)/(2*a)); - } - - float t = tcentre + correction; - - if(t < isect->t) { - - if(flags & CURVE_KN_INTERSECTCORRECTION) { - rootd = sqrtf(td); - correction = ((-tb - rootd)/(2*a)); - t = tcentre + correction; - } - - float z = zcentre + (dirz * correction); - // bool backface = false; - - if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { - // backface = true; - correction = ((-tb + rootd)/(2*a)); - t = tcentre + correction; - z = zcentre + (dirz * correction); - } - - /* stochastic fade from minimum width */ - float adjradius = or1 + z * (or2 - or1) * invl; - adjradius = adjradius / (r1 + z * gd); - if(lcg_state && adjradius != 1.0f) { - if(lcg_step_float(lcg_state) > adjradius) - return false; - } - /* --- */ - - if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { - - if(flags & CURVE_KN_ENCLOSEFILTER) { - float enc_ratio = 1.01f; - if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { - float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); - float c2 = dot3(dif, dif) 
- difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio; - if(a2*c2 < 0.0f) - return false; - } - } - -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) -#endif - { - /* record intersection */ - isect->t = t; - isect->u = z*invl; - isect->v = gd; - isect->prim = curveAddr; - isect->object = object; - isect->type = type; - - return true; - } - } - } - - return false; - -#ifndef __KERNEL_SSE2__ -# undef len3_squared -# undef len3 -# undef dot3 -#endif + /* define few macros to minimize code duplication for SSE */ +# ifndef __KERNEL_SSE2__ +# define len3_squared(x) len_squared(x) +# define len3(x) len(x) +# define dot3(x, y) dot(x, y) +# endif + + const bool is_curve_primitive = (type & PRIMITIVE_CURVE); + + if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { + const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); + if (time < prim_time.x || time > prim_time.y) { + return false; + } + } + + int segment = PRIMITIVE_UNPACK_SEGMENT(type); + /* curve Intersection check */ + int flags = kernel_data.curve.curveflags; + + int prim = kernel_tex_fetch(__prim_index, curveAddr); + float4 v00 = kernel_tex_fetch(__curves, prim); + + int cnum = __float_as_int(v00.x); + int k0 = cnum + segment; + int k1 = k0 + 1; + +# ifndef __KERNEL_SSE2__ + float4 P_curve[2]; + + if (is_curve_primitive) { + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + int fobject = (object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve); + } + + float or1 = P_curve[0].w; + float or2 = P_curve[1].w; + float3 p1 = float4_to_float3(P_curve[0]); + float3 p2 = float4_to_float3(P_curve[1]); + + /* minimum width extension */ + float r1 = or1; + float r2 = or2; + float3 dif = P - p1; + float3 dif_second = P - p2; + if (difl != 0.0f) { + float pixelsize = min(len3(dif) * difl, extmax); + r1 = or1 < pixelsize ? pixelsize : or1; + pixelsize = min(len3(dif_second) * difl, extmax); + r2 = or2 < pixelsize ? pixelsize : or2; + } + /* --- */ + + float3 p21_diff = p2 - p1; + float3 sphere_dif1 = (dif + dif_second) * 0.5f; + float3 dir = direction; + float sphere_b_tmp = dot3(dir, sphere_dif1); + float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; +# else + ssef P_curve[2]; + + if (is_curve_primitive) { + P_curve[0] = load4f(&kg->__curve_keys.data[k0].x); + P_curve[1] = load4f(&kg->__curve_keys.data[k1].x); + } + else { + int fobject = (object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4 *)&P_curve); + } + + const ssef or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); + + ssef r12 = or12; + const ssef vP = load4f(P); + const ssef dif = vP - P_curve[0]; + const ssef dif_second = vP - P_curve[1]; + if (difl != 0.0f) { + const ssef len1_sq = len3_squared_splat(dif); + const ssef len2_sq = len3_squared_splat(dif_second); + const ssef len12 = mm_sqrt(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); + const ssef pixelsize12 = min(len12 * difl, ssef(extmax)); + r12 = max(or12, pixelsize12); + } + float or1 = extract<0>(or12), or2 = extract<0>(shuffle<2>(or12)); + float r1 = extract<0>(r12), r2 = extract<0>(shuffle<2>(r12)); + + const ssef p21_diff = P_curve[1] - P_curve[0]; + const ssef sphere_dif1 = (dif + dif_second) * 0.5f; + const ssef dir = load4f(direction); + const ssef sphere_b_tmp = dot3_splat(dir, sphere_dif1); + const ssef sphere_dif2 = nmadd(sphere_b_tmp, dir, sphere_dif1); +# endif + + float mr = max(r1, r2); + float l = len3(p21_diff); + float invl = 1.0f / l; + float sp_r = mr + 0.5f * l; + + float sphere_b = dot3(dir, sphere_dif2); + float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r; + + if (sdisc < 0.0f) + return false; + + /* obtain parameters and test midpoint distance for suitable modes */ +# ifndef __KERNEL_SSE2__ + float3 tg = p21_diff * invl; +# else + const ssef tg = p21_diff * invl; +# endif + float gd = (r2 - r1) * invl; + + float dirz = dot3(dir, tg); + float difz = dot3(dif, tg); + + float a = 1.0f - (dirz * dirz * (1 + gd * gd)); + + float halfb = dot3(dir, dif) - dirz * (difz + gd * (difz * gd + r1)); + + float tcentre = -halfb / a; + float zcentre = difz + (dirz * tcentre); + + if ((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE)) + return false; + if ((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && + !(flags & CURVE_KN_INTERSECTCORRECTION)) + return false; + + /* test 
minimum separation */ +# ifndef __KERNEL_SSE2__ + float3 cprod = cross(tg, dir); + float cprod2sq = len3_squared(cross(tg, dif)); +# else + const ssef cprod = cross(tg, dir); + float cprod2sq = len3_squared(cross_zxy(tg, dif)); +# endif + float cprodsq = len3_squared(cprod); + float distscaled = dot3(cprod, dif); + + if (cprodsq == 0) + distscaled = cprod2sq; + else + distscaled = (distscaled * distscaled) / cprodsq; + + if (distscaled > mr * mr) + return false; + + /* calculate true intersection */ +# ifndef __KERNEL_SSE2__ + float3 tdif = dif + tcentre * dir; +# else + const ssef tdif = madd(ssef(tcentre), dir, dif); +# endif + float tdifz = dot3(tdif, tg); + float tdifma = tdifz * gd + r1; + float tb = 2 * (dot3(dir, tdif) - dirz * (tdifz + gd * tdifma)); + float tc = dot3(tdif, tdif) - tdifz * tdifz - tdifma * tdifma; + float td = tb * tb - 4 * a * tc; + + if (td < 0.0f) + return false; + + float rootd = 0.0f; + float correction = 0.0f; + if (flags & CURVE_KN_ACCURATE) { + rootd = sqrtf(td); + correction = ((-tb - rootd) / (2 * a)); + } + + float t = tcentre + correction; + + if (t < isect->t) { + + if (flags & CURVE_KN_INTERSECTCORRECTION) { + rootd = sqrtf(td); + correction = ((-tb - rootd) / (2 * a)); + t = tcentre + correction; + } + + float z = zcentre + (dirz * correction); + // bool backface = false; + + if (flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { + // backface = true; + correction = ((-tb + rootd) / (2 * a)); + t = tcentre + correction; + z = zcentre + (dirz * correction); + } + + /* stochastic fade from minimum width */ + float adjradius = or1 + z * (or2 - or1) * invl; + adjradius = adjradius / (r1 + z * gd); + if (lcg_state && adjradius != 1.0f) { + if (lcg_step_float(lcg_state) > adjradius) + return false; + } + /* --- */ + + if (t > 0.0f && t < isect->t && z >= 0 && z <= l) { + + if (flags & CURVE_KN_ENCLOSEFILTER) { + float enc_ratio = 1.01f; + if ((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { + float 
a2 = 1.0f - (dirz * dirz * (1 + gd * gd * enc_ratio * enc_ratio)); + float c2 = dot3(dif, dif) - difz * difz * (1 + gd * gd * enc_ratio * enc_ratio) - + r1 * r1 * enc_ratio * enc_ratio - 2 * r1 * difz * gd * enc_ratio; + if (a2 * c2 < 0.0f) + return false; + } + } + +# ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if (kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) +# endif + { + /* record intersection */ + isect->t = t; + isect->u = z * invl; + isect->v = gd; + isect->prim = curveAddr; + isect->object = object; + isect->type = type; + + return true; + } + } + } + + return false; + +# ifndef __KERNEL_SSE2__ +# undef len3_squared +# undef len3 +# undef dot3 +# endif } ccl_device_inline float3 curve_refine(KernelGlobals *kg, @@ -757,154 +809,154 @@ ccl_device_inline float3 curve_refine(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { - int flag = kernel_data.curve.curveflags; - float t = isect->t; - float3 P = ray->P; - float3 D = ray->D; - - if(isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - int prim = kernel_tex_fetch(__prim_index, isect->prim); - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); - int k1 = k0 + 1; - - float3 tg; - - if(flag & CURVE_KN_INTERPOLATE) { - int ka = max(k0 - 1,__float_as_int(v00.x)); - int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); - - float4 P_curve[4]; - - if(sd->type & PRIMITIVE_CURVE) { - P_curve[0] = kernel_tex_fetch(__curve_keys, ka); - P_curve[1] = kernel_tex_fetch(__curve_keys, k0); - P_curve[2] = kernel_tex_fetch(__curve_keys, k1); - P_curve[3] = 
kernel_tex_fetch(__curve_keys, kb); - } - else { - motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); - } - - float3 p[4]; - p[0] = float4_to_float3(P_curve[0]); - p[1] = float4_to_float3(P_curve[1]); - p[2] = float4_to_float3(P_curve[2]); - p[3] = float4_to_float3(P_curve[3]); - - P = P + D*t; - -#ifdef __UV__ - sd->u = isect->u; - sd->v = 0.0f; -#endif - - tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); - - if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { - sd->Ng = normalize(-(D - tg * (dot(tg, D)))); - } - else { -#ifdef __EMBREE__ - if(kernel_data.bvh.scene) { - sd->Ng = normalize(isect->Ng); - } - else -#endif - { - /* direction from inside to surface of curve */ - float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); - sd->Ng = normalize(P - p_curr); - - /* adjustment for changing radius */ - float gd = isect->v; - - if(gd != 0.0f) { - sd->Ng = sd->Ng - gd * tg; - sd->Ng = normalize(sd->Ng); - } - } - } - - /* todo: sometimes the normal is still so that this is detected as - * backfacing even if cull backfaces is enabled */ - - sd->N = sd->Ng; - } - else { - float4 P_curve[2]; - - if(sd->type & PRIMITIVE_CURVE) { - P_curve[0]= kernel_tex_fetch(__curve_keys, k0); - P_curve[1]= kernel_tex_fetch(__curve_keys, k1); - } - else { - motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); - } - - float l = 1.0f; - tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l); - - P = P + D*t; - - float3 dif = P - float4_to_float3(P_curve[0]); - -#ifdef __UV__ - sd->u = dot(dif,tg)/l; - sd->v = 0.0f; -#endif - - if(flag & CURVE_KN_TRUETANGENTGNORMAL) { - sd->Ng = -(D - tg * dot(tg, D)); - sd->Ng = normalize(sd->Ng); - } - else { - float gd = isect->v; - - /* direction from inside to surface of curve */ - float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f); - sd->Ng = (dif - tg * sd->u * l) / denom; - - /* adjustment for changing radius */ - if(gd != 0.0f) { - sd->Ng = sd->Ng 
- gd * tg; - } - - sd->Ng = normalize(sd->Ng); - } - - sd->N = sd->Ng; - } - -#ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = tg; - sd->dPdv = cross(tg, sd->Ng); -#endif - - if(isect->object != OBJECT_NONE) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; + int flag = kernel_data.curve.curveflags; + float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + + if (isect->object != OBJECT_NONE) { +# ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +# else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +# endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = normalize_len(D, &t); + } + + int prim = kernel_tex_fetch(__prim_index, isect->prim); + float4 v00 = kernel_tex_fetch(__curves, prim); + + int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float3 tg; + + if (flag & CURVE_KN_INTERPOLATE) { + int ka = max(k0 - 1, __float_as_int(v00.x)); + int kb = min(k1 + 1, __float_as_int(v00.x) + __float_as_int(v00.y) - 1); + + float4 P_curve[4]; + + if (sd->type & PRIMITIVE_CURVE) { + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + } + else { + motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); + } + + float3 p[4]; + p[0] = float4_to_float3(P_curve[0]); + p[1] = float4_to_float3(P_curve[1]); + p[2] = float4_to_float3(P_curve[2]); + p[3] = float4_to_float3(P_curve[3]); + + P = P + D * t; + +# ifdef __UV__ + sd->u = isect->u; + sd->v = 0.0f; +# endif + + tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); + + if (kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { + sd->Ng = normalize(-(D - tg * (dot(tg, 
D)))); + } + else { +# ifdef __EMBREE__ + if (kernel_data.bvh.scene) { + sd->Ng = normalize(isect->Ng); + } + else +# endif + { + /* direction from inside to surface of curve */ + float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); + sd->Ng = normalize(P - p_curr); + + /* adjustment for changing radius */ + float gd = isect->v; + + if (gd != 0.0f) { + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); + } + } + } + + /* todo: sometimes the normal is still so that this is detected as + * backfacing even if cull backfaces is enabled */ + + sd->N = sd->Ng; + } + else { + float4 P_curve[2]; + + if (sd->type & PRIMITIVE_CURVE) { + P_curve[0] = kernel_tex_fetch(__curve_keys, k0); + P_curve[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + } + + float l = 1.0f; + tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l); + + P = P + D * t; + + float3 dif = P - float4_to_float3(P_curve[0]); + +# ifdef __UV__ + sd->u = dot(dif, tg) / l; + sd->v = 0.0f; +# endif + + if (flag & CURVE_KN_TRUETANGENTGNORMAL) { + sd->Ng = -(D - tg * dot(tg, D)); + sd->Ng = normalize(sd->Ng); + } + else { + float gd = isect->v; + + /* direction from inside to surface of curve */ + float denom = fmaxf(P_curve[0].w + sd->u * l * gd, 1e-8f); + sd->Ng = (dif - tg * sd->u * l) / denom; + + /* adjustment for changing radius */ + if (gd != 0.0f) { + sd->Ng = sd->Ng - gd * tg; + } + + sd->Ng = normalize(sd->Ng); + } + + sd->N = sd->Ng; + } + +# ifdef __DPDU__ + /* dPdu/dPdv */ + sd->dPdu = tg; + sd->dPdv = cross(tg, sd->Ng); +# endif + + if (isect->object != OBJECT_NONE) { +# ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +# else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +# endif + + P = transform_point(&tfm, P); + } + + return P; } #endif diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h index 
5cc22ae2155..7380c506bf4 100644 --- a/intern/cycles/kernel/geom/geom_motion_curve.h +++ b/intern/cycles/kernel/geom/geom_motion_curve.h @@ -25,96 +25,116 @@ CCL_NAMESPACE_BEGIN #ifdef __HAIR__ -ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem) +ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, + int object, + uint id, + AttributeElement *elem) { - /* todo: find a better (faster) solution for this, maybe store offset per object. - * - * NOTE: currently it's not a bottleneck because in test scenes the loop below runs - * zero iterations and rendering is really slow with motion curves. For until other - * areas are speed up it's probably not so crucial to optimize this out. - */ - uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE; - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while(attr_map.x != id) { - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - *elem = (AttributeElement)attr_map.y; - - /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + /* todo: find a better (faster) solution for this, maybe store offset per object. + * + * NOTE: currently it's not a bottleneck because in test scenes the loop below runs + * zero iterations and rendering is really slow with motion curves. For until other + * areas are speed up it's probably not so crucial to optimize this out. + */ + uint attr_offset = object_attribute_map_offset(kg, object) + ATTR_PRIM_CURVE; + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while (attr_map.x != id) { + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + *elem = (AttributeElement)attr_map.y; + + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; } -ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2]) +ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, + int offset, + int numkeys, + int numsteps, + int step, + int k0, + int k1, + float4 keys[2]) { - if(step == numsteps) { - /* center step: regular key location */ - keys[0] = kernel_tex_fetch(__curve_keys, k0); - keys[1] = kernel_tex_fetch(__curve_keys, k1); - } - else { - /* center step is not stored in this array */ - if(step > numsteps) - step--; - - offset += step*numkeys; - - keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); - } + if (step == numsteps) { + /* center step: regular key location */ + keys[0] = kernel_tex_fetch(__curve_keys, k0); + keys[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numkeys; + + keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + } } /* return 2 curve key locations */ -ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2]) +ccl_device_inline void motion_curve_keys( + KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2]) { - /* get motion info */ - int numsteps, numkeys; - object_motion_info(kg, object, &numsteps, NULL, &numkeys); + /* get motion info */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); - /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps*2; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; + /* figure out which steps we need to fetch and their interpolation factor */ + int 
maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; - /* find attribute */ - AttributeElement elem; - int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); - /* fetch key coordinates */ - float4 next_keys[2]; + /* fetch key coordinates */ + float4 next_keys[2]; - motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys); - motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, next_keys); + motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys); + motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step + 1, k0, k1, next_keys); - /* interpolate between steps */ - keys[0] = (1.0f - t)*keys[0] + t*next_keys[0]; - keys[1] = (1.0f - t)*keys[1] + t*next_keys[1]; + /* interpolate between steps */ + keys[0] = (1.0f - t) * keys[0] + t * next_keys[0]; + keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; } -ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4]) +ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, + int offset, + int numkeys, + int numsteps, + int step, + int k0, + int k1, + int k2, + int k3, + float4 keys[4]) { - if(step == numsteps) { - /* center step: regular key location */ - keys[0] = kernel_tex_fetch(__curve_keys, k0); - keys[1] = kernel_tex_fetch(__curve_keys, k1); - keys[2] = kernel_tex_fetch(__curve_keys, k2); - keys[3] = kernel_tex_fetch(__curve_keys, k3); - } - else { - /* center step is not stored in this array */ - if(step > numsteps) - step--; - - offset += step*numkeys; - - keys[0] = 
kernel_tex_fetch(__attributes_float3, offset + k0); - keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); - keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2); - keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3); - } + if (step == numsteps) { + /* center step: regular key location */ + keys[0] = kernel_tex_fetch(__curve_keys, k0); + keys[1] = kernel_tex_fetch(__curve_keys, k1); + keys[2] = kernel_tex_fetch(__curve_keys, k2); + keys[3] = kernel_tex_fetch(__curve_keys, k3); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numkeys; + + keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2); + keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3); + } } /* return 2 curve key locations */ @@ -122,37 +142,41 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, - int k0, int k1, int k2, int k3, + int k0, + int k1, + int k2, + int k3, float4 keys[4]) { - /* get motion info */ - int numsteps, numkeys; - object_motion_info(kg, object, &numsteps, NULL, &numkeys); - - /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps*2; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; - - /* find attribute */ - AttributeElement elem; - int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch key coordinates */ - float4 next_keys[4]; - - motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); - motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, k2, k3, next_keys); - - /* interpolate between steps */ - keys[0] = (1.0f - t)*keys[0] + t*next_keys[0]; - keys[1] = 
(1.0f - t)*keys[1] + t*next_keys[1]; - keys[2] = (1.0f - t)*keys[2] + t*next_keys[2]; - keys[3] = (1.0f - t)*keys[3] + t*next_keys[3]; + /* get motion info */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch key coordinates */ + float4 next_keys[4]; + + motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); + motion_cardinal_curve_keys_for_step( + kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys); + + /* interpolate between steps */ + keys[0] = (1.0f - t) * keys[0] + t * next_keys[0]; + keys[1] = (1.0f - t) * keys[1] + t * next_keys[1]; + keys[2] = (1.0f - t) * keys[2] + t * next_keys[2]; + keys[3] = (1.0f - t) * keys[3] + t * next_keys[3]; } -#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) +# if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) /* Similar to above, but returns keys as pair of two AVX registers with each * holding two float4. */ @@ -160,56 +184,44 @@ ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg, int object, int prim, float time, - int k0, int k1, - int k2, int k3, + int k0, + int k1, + int k2, + int k3, avxf *out_keys_0_1, avxf *out_keys_2_3) { - /* Get motion info. */ - int numsteps, numkeys; - object_motion_info(kg, object, &numsteps, NULL, &numkeys); - - /* Figure out which steps we need to fetch and their interpolation factor. */ - int maxstep = numsteps * 2; - int step = min((int)(time*maxstep), maxstep - 1); - float t = time*maxstep - step; - - /* Find attribute. 
*/ - AttributeElement elem; - int offset = find_attribute_curve_motion(kg, - object, - ATTR_STD_MOTION_VERTEX_POSITION, - &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* Fetch key coordinates. */ - float4 next_keys[4]; - float4 keys[4]; - motion_cardinal_curve_keys_for_step(kg, - offset, - numkeys, - numsteps, - step, - k0, k1, k2, k3, - keys); - motion_cardinal_curve_keys_for_step(kg, - offset, - numkeys, - numsteps, - step + 1, - k0, k1, k2, k3, - next_keys); - - const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128); - const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128); - const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128); - const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128); - - /* Interpolate between steps. */ - *out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1; - *out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3; + /* Get motion info. */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* Figure out which steps we need to fetch and their interpolation factor. */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* Find attribute. */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* Fetch key coordinates. */ + float4 next_keys[4]; + float4 keys[4]; + motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); + motion_cardinal_curve_keys_for_step( + kg, offset, numkeys, numsteps, step + 1, k0, k1, k2, k3, next_keys); + + const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128); + const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128); + const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128); + const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128); + + /* Interpolate between steps. 
*/ + *out_keys_0_1 = (1.0f - t) * keys_0_1 + t * next_keys_0_1; + *out_keys_2_3 = (1.0f - t) * keys_2_3 + t * next_keys_2_3; } -#endif +# endif #endif diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 64f6d027b99..53d6b92dd7e 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -29,127 +29,145 @@ CCL_NAMESPACE_BEGIN /* Time interpolation of vertex positions and normals */ -ccl_device_inline int find_attribute_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem) +ccl_device_inline int find_attribute_motion(KernelGlobals *kg, + int object, + uint id, + AttributeElement *elem) { - /* todo: find a better (faster) solution for this, maybe store offset per object */ - uint attr_offset = object_attribute_map_offset(kg, object); - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + /* todo: find a better (faster) solution for this, maybe store offset per object */ + uint attr_offset = object_attribute_map_offset(kg, object); + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - while(attr_map.x != id) { - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } + while (attr_map.x != id) { + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } - *elem = (AttributeElement)attr_map.y; + *elem = (AttributeElement)attr_map.y; - /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; } -ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3]) +ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, + uint4 tri_vindex, + int offset, + int numverts, + int numsteps, + int step, + float3 verts[3]) { - if(step == numsteps) { - /* center step: regular vertex location */ - verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - } - else { - /* center step not store in this array */ - if(step > numsteps) - step--; - - offset += step*numverts; - - verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); - } + if (step == numsteps) { + /* center step: regular vertex location */ + verts[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + verts[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + verts[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + } + else { + /* center step not store in this array */ + if (step > numsteps) + step--; + + offset += step * numverts; + + verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + } } -ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, uint4 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3]) 
+ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, + uint4 tri_vindex, + int offset, + int numverts, + int numsteps, + int step, + float3 normals[3]) { - if(step == numsteps) { - /* center step: regular vertex location */ - normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); - } - else { - /* center step is not stored in this array */ - if(step > numsteps) - step--; - - offset += step*numverts; - - normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); - } + if (step == numsteps) { + /* center step: regular vertex location */ + normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + } + else { + /* center step is not stored in this array */ + if (step > numsteps) + step--; + + offset += step * numverts; + + normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); + normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); + normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + } } -ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, int prim, float time, float3 verts[3]) +ccl_device_inline void motion_triangle_vertices( + KernelGlobals *kg, int object, int prim, float time, float3 verts[3]) { - /* get motion info */ - int numsteps, numverts; - object_motion_info(kg, object, &numsteps, &numverts, NULL); - - /* figure 
out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps*2; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; - - /* find attribute */ - AttributeElement elem; - int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch vertex coordinates */ - float3 next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); - - /* interpolate between steps */ - verts[0] = (1.0f - t)*verts[0] + t*next_verts[0]; - verts[1] = (1.0f - t)*verts[1] + t*next_verts[1]; - verts[2] = (1.0f - t)*verts[2] + t*next_verts[2]; + /* get motion info */ + int numsteps, numverts; + object_motion_info(kg, object, &numsteps, &numverts, NULL); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch vertex coordinates */ + float3 next_verts[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); + + /* interpolate between steps */ + verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; + verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; + verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; } -ccl_device_inline float3 motion_triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int object, int prim, float u, 
float v, float time) +ccl_device_inline float3 motion_triangle_smooth_normal( + KernelGlobals *kg, float3 Ng, int object, int prim, float u, float v, float time) { - /* get motion info */ - int numsteps, numverts; - object_motion_info(kg, object, &numsteps, &numverts, NULL); - - /* figure out which steps we need to fetch and their interpolation factor */ - int maxstep = numsteps*2; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; - - /* find attribute */ - AttributeElement elem; - int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - - /* fetch normals */ - float3 normals[3], next_normals[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals); - - /* interpolate between steps */ - normals[0] = (1.0f - t)*normals[0] + t*next_normals[0]; - normals[1] = (1.0f - t)*normals[1] + t*next_normals[1]; - normals[2] = (1.0f - t)*normals[2] + t*next_normals[2]; - - /* interpolate between vertices */ - float w = 1.0f - u - v; - float3 N = safe_normalize(u*normals[0] + v*normals[1] + w*normals[2]); - - return is_zero(N)? 
Ng: N; + /* get motion info */ + int numsteps, numverts; + object_motion_info(kg, object, &numsteps, &numverts, NULL); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps * 2; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch normals */ + float3 normals[3], next_normals[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); + motion_triangle_normals_for_step( + kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); + + /* interpolate between steps */ + normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; + normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; + normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; + + /* interpolate between vertices */ + float w = 1.0f - u - v; + float3 N = safe_normalize(u * normals[0] + v * normals[1] + w * normals[2]); + + return is_zero(N) ? Ng : N; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h index ec7bfad7349..49d4829af38 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_intersect.h @@ -32,64 +32,57 @@ CCL_NAMESPACE_BEGIN * a closer distance. 
*/ -ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, - ShaderData *sd, - const Intersection *isect, - const Ray *ray, - float3 verts[3]) +ccl_device_inline float3 motion_triangle_refine( + KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3]) { - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; #ifdef __INTERSECTION_REFINE__ - if(isect->object != OBJECT_NONE) { - if(UNLIKELY(t == 0.0f)) { - return P; - } + if (isect->object != OBJECT_NONE) { + if (UNLIKELY(t == 0.0f)) { + return P; + } # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; # else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); # endif - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = normalize_len(D, &t); + } - P = P + D*t; + P = P + D * t; - /* Compute refined intersection distance. */ - const float3 e1 = verts[0] - verts[2]; - const float3 e2 = verts[1] - verts[2]; - const float3 s1 = cross(D, e2); + /* Compute refined intersection distance. */ + const float3 e1 = verts[0] - verts[2]; + const float3 e2 = verts[1] - verts[2]; + const float3 s1 = cross(D, e2); - const float invdivisor = 1.0f/dot(s1, e1); - const float3 d = P - verts[2]; - const float3 s2 = cross(d, e1); - float rt = dot(e2, s2)*invdivisor; + const float invdivisor = 1.0f / dot(s1, e1); + const float3 d = P - verts[2]; + const float3 s2 = cross(d, e1); + float rt = dot(e2, s2) * invdivisor; - /* Compute refined position. */ - P = P + D*rt; + /* Compute refined position. 
*/ + P = P + D * rt; - if(isect->object != OBJECT_NONE) { + if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; # else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); # endif - P = transform_point(&tfm, P); - } + P = transform_point(&tfm, P); + } - return P; + return P; #else - return P + D*t; + return P + D * t; #endif } @@ -103,116 +96,112 @@ ccl_device_noinline # else ccl_device_inline # endif -float3 motion_triangle_refine_local(KernelGlobals *kg, - ShaderData *sd, - const Intersection *isect, - const Ray *ray, - float3 verts[3]) + float3 + motion_triangle_refine_local(KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray, + float3 verts[3]) { - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; # ifdef __INTERSECTION_REFINE__ - if(isect->object != OBJECT_NONE) { + if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; # else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); # endif - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } - P = P + D*t; + P = P + D * t; - /* compute refined intersection distance */ - const float3 e1 = verts[0] - verts[2]; - const float3 e2 = verts[1] - verts[2]; - const float3 s1 = cross(D, e2); + /* compute refined intersection distance */ + const float3 e1 = verts[0] - verts[2]; + const float3 e2 = verts[1] - verts[2]; + const float3 s1 = cross(D, e2); - const float invdivisor = 1.0f/dot(s1, e1); - const float3 d = P - 
verts[2]; - const float3 s2 = cross(d, e1); - float rt = dot(e2, s2)*invdivisor; + const float invdivisor = 1.0f / dot(s1, e1); + const float3 d = P - verts[2]; + const float3 s2 = cross(d, e1); + float rt = dot(e2, s2) * invdivisor; - P = P + D*rt; + P = P + D * rt; - if(isect->object != OBJECT_NONE) { + if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; # else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); # endif - P = transform_point(&tfm, P); - } + P = transform_point(&tfm, P); + } - return P; + return P; # else /* __INTERSECTION_REFINE__ */ - return P + D*t; -# endif /* __INTERSECTION_REFINE__ */ + return P + D * t; +# endif /* __INTERSECTION_REFINE__ */ } -#endif /* __BVH_LOCAL__ */ - +#endif /* __BVH_LOCAL__ */ /* Ray intersection. We simply compute the vertex positions at the given ray * time and do a ray intersection with the resulting triangle. */ -ccl_device_inline bool motion_triangle_intersect( - KernelGlobals *kg, - Intersection *isect, - float3 P, - float3 dir, - float time, - uint visibility, - int object, - int prim_addr) +ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, + Intersection *isect, + float3 P, + float3 dir, + float time, + uint visibility, + int object, + int prim_addr) { - /* Primitive index for vertex location lookup. */ - int prim = kernel_tex_fetch(__prim_index, prim_addr); - int fobject = (object == OBJECT_NONE) - ? kernel_tex_fetch(__prim_object, prim_addr) - : object; - /* Get vertex locations for intersection. */ - float3 verts[3]; - motion_triangle_vertices(kg, fobject, prim, time, verts); - /* Ray-triangle intersection, unoptimized. */ - float t, u, v; - if(ray_triangle_intersect(P, - dir, - isect->t, + /* Primitive index for vertex location lookup. 
*/ + int prim = kernel_tex_fetch(__prim_index, prim_addr); + int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, prim_addr) : object; + /* Get vertex locations for intersection. */ + float3 verts[3]; + motion_triangle_vertices(kg, fobject, prim, time, verts); + /* Ray-triangle intersection, unoptimized. */ + float t, u, v; + if (ray_triangle_intersect(P, + dir, + isect->t, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - (ssef*)verts, + (ssef *)verts, #else - verts[0], verts[1], verts[2], + verts[0], + verts[1], + verts[2], #endif - &u, &v, &t)) - { + &u, + &v, + &t)) { #ifdef __VISIBILITY_FLAG__ - /* Visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags. - */ - if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) + /* Visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags. + */ + if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif - { - isect->t = t; - isect->u = u; - isect->v = v; - isect->prim = prim_addr; - isect->object = object; - isect->type = PRIMITIVE_MOTION_TRIANGLE; - return true; - } - } - return false; + { + isect->t = t; + isect->u = u; + isect->v = v; + isect->prim = prim_addr; + isect->object = object; + isect->type = PRIMITIVE_MOTION_TRIANGLE; + return true; + } + } + return false; } /* Special ray intersection routines for local intersections. In that case we @@ -221,101 +210,102 @@ ccl_device_inline bool motion_triangle_intersect( * Returns whether traversal should be stopped. 
*/ #ifdef __BVH_LOCAL__ -ccl_device_inline bool motion_triangle_intersect_local( - KernelGlobals *kg, - LocalIntersection *local_isect, - float3 P, - float3 dir, - float time, - int object, - int local_object, - int prim_addr, - float tmax, - uint *lcg_state, - int max_hits) +ccl_device_inline bool motion_triangle_intersect_local(KernelGlobals *kg, + LocalIntersection *local_isect, + float3 P, + float3 dir, + float time, + int object, + int local_object, + int prim_addr, + float tmax, + uint *lcg_state, + int max_hits) { - /* Only intersect with matching object, for instanced objects we - * already know we are only intersecting the right object. */ - if(object == OBJECT_NONE) { - if(kernel_tex_fetch(__prim_object, prim_addr) != local_object) { - return false; - } - } - - /* Primitive index for vertex location lookup. */ - int prim = kernel_tex_fetch(__prim_index, prim_addr); - /* Get vertex locations for intersection. */ - float3 verts[3]; - motion_triangle_vertices(kg, local_object, prim, time, verts); - /* Ray-triangle intersection, unoptimized. */ - float t, u, v; - if(!ray_triangle_intersect(P, - dir, - tmax, -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - (ssef*)verts, -#else - verts[0], verts[1], verts[2], -#endif - &u, &v, &t)) - { - return false; - } - - /* If no actual hit information is requested, just return here. */ - if(max_hits == 0) { - return true; - } - - int hit; - if(lcg_state) { - /* Record up to max_hits intersections. */ - for(int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if(local_isect->hits[i].t == t) { - return false; - } - } - - local_isect->num_hits++; - - if(local_isect->num_hits <= max_hits) { - hit = local_isect->num_hits - 1; - } - else { - /* Reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it. - */ - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - - if(hit >= max_hits) - return false; - } - } - else { - /* Record closest intersection only. 
*/ - if(local_isect->num_hits && t > local_isect->hits[0].t) { - return false; - } - - hit = 0; - local_isect->num_hits = 1; - } - - /* Record intersection. */ - Intersection *isect = &local_isect->hits[hit]; - isect->t = t; - isect->u = u; - isect->v = v; - isect->prim = prim_addr; - isect->object = object; - isect->type = PRIMITIVE_MOTION_TRIANGLE; - - /* Record geometric normal. */ - local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], - verts[2] - verts[0])); - - return false; + /* Only intersect with matching object, for instanced objects we + * already know we are only intersecting the right object. */ + if (object == OBJECT_NONE) { + if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + return false; + } + } + + /* Primitive index for vertex location lookup. */ + int prim = kernel_tex_fetch(__prim_index, prim_addr); + /* Get vertex locations for intersection. */ + float3 verts[3]; + motion_triangle_vertices(kg, local_object, prim, time, verts); + /* Ray-triangle intersection, unoptimized. */ + float t, u, v; + if (!ray_triangle_intersect(P, + dir, + tmax, +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + (ssef *)verts, +# else + verts[0], + verts[1], + verts[2], +# endif + &u, + &v, + &t)) { + return false; + } + + /* If no actual hit information is requested, just return here. */ + if (max_hits == 0) { + return true; + } + + int hit; + if (lcg_state) { + /* Record up to max_hits intersections. */ + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == t) { + return false; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= max_hits) { + hit = local_isect->num_hits - 1; + } + else { + /* Reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it. + */ + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + + if (hit >= max_hits) + return false; + } + } + else { + /* Record closest intersection only. 
*/ + if (local_isect->num_hits && t > local_isect->hits[0].t) { + return false; + } + + hit = 0; + local_isect->num_hits = 1; + } + + /* Record intersection. */ + Intersection *isect = &local_isect->hits[hit]; + isect->t = t; + isect->u = u; + isect->v = v; + isect->prim = prim_addr; + isect->object = object; + isect->type = PRIMITIVE_MOTION_TRIANGLE; + + /* Record geometric normal. */ + local_isect->Ng[hit] = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + + return false; } -#endif /* __BVH_LOCAL__ */ +#endif /* __BVH_LOCAL__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h index e91a4be96ba..5333e82b346 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle_shader.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle_shader.h @@ -32,91 +32,80 @@ CCL_NAMESPACE_BEGIN * normals */ /* return 3 triangle vertex normals */ -ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, - ShaderData *sd, const - Intersection *isect, - const Ray *ray, - bool is_local) +ccl_device_noinline void motion_triangle_shader_setup( + KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool is_local) { - /* Get shader. */ - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); - /* Get motion info. */ - /* TODO(sergey): This logic is really similar to motion_triangle_vertices(), - * can we de-duplicate something here? - */ - int numsteps, numverts; - object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); - /* Figure out which steps we need to fetch and their interpolation factor. */ - int maxstep = numsteps*2; - int step = min((int)(sd->time*maxstep), maxstep-1); - float t = sd->time*maxstep - step; - /* Find attribute. */ - AttributeElement elem; - int offset = find_attribute_motion(kg, sd->object, - ATTR_STD_MOTION_VERTEX_POSITION, - &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - /* Fetch vertex coordinates. 
*/ - float3 verts[3], next_verts[3]; - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); - motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); - /* Interpolate between steps. */ - verts[0] = (1.0f - t)*verts[0] + t*next_verts[0]; - verts[1] = (1.0f - t)*verts[1] + t*next_verts[1]; - verts[2] = (1.0f - t)*verts[2] + t*next_verts[2]; - /* Compute refined position. */ + /* Get shader. */ + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + /* Get motion info. */ + /* TODO(sergey): This logic is really similar to motion_triangle_vertices(), + * can we de-duplicate something here? + */ + int numsteps, numverts; + object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); + /* Figure out which steps we need to fetch and their interpolation factor. */ + int maxstep = numsteps * 2; + int step = min((int)(sd->time * maxstep), maxstep - 1); + float t = sd->time * maxstep - step; + /* Find attribute. */ + AttributeElement elem; + int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + /* Fetch vertex coordinates. */ + float3 verts[3], next_verts[3]; + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts); + /* Interpolate between steps. */ + verts[0] = (1.0f - t) * verts[0] + t * next_verts[0]; + verts[1] = (1.0f - t) * verts[1] + t * next_verts[1]; + verts[2] = (1.0f - t) * verts[2] + t * next_verts[2]; + /* Compute refined position. 
*/ #ifdef __BVH_LOCAL__ - if(is_local) { - sd->P = motion_triangle_refine_local(kg, - sd, - isect, - ray, - verts); - } - else -#endif /* __BVH_LOCAL__*/ - { - sd->P = motion_triangle_refine(kg, sd, isect, ray, verts); - } - /* Compute face normal. */ - float3 Ng; - if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); - } - else { - Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); - } - sd->Ng = Ng; - sd->N = Ng; - /* Compute derivatives of P w.r.t. uv. */ + if (is_local) { + sd->P = motion_triangle_refine_local(kg, sd, isect, ray, verts); + } + else +#endif /* __BVH_LOCAL__*/ + { + sd->P = motion_triangle_refine(kg, sd, isect, ray, verts); + } + /* Compute face normal. */ + float3 Ng; + if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + Ng = normalize(cross(verts[2] - verts[0], verts[1] - verts[0])); + } + else { + Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + } + sd->Ng = Ng; + sd->N = Ng; + /* Compute derivatives of P w.r.t. uv. */ #ifdef __DPDU__ - sd->dPdu = (verts[0] - verts[2]); - sd->dPdv = (verts[1] - verts[2]); + sd->dPdu = (verts[0] - verts[2]); + sd->dPdv = (verts[1] - verts[2]); #endif - /* Compute smooth normal. */ - if(sd->shader & SHADER_SMOOTH_NORMAL) { - /* Find attribute. */ - AttributeElement elem; - int offset = find_attribute_motion(kg, - sd->object, - ATTR_STD_MOTION_VERTEX_NORMAL, - &elem); - kernel_assert(offset != ATTR_STD_NOT_FOUND); - /* Fetch vertex coordinates. */ - float3 normals[3], next_normals[3]; - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); - motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals); - /* Interpolate between steps. 
*/ - normals[0] = (1.0f - t)*normals[0] + t*next_normals[0]; - normals[1] = (1.0f - t)*normals[1] + t*next_normals[1]; - normals[2] = (1.0f - t)*normals[2] + t*next_normals[2]; - /* Interpolate between vertices. */ - float u = sd->u; - float v = sd->v; - float w = 1.0f - u - v; - sd->N = (u*normals[0] + v*normals[1] + w*normals[2]); - } + /* Compute smooth normal. */ + if (sd->shader & SHADER_SMOOTH_NORMAL) { + /* Find attribute. */ + AttributeElement elem; + int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + /* Fetch vertex coordinates. */ + float3 normals[3], next_normals[3]; + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); + motion_triangle_normals_for_step( + kg, tri_vindex, offset, numverts, numsteps, step + 1, next_normals); + /* Interpolate between steps. */ + normals[0] = (1.0f - t) * normals[0] + t * next_normals[0]; + normals[1] = (1.0f - t) * normals[1] + t * next_normals[1]; + normals[2] = (1.0f - t) * normals[2] + t * next_normals[2]; + /* Interpolate between vertices. 
*/ + float u = sd->u; + float v = sd->v; + float w = 1.0f - u - v; + sd->N = (u * normals[0] + v * normals[1] + w * normals[2]); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 669c932d720..2792fd64c61 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -27,131 +27,143 @@ CCL_NAMESPACE_BEGIN /* Object attributes, for now a fixed size and contents */ enum ObjectTransform { - OBJECT_TRANSFORM = 0, - OBJECT_INVERSE_TRANSFORM = 1, + OBJECT_TRANSFORM = 0, + OBJECT_INVERSE_TRANSFORM = 1, }; -enum ObjectVectorTransform { - OBJECT_PASS_MOTION_PRE = 0, - OBJECT_PASS_MOTION_POST = 1 -}; +enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST = 1 }; /* Object to world space transformation */ -ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type) +ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, + int object, + enum ObjectTransform type) { - if(type == OBJECT_INVERSE_TRANSFORM) { - return kernel_tex_fetch(__objects, object).itfm; - } - else { - return kernel_tex_fetch(__objects, object).tfm; - } + if (type == OBJECT_INVERSE_TRANSFORM) { + return kernel_tex_fetch(__objects, object).itfm; + } + else { + return kernel_tex_fetch(__objects, object).tfm; + } } /* Lamp to world space transformation */ ccl_device_inline Transform lamp_fetch_transform(KernelGlobals *kg, int lamp, bool inverse) { - if(inverse) { - return kernel_tex_fetch(__lights, lamp).itfm; - } - else { - return kernel_tex_fetch(__lights, lamp).tfm; - } + if (inverse) { + return kernel_tex_fetch(__lights, lamp).itfm; + } + else { + return kernel_tex_fetch(__lights, lamp).tfm; + } } /* Object to world space transformation for motion vectors */ -ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type) +ccl_device_inline 
Transform object_fetch_motion_pass_transform(KernelGlobals *kg, + int object, + enum ObjectVectorTransform type) { - int offset = object*OBJECT_MOTION_PASS_SIZE + (int)type; - return kernel_tex_fetch(__object_motion_pass, offset); + int offset = object * OBJECT_MOTION_PASS_SIZE + (int)type; + return kernel_tex_fetch(__object_motion_pass, offset); } /* Motion blurred object transformations */ #ifdef __OBJECT_MOTION__ -ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time) -{ - const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset; - const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset); - const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1; - - Transform tfm; -#ifdef __EMBREE__ - if(kernel_data.bvh.scene) { - transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time); - } - else -#endif - transform_motion_array_interpolate(&tfm, motion, num_steps, time); +ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, + int object, + float time) +{ + const uint motion_offset = kernel_tex_fetch(__objects, object).motion_offset; + const ccl_global DecomposedTransform *motion = &kernel_tex_fetch(__object_motion, motion_offset); + const uint num_steps = kernel_tex_fetch(__objects, object).numsteps * 2 + 1; + + Transform tfm; +# ifdef __EMBREE__ + if (kernel_data.bvh.scene) { + transform_motion_array_interpolate_straight(&tfm, motion, num_steps, time); + } + else +# endif + transform_motion_array_interpolate(&tfm, motion, num_steps, time); - return tfm; + return tfm; } -ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, int object, float time, Transform *itfm) +ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg, + int object, + float time, + Transform *itfm) { - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & 
SD_OBJECT_MOTION) { - /* if we do motion blur */ - Transform tfm = object_fetch_transform_motion(kg, object, time); + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_MOTION) { + /* if we do motion blur */ + Transform tfm = object_fetch_transform_motion(kg, object, time); - if(itfm) - *itfm = transform_quick_inverse(tfm); + if (itfm) + *itfm = transform_quick_inverse(tfm); - return tfm; - } - else { - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - if(itfm) - *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + return tfm; + } + else { + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + if (itfm) + *itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - return tfm; - } + return tfm; + } } #endif /* Transform position from object to world space */ -ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) +ccl_device_inline void object_position_transform(KernelGlobals *kg, + const ShaderData *sd, + float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point_auto(&sd->ob_tfm, *P); + *P = transform_point_auto(&sd->ob_tfm, *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *P = transform_point(&tfm, *P); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *P = transform_point(&tfm, *P); #endif } /* Transform position from world to object space */ -ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) +ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, + const ShaderData *sd, + float3 *P) { #ifdef __OBJECT_MOTION__ - *P = transform_point_auto(&sd->ob_itfm, *P); + *P = transform_point_auto(&sd->ob_itfm, *P); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *P = transform_point(&tfm, *P); + Transform tfm = 
object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + *P = transform_point(&tfm, *P); #endif } /* Transform normal from world to object space */ -ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) +ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, + const ShaderData *sd, + float3 *N) { #ifdef __OBJECT_MOTION__ - if((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) { - *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N)); - } + if ((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) { + *N = normalize(transform_direction_transposed_auto(&sd->ob_tfm, *N)); + } #else - if(sd->object != OBJECT_NONE) { - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *N = normalize(transform_direction_transposed(&tfm, *N)); - } - else if(sd->type == PRIMITIVE_LAMP) { - Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); - *N = normalize(transform_direction_transposed(&tfm, *N)); - } + if (sd->object != OBJECT_NONE) { + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *N = normalize(transform_direction_transposed(&tfm, *N)); + } + else if (sd->type == PRIMITIVE_LAMP) { + Transform tfm = lamp_fetch_transform(kg, sd->lamp, false); + *N = normalize(transform_direction_transposed(&tfm, *N)); + } #endif } @@ -160,10 +172,10 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ - *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N)); + *N = normalize(transform_direction_transposed_auto(&sd->ob_itfm, *N)); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *N = normalize(transform_direction_transposed(&tfm, *N)); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + 
*N = normalize(transform_direction_transposed(&tfm, *N)); #endif } @@ -172,22 +184,24 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderDa ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction_auto(&sd->ob_tfm, *D); + *D = transform_direction_auto(&sd->ob_tfm, *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - *D = transform_direction(&tfm, *D); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + *D = transform_direction(&tfm, *D); #endif } /* Transform direction vector from world to object space */ -ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) +ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, + const ShaderData *sd, + float3 *D) { #ifdef __OBJECT_MOTION__ - *D = transform_direction_auto(&sd->ob_itfm, *D); + *D = transform_direction_auto(&sd->ob_itfm, *D); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - *D = transform_direction(&tfm, *D); + Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + *D = transform_direction(&tfm, *D); #endif } @@ -195,14 +209,14 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const Sha ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd) { - if(sd->object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); + if (sd->object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); #ifdef __OBJECT_MOTION__ - return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w); + return make_float3(sd->ob_tfm.x.w, sd->ob_tfm.y.w, sd->ob_tfm.z.w); #else - Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); + Transform tfm = object_fetch_transform(kg, sd->object, 
OBJECT_TRANSFORM); + return make_float3(tfm.x.w, tfm.y.w, tfm.z.w); #endif } @@ -210,218 +224,211 @@ ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd ccl_device_inline float object_surface_area(KernelGlobals *kg, int object) { - return kernel_tex_fetch(__objects, object).surface_area; + return kernel_tex_fetch(__objects, object).surface_area; } /* Pass ID number of object */ ccl_device_inline float object_pass_id(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0.0f; + if (object == OBJECT_NONE) + return 0.0f; - return kernel_tex_fetch(__objects, object).pass_id; + return kernel_tex_fetch(__objects, object).pass_id; } /* Per lamp random number for shader variation */ ccl_device_inline float lamp_random_number(KernelGlobals *kg, int lamp) { - if(lamp == LAMP_NONE) - return 0.0f; + if (lamp == LAMP_NONE) + return 0.0f; - return kernel_tex_fetch(__lights, lamp).random; + return kernel_tex_fetch(__lights, lamp).random; } /* Per object random number for shader variation */ ccl_device_inline float object_random_number(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0.0f; + if (object == OBJECT_NONE) + return 0.0f; - return kernel_tex_fetch(__objects, object).random_number; + return kernel_tex_fetch(__objects, object).random_number; } /* Particle ID from which this object was generated */ ccl_device_inline int object_particle_id(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0; + if (object == OBJECT_NONE) + return 0; - return kernel_tex_fetch(__objects, object).particle_index; + return kernel_tex_fetch(__objects, object).particle_index; } /* Generated texture coordinate on surface from where object was instanced */ ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); + if (object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); - const ccl_global KernelObject *kobject = 
&kernel_tex_fetch(__objects, object); - return make_float3(kobject->dupli_generated[0], - kobject->dupli_generated[1], - kobject->dupli_generated[2]); + const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object); + return make_float3( + kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]); } /* UV texture coordinate on surface from where object was instanced */ ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return make_float3(0.0f, 0.0f, 0.0f); + if (object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); - const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object); - return make_float3(kobject->dupli_uv[0], - kobject->dupli_uv[1], - 0.0f); + const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object); + return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f); } /* Information about mesh for motion blurred triangles and curves */ -ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys) +ccl_device_inline void object_motion_info( + KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys) { - if(numkeys) { - *numkeys = kernel_tex_fetch(__objects, object).numkeys; - } + if (numkeys) { + *numkeys = kernel_tex_fetch(__objects, object).numkeys; + } - if(numsteps) - *numsteps = kernel_tex_fetch(__objects, object).numsteps; - if(numverts) - *numverts = kernel_tex_fetch(__objects, object).numverts; + if (numsteps) + *numsteps = kernel_tex_fetch(__objects, object).numsteps; + if (numverts) + *numverts = kernel_tex_fetch(__objects, object).numverts; } /* Offset to an objects patch map */ ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0; + if (object == OBJECT_NONE) + return 0; - return kernel_tex_fetch(__objects, object).patch_map_offset; + return 
kernel_tex_fetch(__objects, object).patch_map_offset; } /* Pass ID for shader */ ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) { - return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; + return kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).pass_id; } /* Cryptomatte ID */ ccl_device_inline float object_cryptomatte_id(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0.0f; + if (object == OBJECT_NONE) + return 0.0f; - return kernel_tex_fetch(__objects, object).cryptomatte_object; + return kernel_tex_fetch(__objects, object).cryptomatte_object; } ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals *kg, int object) { - if(object == OBJECT_NONE) - return 0; + if (object == OBJECT_NONE) + return 0; - return kernel_tex_fetch(__objects, object).cryptomatte_asset; + return kernel_tex_fetch(__objects, object).cryptomatte_asset; } /* Particle data from which object was instanced */ ccl_device_inline uint particle_index(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).index; + return kernel_tex_fetch(__particles, particle).index; } ccl_device float particle_age(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).age; + return kernel_tex_fetch(__particles, particle).age; } ccl_device float particle_lifetime(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).lifetime; + return kernel_tex_fetch(__particles, particle).lifetime; } ccl_device float particle_size(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).size; + return kernel_tex_fetch(__particles, particle).size; } ccl_device float4 particle_rotation(KernelGlobals *kg, int particle) { - return kernel_tex_fetch(__particles, particle).rotation; + return kernel_tex_fetch(__particles, particle).rotation; } ccl_device float3 particle_location(KernelGlobals *kg, int particle) { - return 
float4_to_float3(kernel_tex_fetch(__particles, particle).location); + return float4_to_float3(kernel_tex_fetch(__particles, particle).location); } ccl_device float3 particle_velocity(KernelGlobals *kg, int particle) { - return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); + return float4_to_float3(kernel_tex_fetch(__particles, particle).velocity); } ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle) { - return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity); + return float4_to_float3(kernel_tex_fetch(__particles, particle).angular_velocity); } /* Object intersection in BVH */ ccl_device_inline float3 bvh_clamp_direction(float3 dir) { - /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */ + /* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */ #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) - const ssef oopes(8.271806E-25f,8.271806E-25f,8.271806E-25f,0.0f); - const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes); - const ssef signdir = signmsk(dir.m128) | oopes; + const ssef oopes(8.271806E-25f, 8.271806E-25f, 8.271806E-25f, 0.0f); + const ssef mask = _mm_cmpgt_ps(fabs(dir), oopes); + const ssef signdir = signmsk(dir.m128) | oopes; # ifndef __KERNEL_AVX__ - ssef res = mask & ssef(dir); - res = _mm_or_ps(res,_mm_andnot_ps(mask, signdir)); + ssef res = mask & ssef(dir); + res = _mm_or_ps(res, _mm_andnot_ps(mask, signdir)); # else - ssef res = _mm_blendv_ps(signdir, dir, mask); + ssef res = _mm_blendv_ps(signdir, dir, mask); # endif - return float3(res); + return float3(res); #else /* __KERNEL_SSE__ && __KERNEL_SSE2__ */ - const float ooeps = 8.271806E-25f; - return make_float3((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x), - (fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y), - (fabsf(dir.z) > ooeps)? 
dir.z: copysignf(ooeps, dir.z)); -#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */ + const float ooeps = 8.271806E-25f; + return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x), + (fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y), + (fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z)); +#endif /* __KERNEL_SSE__ && __KERNEL_SSE2__ */ } ccl_device_inline float3 bvh_inverse_direction(float3 dir) { - return rcp(dir); + return rcp(dir); } /* Transform ray into object space to enter static object in BVH */ -ccl_device_inline float bvh_instance_push(KernelGlobals *kg, - int object, - const Ray *ray, - float3 *P, - float3 *dir, - float3 *idir, - float t) +ccl_device_inline float bvh_instance_push( + KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float t) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *P = transform_point(&tfm, ray->P); + *P = transform_point(&tfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); - if(t != FLT_MAX) { - t *= len; - } + if (t != FLT_MAX) { + t *= len; + } - return t; + return t; } #ifdef __QBVH__ @@ -440,85 +447,85 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg, float *t, float *t1) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *P = transform_point(&tfm, ray->P); + *P = transform_point(&tfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); + float len; + *dir = 
bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); - if(*t != FLT_MAX) - *t *= len; + if (*t != FLT_MAX) + *t *= len; - if(*t1 != -FLT_MAX) - *t1 *= len; + if (*t1 != -FLT_MAX) + *t1 *= len; } #endif /* Transorm ray to exit static object in BVH */ -ccl_device_inline float bvh_instance_pop(KernelGlobals *kg, - int object, - const Ray *ray, - float3 *P, - float3 *dir, - float3 *idir, - float t) +ccl_device_inline float bvh_instance_pop( + KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float t) { - if(t != FLT_MAX) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - t /= len(transform_direction(&tfm, ray->D)); - } + if (t != FLT_MAX) { + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + t /= len(transform_direction(&tfm, ray->D)); + } - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); - return t; + return t; } /* Same as above, but returns scale factor to apply to multiple intersection distances */ -ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac) +ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float *t_fac) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - *t_fac = 1.0f / len(transform_direction(&tfm, ray->D)); + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + *t_fac = 1.0f / len(transform_direction(&tfm, ray->D)); - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); } - #ifdef 
__OBJECT_MOTION__ /* Transform ray into object space to enter motion blurred object in BVH */ ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg, - int object, - const Ray *ray, - float3 *P, - float3 *dir, - float3 *idir, - float t, - Transform *itfm) + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float t, + Transform *itfm) { - object_fetch_transform_motion_test(kg, object, ray->time, itfm); + object_fetch_transform_motion_test(kg, object, ray->time, itfm); - *P = transform_point(itfm, ray->P); + *P = transform_point(itfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); - if(t != FLT_MAX) { - t *= len; - } + if (t != FLT_MAX) { + t *= len; + } - return t; + return t; } -#ifdef __QBVH__ +# ifdef __QBVH__ /* Same as above, but optimized for QBVH scene intersection, * which needs to modify two max distances. 
* @@ -535,21 +542,21 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, float *t1, Transform *itfm) { - object_fetch_transform_motion_test(kg, object, ray->time, itfm); + object_fetch_transform_motion_test(kg, object, ray->time, itfm); - *P = transform_point(itfm, ray->P); + *P = transform_point(itfm, ray->P); - float len; - *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); - *idir = bvh_inverse_direction(*dir); + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); - if(*t != FLT_MAX) - *t *= len; + if (*t != FLT_MAX) + *t *= len; - if(*t1 != -FLT_MAX) - *t1 *= len; + if (*t1 != -FLT_MAX) + *t1 *= len; } -#endif +# endif /* Transorm ray to exit motion blurred object in BVH */ @@ -562,15 +569,15 @@ ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg, float t, Transform *itfm) { - if(t != FLT_MAX) { - t /= len(transform_direction(itfm, ray->D)); - } + if (t != FLT_MAX) { + t /= len(transform_direction(itfm, ray->D)); + } - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); - return t; + return t; } /* Same as above, but returns scale factor to apply to multiple intersection distances */ @@ -584,10 +591,10 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, float *t_fac, Transform *itfm) { - *t_fac = 1.0f / len(transform_direction(itfm, ray->D)); - *P = ray->P; - *dir = bvh_clamp_direction(ray->D); - *idir = bvh_inverse_direction(*dir); + *t_fac = 1.0f / len(transform_direction(itfm, ray->D)); + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); } #endif @@ -599,30 +606,30 @@ ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, #ifdef __KERNEL_OPENCL__ ccl_device_inline void 
object_position_transform_addrspace(KernelGlobals *kg, - const ShaderData *sd, - ccl_addr_space float3 *P) + const ShaderData *sd, + ccl_addr_space float3 *P) { - float3 private_P = *P; - object_position_transform(kg, sd, &private_P); - *P = private_P; + float3 private_P = *P; + object_position_transform(kg, sd, &private_P); + *P = private_P; } ccl_device_inline void object_dir_transform_addrspace(KernelGlobals *kg, const ShaderData *sd, ccl_addr_space float3 *D) { - float3 private_D = *D; - object_dir_transform(kg, sd, &private_D); - *D = private_D; + float3 private_D = *D; + object_dir_transform(kg, sd, &private_D); + *D = private_D; } ccl_device_inline void object_normal_transform_addrspace(KernelGlobals *kg, const ShaderData *sd, ccl_addr_space float3 *N) { - float3 private_N = *N; - object_normal_transform(kg, sd, &private_N); - *N = private_N; + float3 private_N = *N; + object_normal_transform(kg, sd, &private_N); + *N = private_N; } #endif diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h index edb82172959..df19199f68e 100644 --- a/intern/cycles/kernel/geom/geom_patch.h +++ b/intern/cycles/kernel/geom/geom_patch.h @@ -27,342 +27,394 @@ CCL_NAMESPACE_BEGIN typedef struct PatchHandle { - int array_index, patch_index, vert_index; + int array_index, patch_index, vert_index; } PatchHandle; ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v) { - int quadrant = -1; - - if(*u < median) { - if(*v < median) { - quadrant = 0; - } - else { - quadrant = 1; - *v -= median; - } - } - else { - if(*v < median) { - quadrant = 3; - } - else { - quadrant = 2; - *v -= median; - } - *u -= median; - } - - return quadrant; + int quadrant = -1; + + if (*u < median) { + if (*v < median) { + quadrant = 0; + } + else { + quadrant = 1; + *v -= median; + } + } + else { + if (*v < median) { + quadrant = 3; + } + else { + quadrant = 2; + *v -= median; + } + *u -= median; + } + + return quadrant; } /* retrieve 
PatchHandle from patch coords */ -ccl_device_inline PatchHandle patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v) +ccl_device_inline PatchHandle +patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v) { - PatchHandle handle; + PatchHandle handle; - kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f)); + kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f)); - int node = (object_patch_map_offset(kg, object) + patch)/2; - float median = 0.5f; + int node = (object_patch_map_offset(kg, object) + patch) / 2; + float median = 0.5f; - for(int depth = 0; depth < 0xff; depth++) { - float delta = median * 0.5f; + for (int depth = 0; depth < 0xff; depth++) { + float delta = median * 0.5f; - int quadrant = patch_map_resolve_quadrant(median, &u, &v); - kernel_assert(quadrant >= 0); + int quadrant = patch_map_resolve_quadrant(median, &u, &v); + kernel_assert(quadrant >= 0); - uint child = kernel_tex_fetch(__patches, node + quadrant); + uint child = kernel_tex_fetch(__patches, node + quadrant); - /* is the quadrant a hole? */ - if(!(child & PATCH_MAP_NODE_IS_SET)) { - handle.array_index = -1; - return handle; - } + /* is the quadrant a hole? 
*/ + if (!(child & PATCH_MAP_NODE_IS_SET)) { + handle.array_index = -1; + return handle; + } - uint index = child & PATCH_MAP_NODE_INDEX_MASK; + uint index = child & PATCH_MAP_NODE_INDEX_MASK; - if(child & PATCH_MAP_NODE_IS_LEAF) { - handle.array_index = kernel_tex_fetch(__patches, index + 0); - handle.patch_index = kernel_tex_fetch(__patches, index + 1); - handle.vert_index = kernel_tex_fetch(__patches, index + 2); + if (child & PATCH_MAP_NODE_IS_LEAF) { + handle.array_index = kernel_tex_fetch(__patches, index + 0); + handle.patch_index = kernel_tex_fetch(__patches, index + 1); + handle.vert_index = kernel_tex_fetch(__patches, index + 2); - return handle; - } else { - node = index; - } + return handle; + } + else { + node = index; + } - median = delta; - } + median = delta; + } - /* no leaf found */ - kernel_assert(0); + /* no leaf found */ + kernel_assert(0); - handle.array_index = -1; - return handle; + handle.array_index = -1; + return handle; } ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv) { - /* The four uniform cubic B-Spline basis functions evaluated at t */ - float inv_6 = 1.0f / 6.0f; - - float t2 = t * t; - float t3 = t * t2; - - point[0] = inv_6 * (1.0f - 3.0f*(t - t2) - t3); - point[1] = inv_6 * (4.0f - 6.0f*t2 + 3.0f*t3); - point[2] = inv_6 * (1.0f + 3.0f*(t + t2 - t3)); - point[3] = inv_6 * t3; - - /* Derivatives of the above four basis functions at t */ - deriv[0] = -0.5f*t2 + t - 0.5f; - deriv[1] = 1.5f*t2 - 2.0f*t; - deriv[2] = -1.5f*t2 + t + 0.5f; - deriv[3] = 0.5f*t2; + /* The four uniform cubic B-Spline basis functions evaluated at t */ + float inv_6 = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point[0] = inv_6 * (1.0f - 3.0f * (t - t2) - t3); + point[1] = inv_6 * (4.0f - 6.0f * t2 + 3.0f * t3); + point[2] = inv_6 * (1.0f + 3.0f * (t + t2 - t3)); + point[3] = inv_6 * t3; + + /* Derivatives of the above four basis functions at t */ + deriv[0] = -0.5f * t2 + t - 0.5f; + deriv[1] = 1.5f * 
t2 - 2.0f * t; + deriv[2] = -1.5f * t2 + t + 0.5f; + deriv[3] = 0.5f * t2; } ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t) { - int boundary = ((bits >> 8) & 0xf); - - if(boundary & 1) { - t[2] -= t[0]; - t[1] += 2*t[0]; - t[0] = 0; - } - - if(boundary & 2) { - s[1] -= s[3]; - s[2] += 2*s[3]; - s[3] = 0; - } - - if(boundary & 4) { - t[1] -= t[3]; - t[2] += 2*t[3]; - t[3] = 0; - } - - if(boundary & 8) { - s[2] -= s[0]; - s[1] += 2*s[0]; - s[0] = 0; - } + int boundary = ((bits >> 8) & 0xf); + + if (boundary & 1) { + t[2] -= t[0]; + t[1] += 2 * t[0]; + t[0] = 0; + } + + if (boundary & 2) { + s[1] -= s[3]; + s[2] += 2 * s[3]; + s[3] = 0; + } + + if (boundary & 4) { + t[1] -= t[3]; + t[2] += 2 * t[3]; + t[3] = 0; + } + + if (boundary & 8) { + s[2] -= s[0]; + s[1] += 2 * s[0]; + s[0] = 0; + } } ccl_device_inline int patch_eval_depth(uint patch_bits) { - return (patch_bits & 0xf); + return (patch_bits & 0xf); } ccl_device_inline float patch_eval_param_fraction(uint patch_bits) { - bool non_quad_root = (patch_bits >> 4) & 0x1; - int depth = patch_eval_depth(patch_bits); - - if(non_quad_root) { - return 1.0f / (float)(1 << (depth-1)); - } - else { - return 1.0f / (float)(1 << depth); - } + bool non_quad_root = (patch_bits >> 4) & 0x1; + int depth = patch_eval_depth(patch_bits); + + if (non_quad_root) { + return 1.0f / (float)(1 << (depth - 1)); + } + else { + return 1.0f / (float)(1 << depth); + } } ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, float *v) { - float frac = patch_eval_param_fraction(patch_bits); + float frac = patch_eval_param_fraction(patch_bits); - int iu = (patch_bits >> 22) & 0x3ff; - int iv = (patch_bits >> 12) & 0x3ff; + int iu = (patch_bits >> 22) & 0x3ff; + int iv = (patch_bits >> 12) & 0x3ff; - /* top left corner */ - float pu = (float)iu*frac; - float pv = (float)iv*frac; + /* top left corner */ + float pu = (float)iu * frac; + float pv = (float)iv * frac; - /* normalize uv 
coordinates */ - *u = (*u - pu) / frac; - *v = (*v - pv) / frac; + /* normalize uv coordinates */ + *u = (*u - pu) / frac; + *v = (*v - pv) / frac; } /* retrieve patch control indices */ -ccl_device_inline int patch_eval_indices(KernelGlobals *kg, const PatchHandle *handle, int channel, +ccl_device_inline int patch_eval_indices(KernelGlobals *kg, + const PatchHandle *handle, + int channel, int indices[PATCH_MAX_CONTROL_VERTS]) { - int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; + int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; - /* XXX: regular patches only */ - for(int i = 0; i < 16; i++) { - indices[i] = kernel_tex_fetch(__patches, index_base + i); - } + /* XXX: regular patches only */ + for (int i = 0; i < 16; i++) { + indices[i] = kernel_tex_fetch(__patches, index_base + i); + } - return 16; + return 16; } /* evaluate patch basis functions */ -ccl_device_inline void patch_eval_basis(KernelGlobals *kg, const PatchHandle *handle, float u, float v, - float weights[PATCH_MAX_CONTROL_VERTS], - float weights_du[PATCH_MAX_CONTROL_VERTS], - float weights_dv[PATCH_MAX_CONTROL_VERTS]) +ccl_device_inline void patch_eval_basis(KernelGlobals *kg, + const PatchHandle *handle, + float u, + float v, + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) { - uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ - float d_scale = 1 << patch_eval_depth(patch_bits); + uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ + float d_scale = 1 << patch_eval_depth(patch_bits); - bool non_quad_root = (patch_bits >> 4) & 0x1; - if(non_quad_root) { - d_scale *= 0.5f; - } + bool non_quad_root = (patch_bits >> 4) & 0x1; + if (non_quad_root) { + d_scale *= 0.5f; + } - patch_eval_normalize_coords(patch_bits, &u, &v); + 
patch_eval_normalize_coords(patch_bits, &u, &v); - /* XXX: regular patches only for now. */ + /* XXX: regular patches only for now. */ - float s[4], t[4], ds[4], dt[4]; + float s[4], t[4], ds[4], dt[4]; - patch_eval_bspline_weights(u, s, ds); - patch_eval_bspline_weights(v, t, dt); + patch_eval_bspline_weights(u, s, ds); + patch_eval_bspline_weights(v, t, dt); - patch_eval_adjust_boundary_weights(patch_bits, s, t); - patch_eval_adjust_boundary_weights(patch_bits, ds, dt); + patch_eval_adjust_boundary_weights(patch_bits, s, t); + patch_eval_adjust_boundary_weights(patch_bits, ds, dt); - for(int k = 0; k < 4; k++) { - for(int l = 0; l < 4; l++) { - weights[4*k+l] = s[l] * t[k]; - weights_du[4*k+l] = ds[l] * t[k] * d_scale; - weights_dv[4*k+l] = s[l] * dt[k] * d_scale; - } - } + for (int k = 0; k < 4; k++) { + for (int l = 0; l < 4; l++) { + weights[4 * k + l] = s[l] * t[k]; + weights_du[4 * k + l] = ds[l] * t[k] * d_scale; + weights_dv[4 * k + l] = s[l] * dt[k] * d_scale; + } + } } /* generic function for evaluating indices and weights from patch coords */ -ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, int object, int patch, float u, float v, int channel, - int indices[PATCH_MAX_CONTROL_VERTS], - float weights[PATCH_MAX_CONTROL_VERTS], - float weights_du[PATCH_MAX_CONTROL_VERTS], - float weights_dv[PATCH_MAX_CONTROL_VERTS]) +ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, + int object, + int patch, + float u, + float v, + int channel, + int indices[PATCH_MAX_CONTROL_VERTS], + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) { - PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v); - kernel_assert(handle.array_index >= 0); + PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v); + kernel_assert(handle.array_index >= 0); - int num_control = patch_eval_indices(kg, &handle, channel, indices); - patch_eval_basis(kg, &handle, u, 
v, weights, weights_du, weights_dv); + int num_control = patch_eval_indices(kg, &handle, channel, indices); + patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv); - return num_control; + return num_control; } /* functions for evaluating attributes on patches */ -ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int offset, - int patch, float u, float v, int channel, - float *du, float* dv) +ccl_device float patch_eval_float(KernelGlobals *kg, + const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + float *du, + float *dv) { - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, - indices, weights, weights_du, weights_dv); - - float val = 0.0f; - if(du) *du = 0.0f; - if(dv) *dv = 0.0f; - - for(int i = 0; i < num_control; i++) { - float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); - - val += v * weights[i]; - if(du) *du += v * weights_du[i]; - if(dv) *dv += v * weights_dv[i]; - } - - return val; + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float val = 0.0f; + if (du) + *du = 0.0f; + if (dv) + *dv = 0.0f; + + for (int i = 0; i < num_control; i++) { + float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; } -ccl_device float2 patch_eval_float2(KernelGlobals *kg, const ShaderData *sd, int offset, - int patch, float u, float v, int channel, - float2 *du, float2 *dv) +ccl_device float2 
patch_eval_float2(KernelGlobals *kg, + const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + float2 *du, + float2 *dv) { - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, - indices, weights, weights_du, weights_dv); - - float2 val = make_float2(0.0f, 0.0f); - if(du) *du = make_float2(0.0f, 0.0f); - if(dv) *dv = make_float2(0.0f, 0.0f); - - for(int i = 0; i < num_control; i++) { - float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]); - - val += v * weights[i]; - if(du) *du += v * weights_du[i]; - if(dv) *dv += v * weights_dv[i]; - } - - return val; + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float2 val = make_float2(0.0f, 0.0f); + if (du) + *du = make_float2(0.0f, 0.0f); + if (dv) + *dv = make_float2(0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float2 v = kernel_tex_fetch(__attributes_float2, offset + indices[i]); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; } -ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData *sd, int offset, - int patch, float u, float v, int channel, - float3 *du, float3 *dv) +ccl_device float3 patch_eval_float3(KernelGlobals *kg, + const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + float3 *du, + float3 *dv) { - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int 
num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, - indices, weights, weights_du, weights_dv); - - float3 val = make_float3(0.0f, 0.0f, 0.0f); - if(du) *du = make_float3(0.0f, 0.0f, 0.0f); - if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f); - - for(int i = 0; i < num_control; i++) { - float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); - - val += v * weights[i]; - if(du) *du += v * weights_du[i]; - if(dv) *dv += v * weights_dv[i]; - } - - return val; + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if (du) + *du = make_float3(0.0f, 0.0f, 0.0f); + if (dv) + *dv = make_float3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; } -ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int offset, - int patch, float u, float v, int channel, - float3 *du, float3 *dv) +ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, + const ShaderData *sd, + int offset, + int patch, + float u, + float v, + int channel, + float3 *du, + float3 *dv) { - int indices[PATCH_MAX_CONTROL_VERTS]; - float weights[PATCH_MAX_CONTROL_VERTS]; - float weights_du[PATCH_MAX_CONTROL_VERTS]; - float weights_dv[PATCH_MAX_CONTROL_VERTS]; - - int num_control = patch_eval_control_verts(kg, sd->object, patch, u, v, channel, - indices, weights, weights_du, weights_dv); - - float3 val = make_float3(0.0f, 0.0f, 0.0f); - if(du) *du = make_float3(0.0f, 0.0f, 0.0f); - if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f); - 
- for(int i = 0; i < num_control; i++) { - float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i])); - - val += v * weights[i]; - if(du) *du += v * weights_du[i]; - if(dv) *dv += v * weights_dv[i]; - } - - return val; + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts( + kg, sd->object, patch, u, v, channel, indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if (du) + *du = make_float3(0.0f, 0.0f, 0.0f); + if (dv) + *dv = make_float3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < num_control; i++) { + float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i])); + + val += v * weights[i]; + if (du) + *du += v * weights_du[i]; + if (dv) + *dv += v * weights_dv[i]; + } + + return val; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 95d9d1050fb..7f2b52a24c4 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -22,57 +22,59 @@ CCL_NAMESPACE_BEGIN /* Generic primitive attribute reading functions */ -ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float *dx, float *dy) +ccl_device_inline float primitive_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float(kg, sd, desc, dx, dy); + else + return 
subd_triangle_attribute_float(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float(kg, sd, desc, dx, dy); + } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - return volume_attribute_float(kg, sd, desc); - } + else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + return volume_attribute_float(kg, sd, desc); + } #endif - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - return 0.0f; - } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + return 0.0f; + } } -ccl_device_inline float primitive_surface_attribute_float(KernelGlobals *kg, - const ShaderData *sd, - const AttributeDescriptor desc, - float *dx, float *dy) +ccl_device_inline float primitive_surface_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float(kg, sd, desc, dx, dy); + } #endif - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - return 0.0f; - } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + return 0.0f; + } } #ifdef __VOLUME__ @@ -80,120 +82,136 @@ ccl_device_inline float 
primitive_volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { - if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float(kg, sd, desc); - } - else { - return 0.0f; - } + if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float(kg, sd, desc); + } + else { + return 0.0f; + } } #endif ccl_device_inline float2 primitive_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, - float2 *dx, float2 *dy) + float2 *dx, + float2 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float2(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float2(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float2(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float2(kg, sd, desc, dx, dy); + } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - kernel_assert(0); - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + kernel_assert(0); + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + return make_float2(0.0f, 0.0f); + } #endif - else { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + return make_float2(0.0f, 0.0f); + } } 
ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, - float3 *dx, float3 *dy) + float3 *dx, + float3 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float3(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float3(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float3(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float3(kg, sd, desc, dx, dy); + } #endif #ifdef __VOLUME__ - else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return volume_attribute_float3(kg, sd, desc); - } + else if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + return volume_attribute_float3(kg, sd, desc); + } #endif - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } + else { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); + } } ccl_device_inline float2 primitive_surface_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, - float2 *dx, float2 *dy) + float2 *dx, + float2 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float2(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float2(kg, sd, 
desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float2(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float2(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float2(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float2(kg, sd, desc, dx, dy); + } #endif - else { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + return make_float2(0.0f, 0.0f); + } } ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, - float3 *dx, float3 *dy) + float3 *dx, + float3 *dy) { - if(sd->type & PRIMITIVE_ALL_TRIANGLE) { - if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float3(kg, sd, desc, dx, dy); - else - return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); - } + if (sd->type & PRIMITIVE_ALL_TRIANGLE) { + if (subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float3(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); + } #ifdef __HAIR__ - else if(sd->type & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float3(kg, sd, desc, dx, dy); - } + else if (sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float3(kg, sd, desc, dx, dy); + } #endif - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } + else { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); + } } #ifdef __VOLUME__ @@ -201,12 +219,12 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg, const 
ShaderData *sd, const AttributeDescriptor desc) { - if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float3(kg, sd, desc); - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } + if (sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float3(kg, sd, desc); + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } } #endif @@ -214,33 +232,33 @@ ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg, ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) { - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); - if(desc.offset == ATTR_STD_NOT_FOUND) - return make_float3(0.0f, 0.0f, 0.0f); + if (desc.offset == ATTR_STD_NOT_FOUND) + return make_float3(0.0f, 0.0f, 0.0f); - float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); - return make_float3(uv.x, uv.y, 1.0f); + float2 uv = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); + return make_float3(uv.x, uv.y, 1.0f); } /* Ptex coordinates */ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id) { - /* storing ptex data as attributes is not memory efficient but simple for tests */ - const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID); - const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV); + /* storing ptex data as attributes is not memory efficient but simple for tests */ + const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID); + const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV); - if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) - return false; + if (desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) + return false; - float3 uv3 = 
primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL); - float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL); + float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL); + float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL); - *uv = make_float2(uv3.x, uv3.y); - *face_id = (int)face_id_f; + *uv = make_float2(uv3.x, uv3.y); + *face_id = (int)face_id_f; - return true; + return true; } /* Surface tangent */ @@ -248,125 +266,125 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) { #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) + if (sd->type & PRIMITIVE_ALL_CURVE) # ifdef __DPDU__ - return normalize(sd->dPdu); + return normalize(sd->dPdu); # else - return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); # endif #endif - /* try to create spherical tangent from generated coordinates */ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); - - if(desc.offset != ATTR_STD_NOT_FOUND) { - float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); - object_normal_transform(kg, sd, &data); - return cross(sd->N, normalize(cross(data, sd->N))); - } - else { - /* otherwise use surface derivatives */ + /* try to create spherical tangent from generated coordinates */ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); + + if (desc.offset != ATTR_STD_NOT_FOUND) { + float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); + object_normal_transform(kg, sd, &data); + return cross(sd->N, normalize(cross(data, sd->N))); + } + else { + /* otherwise use surface derivatives */ #ifdef __DPDU__ - return normalize(sd->dPdu); + return normalize(sd->dPdu); #else 
- return make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); #endif - } + } } /* Motion vector for motion pass */ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) { - /* center position */ - float3 center; + /* center position */ + float3 center; #ifdef __HAIR__ - bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; - if(is_curve_primitive) { - center = curve_motion_center_location(kg, sd); - - if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_position_transform(kg, sd, &center); - } - } - else + bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; + if (is_curve_primitive) { + center = curve_motion_center_location(kg, sd); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform(kg, sd, &center); + } + } + else #endif - center = sd->P; + center = sd->P; - float3 motion_pre = center, motion_post = center; + float3 motion_pre = center, motion_post = center; - /* deformation motion */ - AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION); + /* deformation motion */ + AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION); - if(desc.offset != ATTR_STD_NOT_FOUND) { - /* get motion info */ - int numverts, numkeys; - object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); + if (desc.offset != ATTR_STD_NOT_FOUND) { + /* get motion info */ + int numverts, numkeys; + object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); - /* lookup attributes */ - motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + /* lookup attributes */ + motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys; - motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE) ? 
numverts : numkeys; + motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); #ifdef __HAIR__ - if(is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { - object_position_transform(kg, sd, &motion_pre); - object_position_transform(kg, sd, &motion_post); - } + if (is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { + object_position_transform(kg, sd, &motion_pre); + object_position_transform(kg, sd, &motion_post); + } #endif - } - - /* object motion. note that depending on the mesh having motion vectors, this - * transformation was set match the world/object space of motion_pre/post */ - Transform tfm; - - tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE); - motion_pre = transform_point(&tfm, motion_pre); - - tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST); - motion_post = transform_point(&tfm, motion_post); - - float3 motion_center; - - /* camera motion, for perspective/orthographic motion.pre/post will be a - * world-to-raster matrix, for panorama it's world-to-camera */ - if(kernel_data.cam.type != CAMERA_PANORAMA) { - ProjectionTransform projection = kernel_data.cam.worldtoraster; - motion_center = transform_perspective(&projection, center); - - projection = kernel_data.cam.perspective_pre; - motion_pre = transform_perspective(&projection, motion_pre); - - projection = kernel_data.cam.perspective_post; - motion_post = transform_perspective(&projection, motion_post); - } - else { - tfm = kernel_data.cam.worldtocamera; - motion_center = normalize(transform_point(&tfm, center)); - motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center)); - motion_center.x *= kernel_data.cam.width; - motion_center.y *= kernel_data.cam.height; - - tfm = kernel_data.cam.motion_pass_pre; - motion_pre = normalize(transform_point(&tfm, motion_pre)); - motion_pre = float2_to_float3(direction_to_panorama(&kernel_data.cam, 
motion_pre)); - motion_pre.x *= kernel_data.cam.width; - motion_pre.y *= kernel_data.cam.height; - - tfm = kernel_data.cam.motion_pass_post; - motion_post = normalize(transform_point(&tfm, motion_post)); - motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post)); - motion_post.x *= kernel_data.cam.width; - motion_post.y *= kernel_data.cam.height; - } - - motion_pre = motion_pre - motion_center; - motion_post = motion_center - motion_post; - - return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y); + } + + /* object motion. note that depending on the mesh having motion vectors, this + * transformation was set match the world/object space of motion_pre/post */ + Transform tfm; + + tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_PRE); + motion_pre = transform_point(&tfm, motion_pre); + + tfm = object_fetch_motion_pass_transform(kg, sd->object, OBJECT_PASS_MOTION_POST); + motion_post = transform_point(&tfm, motion_post); + + float3 motion_center; + + /* camera motion, for perspective/orthographic motion.pre/post will be a + * world-to-raster matrix, for panorama it's world-to-camera */ + if (kernel_data.cam.type != CAMERA_PANORAMA) { + ProjectionTransform projection = kernel_data.cam.worldtoraster; + motion_center = transform_perspective(&projection, center); + + projection = kernel_data.cam.perspective_pre; + motion_pre = transform_perspective(&projection, motion_pre); + + projection = kernel_data.cam.perspective_post; + motion_post = transform_perspective(&projection, motion_post); + } + else { + tfm = kernel_data.cam.worldtocamera; + motion_center = normalize(transform_point(&tfm, center)); + motion_center = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_center)); + motion_center.x *= kernel_data.cam.width; + motion_center.y *= kernel_data.cam.height; + + tfm = kernel_data.cam.motion_pass_pre; + motion_pre = normalize(transform_point(&tfm, motion_pre)); + motion_pre = 
float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_pre)); + motion_pre.x *= kernel_data.cam.width; + motion_pre.y *= kernel_data.cam.height; + + tfm = kernel_data.cam.motion_pass_post; + motion_post = normalize(transform_point(&tfm, motion_post)); + motion_post = float2_to_float3(direction_to_panorama(&kernel_data.cam, motion_post)); + motion_post.x *= kernel_data.cam.width; + motion_post.y *= kernel_data.cam.height; + } + + motion_pre = motion_pre - motion_center; + motion_post = motion_center - motion_post; + + return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h index 251e070c21f..8d5b3c12833 100644 --- a/intern/cycles/kernel/geom/geom_subd_triangle.h +++ b/intern/cycles/kernel/geom/geom_subd_triangle.h @@ -22,455 +22,492 @@ CCL_NAMESPACE_BEGIN ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd) { - return (sd->prim != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, sd->prim) : ~0; + return (sd->prim != PRIM_NONE) ? 
kernel_tex_fetch(__tri_patch, sd->prim) : ~0; } /* UV coords of triangle within patch */ -ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg, const ShaderData *sd, float2 uv[3]) +ccl_device_inline void subd_triangle_patch_uv(KernelGlobals *kg, + const ShaderData *sd, + float2 uv[3]) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); - uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); - uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z); + uv[0] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.x); + uv[1] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.y); + uv[2] = kernel_tex_fetch(__tri_patch_uv, tri_vindex.z); } /* Vertex indices of patch */ ccl_device_inline uint4 subd_triangle_patch_indices(KernelGlobals *kg, int patch) { - uint4 indices; + uint4 indices; - indices.x = kernel_tex_fetch(__patches, patch+0); - indices.y = kernel_tex_fetch(__patches, patch+1); - indices.z = kernel_tex_fetch(__patches, patch+2); - indices.w = kernel_tex_fetch(__patches, patch+3); + indices.x = kernel_tex_fetch(__patches, patch + 0); + indices.y = kernel_tex_fetch(__patches, patch + 1); + indices.z = kernel_tex_fetch(__patches, patch + 2); + indices.w = kernel_tex_fetch(__patches, patch + 3); - return indices; + return indices; } /* Originating face for patch */ ccl_device_inline uint subd_triangle_patch_face(KernelGlobals *kg, int patch) { - return kernel_tex_fetch(__patches, patch+4); + return kernel_tex_fetch(__patches, patch + 4); } /* Number of corners on originating face */ ccl_device_inline uint subd_triangle_patch_num_corners(KernelGlobals *kg, int patch) { - return kernel_tex_fetch(__patches, patch+5) & 0xffff; + return kernel_tex_fetch(__patches, patch + 5) & 0xffff; } /* Indices of the four corners that are used by the patch */ ccl_device_inline void subd_triangle_patch_corners(KernelGlobals *kg, int patch, int 
corners[4]) { - uint4 data; - - data.x = kernel_tex_fetch(__patches, patch+4); - data.y = kernel_tex_fetch(__patches, patch+5); - data.z = kernel_tex_fetch(__patches, patch+6); - data.w = kernel_tex_fetch(__patches, patch+7); - - int num_corners = data.y & 0xffff; - - if(num_corners == 4) { - /* quad */ - corners[0] = data.z; - corners[1] = data.z+1; - corners[2] = data.z+2; - corners[3] = data.z+3; - } - else { - /* ngon */ - int c = data.y >> 16; - - corners[0] = data.z + c; - corners[1] = data.z + mod(c+1, num_corners); - corners[2] = data.w; - corners[3] = data.z + mod(c-1, num_corners); - } + uint4 data; + + data.x = kernel_tex_fetch(__patches, patch + 4); + data.y = kernel_tex_fetch(__patches, patch + 5); + data.z = kernel_tex_fetch(__patches, patch + 6); + data.w = kernel_tex_fetch(__patches, patch + 7); + + int num_corners = data.y & 0xffff; + + if (num_corners == 4) { + /* quad */ + corners[0] = data.z; + corners[1] = data.z + 1; + corners[2] = data.z + 2; + corners[3] = data.z + 3; + } + else { + /* ngon */ + int c = data.y >> 16; + + corners[0] = data.z + c; + corners[1] = data.z + mod(c + 1, num_corners); + corners[2] = data.w; + corners[3] = data.z + mod(c - 1, num_corners); + } } /* Reading attributes on various subdivision triangle elements */ -ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) +ccl_device_noinline float subd_triangle_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd); #ifdef __PATCH_EVAL__ - if(desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; - - float a, dads, dadt; - a = patch_eval_float(kg, sd, desc.offset, patch, 
p.x, p.y, 0, &dads, &dadt); + if (desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float a, dads, dadt; + a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; + + return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); + float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx || 
dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if(dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu*dudx + dsdv*dvdx; - float dtdx = dtdu*dudx + dtdv*dvdx; - - *dx = dads*dsdx + dadt*dtdx; - } - if(dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu*dudy + dsdv*dvdy; - float dtdy = dtdu*dudy + dtdv*dvdy; - - *dy = dads*dsdy + dadt*dtdy; - } - } + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); - return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); - uint4 v = subd_triangle_patch_indices(kg, patch); + float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); + float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); + float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); + float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); - float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); - float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + 
} - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else if(desc.element == ATTR_ELEMENT_CORNER) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; - float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); - float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); - float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); - float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); - - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + 
sd->dv.dy)*c; -#endif - - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - - return 0.0f; - } + return 0.0f; + } } -ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy) +ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float2 *dx, + float2 *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd); #ifdef __PATCH_EVAL__ - if(desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; - - float2 a, dads, dadt; - - a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx || dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if(dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu*dudx + dsdv*dvdx; - float dtdx = dtdu*dudx + dtdv*dvdx; - - *dx = dads*dsdx + dadt*dtdx; - } - if(dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu*dudy + dsdv*dvdy; - float dtdy = dtdu*dudy + dtdv*dvdy; - - *dy = dads*dsdy + dadt*dtdy; - } - } -#endif - - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); - - return kernel_tex_fetch(__attributes_float2, desc.offset + subd_triangle_patch_face(kg, patch)); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - uint4 v = subd_triangle_patch_indices(kg, patch); - - float2 f0 = kernel_tex_fetch(__attributes_float2, 
desc.offset + v.x); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y); - float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z); - float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w); - - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + if (desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float2 a, dads, dadt; + + a = patch_eval_float2(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); + + return kernel_tex_fetch(__attributes_float2, + desc.offset + subd_triangle_patch_face(kg, patch)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + v.x); 
+ float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + v.y); + float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + v.z); + float2 f3 = kernel_tex_fetch(__attributes_float2, desc.offset + v.w); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else if(desc.element == ATTR_ELEMENT_CORNER) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); - float2 f0, f1, f2, f3; + float2 f0, f1, f2, f3; - f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset); - f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset); - f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset); - f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset); + f0 = kernel_tex_fetch(__attributes_float2, corners[0] + desc.offset); + f1 = kernel_tex_fetch(__attributes_float2, corners[1] + desc.offset); + f2 = kernel_tex_fetch(__attributes_float2, corners[2] + desc.offset); + f3 = kernel_tex_fetch(__attributes_float2, corners[3] + desc.offset); - 
if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } - float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + float2 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float2 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float2 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + return make_float2(0.0f, 0.0f); + } } -ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) +ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, + float3 *dy) { - int patch = subd_triangle_patch(kg, sd); + int patch = subd_triangle_patch(kg, sd); #ifdef __PATCH_EVAL__ - if(desc.flags & ATTR_SUBDIVIDED) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - float2 dpdu = uv[0] - uv[2]; - float2 dpdv = uv[1] - uv[2]; - - /* p is [s, t] */ - float2 p = dpdu * sd->u + dpdv * 
sd->v + uv[2]; - - float3 a, dads, dadt; - - if(desc.element == ATTR_ELEMENT_CORNER_BYTE) { - a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - } - else { - a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); - } - -#ifdef __RAY_DIFFERENTIALS__ - if(dx || dy) { - float dsdu = dpdu.x; - float dtdu = dpdu.y; - float dsdv = dpdv.x; - float dtdv = dpdv.y; - - if(dx) { - float dudx = sd->du.dx; - float dvdx = sd->dv.dx; - - float dsdx = dsdu*dudx + dsdv*dvdx; - float dtdx = dtdu*dudx + dtdv*dvdx; - - *dx = dads*dsdx + dadt*dtdx; - } - if(dy) { - float dudy = sd->du.dy; - float dvdy = sd->dv.dy; - - float dsdy = dsdu*dudy + dsdv*dvdy; - float dtdy = dtdu*dudy + dtdv*dvdy; - - *dy = dads*dsdy + dadt*dtdy; - } - } -#endif - - return a; - } - else -#endif /* __PATCH_EVAL__ */ - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - uint4 v = subd_triangle_patch_indices(kg, patch); - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); - float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); - - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + if (desc.flags & 
ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * sd->u + dpdv * sd->v + uv[2]; + + float3 a, dads, dadt; + + if (desc.element == ATTR_ELEMENT_CORNER_BYTE) { + a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + else { + a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + +# ifdef __RAY_DIFFERENTIALS__ + if (dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if (dx) { + float dudx = sd->du.dx; + float dvdx = sd->dv.dx; + + float dsdx = dsdu * dudx + dsdv * dvdx; + float dtdx = dtdu * dudx + dtdv * dvdx; + + *dx = dads * dsdx + dadt * dtdx; + } + if (dy) { + float dudy = sd->du.dy; + float dvdy = sd->dv.dy; + + float dsdy = dsdu * dudy + dsdv * dvdy; + float dtdy = dtdu * dudy + dtdv * dvdy; + + *dy = dads * dsdy + dadt * dtdy; + } + } +# endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + uint4 v = subd_triangle_patch_indices(kg, patch); + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); + float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; 
+ } + + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { - float2 uv[3]; - subd_triangle_patch_uv(kg, sd, uv); - - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); - - float3 f0, f1, f2, f3; - - if(desc.element == ATTR_ELEMENT_CORNER) { - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); - f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); - } - else { - f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset)); - f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset)); - f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset)); - f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset)); - } - - if(subd_triangle_patch_num_corners(kg, patch) != 4) { - f1 = (f1+f0)*0.5f; - f3 = (f3+f0)*0.5f; - } - - float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + return sd->u * a + sd->v * 
b + (1.0f - sd->u - sd->v) * c; + } + else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + + float3 f0, f1, f2, f3; + + if (desc.element == ATTR_ELEMENT_CORNER) { + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); + f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); + } + else { + f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset)); + f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset)); + f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset)); + f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset)); + } + + if (subd_triangle_patch_num_corners(kg, patch) != 4) { + f1 = (f1 + f0) * 0.5f; + f3 = (f3 + f0) * 0.5f; + } + + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*a + sd->dv.dx*b - (sd->du.dx + sd->dv.dx)*c; - if(dy) *dy = sd->du.dy*a + sd->dv.dy*b - (sd->du.dy + sd->dv.dy)*c; + if (dx) + *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c; + if (dy) + *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c; #endif - return sd->u*a + sd->v*b + (1.0f - sd->u - sd->v)*c; - } - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c; + } + else { + if (dx) + *dx = 
make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } + return make_float3(0.0f, 0.0f, 0.0f); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 300227c38e6..9938c0ba2c3 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -25,227 +25,268 @@ CCL_NAMESPACE_BEGIN /* normal on triangle */ ccl_device_inline float3 triangle_normal(KernelGlobals *kg, ShaderData *sd) { - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - - /* return normal */ - if(sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - return normalize(cross(v2 - v0, v1 - v0)); - } - else { - return normalize(cross(v1 - v0, v2 - v0)); - } + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + const float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + const float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + const float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + + /* return normal */ + if (sd->object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + return normalize(cross(v2 - v0, v1 - v0)); + } + else { + return normalize(cross(v1 - v0, v2 - v0)); + } } /* point and normal on triangle */ -ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) +ccl_device_inline void triangle_point_normal( + KernelGlobals *kg, int object, int prim, float u, float v, float3 *P, float3 
*Ng, int *shader) { - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - /* compute point */ - float t = 1.0f - u - v; - *P = (u*v0 + v*v1 + t*v2); - /* get object flags */ - int object_flag = kernel_tex_fetch(__object_flag, object); - /* compute normal */ - if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - *Ng = normalize(cross(v2 - v0, v1 - v0)); - } - else { - *Ng = normalize(cross(v1 - v0, v2 - v0)); - } - /* shader`*/ - *shader = kernel_tex_fetch(__tri_shader, prim); + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 v0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + float3 v1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + float3 v2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + /* compute point */ + float t = 1.0f - u - v; + *P = (u * v0 + v * v1 + t * v2); + /* get object flags */ + int object_flag = kernel_tex_fetch(__object_flag, object); + /* compute normal */ + if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + *Ng = normalize(cross(v2 - v0, v1 - v0)); + } + else { + *Ng = normalize(cross(v1 - v0, v2 - v0)); + } + /* shader`*/ + *shader = kernel_tex_fetch(__tri_shader, prim); } /* Triangle vertex locations */ ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3]) { - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); + const uint4 tri_vindex = 
kernel_tex_fetch(__tri_vindex, prim); + P[0] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + P[1] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + P[2] = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); } /* Interpolate smooth vertex normal from vertices */ -ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int prim, float u, float v) +ccl_device_inline float3 +triangle_smooth_normal(KernelGlobals *kg, float3 Ng, int prim, float u, float v) { - /* load triangle vertices */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); - float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); - float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); + /* load triangle vertices */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.x)); + float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.y)); + float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, tri_vindex.z)); - float3 N = safe_normalize((1.0f - u - v)*n2 + u*n0 + v*n1); + float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1); - return is_zero(N)? Ng: N; + return is_zero(N) ? 
Ng : N; } /* Ray differentials on triangle */ -ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_space float3 *dPdu, ccl_addr_space float3 *dPdv) +ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, + int prim, + ccl_addr_space float3 *dPdu, + ccl_addr_space float3 *dPdv) { - /* fetch triangle vertex coordinates */ - const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); - const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+0)); - const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+1)); - const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w+2)); - - /* compute derivatives of P w.r.t. uv */ - *dPdu = (p0 - p2); - *dPdv = (p1 - p2); + /* fetch triangle vertex coordinates */ + const uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim); + const float3 p0 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 0)); + const float3 p1 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 1)); + const float3 p2 = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex.w + 2)); + + /* compute derivatives of P w.r.t. 
uv */ + *dPdu = (p0 - p2); + *dPdv = (p1 - p2); } /* Reading attributes on various triangle elements */ -ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) +ccl_device float triangle_attribute_float( + KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + return kernel_tex_fetch(__attributes_float, desc.offset + sd->prim); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); - float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); - float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else if(desc.element == ATTR_ELEMENT_CORNER) { - int tri = desc.offset + sd->prim*3; - 
float f0 = kernel_tex_fetch(__attributes_float, tri + 0); - float f1 = kernel_tex_fetch(__attributes_float, tri + 1); - float f2 = kernel_tex_fetch(__attributes_float, tri + 2); + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + int tri = desc.offset + sd->prim * 3; + float f0 = kernel_tex_fetch(__attributes_float, tri + 0); + float f1 = kernel_tex_fetch(__attributes_float, tri + 1); + float f2 = kernel_tex_fetch(__attributes_float, tri + 2); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { + if (dx) + *dx = 0.0f; + if (dy) + *dy = 0.0f; - return 0.0f; - } + return 0.0f; + } } -ccl_device float2 triangle_attribute_float2(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float2 *dx, float2 *dy) +ccl_device float2 triangle_attribute_float2(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float2 *dx, + float2 *dy) { - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); - return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + return kernel_tex_fetch(__attributes_float2, desc.offset + sd->prim); + } + 
else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x); - float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y); - float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z); + float2 f0 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.x); + float2 f1 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.y); + float2 f2 = kernel_tex_fetch(__attributes_float2, desc.offset + tri_vindex.z); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else if(desc.element == ATTR_ELEMENT_CORNER) { - int tri = desc.offset + sd->prim*3; - float2 f0, f1, f2; + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else if (desc.element == ATTR_ELEMENT_CORNER) { + int tri = desc.offset + sd->prim * 3; + float2 f0, f1, f2; - if(desc.element == ATTR_ELEMENT_CORNER) { - f0 = kernel_tex_fetch(__attributes_float2, tri + 0); - f1 = kernel_tex_fetch(__attributes_float2, tri + 1); - f2 = kernel_tex_fetch(__attributes_float2, tri + 2); - } + if (desc.element == ATTR_ELEMENT_CORNER) { + f0 = kernel_tex_fetch(__attributes_float2, tri + 0); + f1 = kernel_tex_fetch(__attributes_float2, tri + 1); + f2 = kernel_tex_fetch(__attributes_float2, tri + 2); + } #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - 
(sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else { - if(dx) *dx = make_float2(0.0f, 0.0f); - if(dy) *dy = make_float2(0.0f, 0.0f); + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { + if (dx) + *dx = make_float2(0.0f, 0.0f); + if (dy) + *dy = make_float2(0.0f, 0.0f); - return make_float2(0.0f, 0.0f); - } + return make_float2(0.0f, 0.0f); + } } -ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) +ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, + float3 *dy) { - if(desc.element == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); - } - else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { - uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); + if (desc.element == ATTR_ELEMENT_FACE) { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + sd->prim)); + } + else if (desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { + uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, sd->prim); + + float3 f0 = float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); + 
float3 f1 = float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); + float3 f2 = float4_to_float3( + kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { - int tri = desc.offset + sd->prim*3; - float3 f0, f1, f2; - - if(desc.element == ATTR_ELEMENT_CORNER) { - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); - } - else { - f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0)); - f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1)); - f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 2)); - } + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { + int tri = desc.offset + sd->prim * 3; + float3 f0, f1, f2; + + if (desc.element == ATTR_ELEMENT_CORNER) { + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); + } + else { + f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 0)); + f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 1)); + f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, tri + 
2)); + } #ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; + if (dx) + *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2; + if (dy) + *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2; #endif - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2; + } + else { + if (dx) + *dx = make_float3(0.0f, 0.0f, 0.0f); + if (dy) + *dy = make_float3(0.0f, 0.0f, 0.0f); - return make_float3(0.0f, 0.0f, 0.0f); - } + return make_float3(0.0f, 0.0f, 0.0f); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index 56dbc4473fa..bcad03102d2 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -1,4 +1,4 @@ - /* +/* * Copyright 2014, Blender Foundation. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,447 +30,464 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, int object, int prim_addr) { - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex]; + const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; #else - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); #endif - float t, u, v; - if(ray_triangle_intersect(P, - dir, - isect->t, + float t, u, v; + if (ray_triangle_intersect(P, + dir, + isect->t, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - ssef_verts, + ssef_verts, #else - float4_to_float3(tri_a), - float4_to_float3(tri_b), - float4_to_float3(tri_c), + float4_to_float3(tri_a), + float4_to_float3(tri_b), + float4_to_float3(tri_c), #endif - &u, &v, &t)) - { + &u, + &v, + &t)) { #ifdef __VISIBILITY_FLAG__ - /* Visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags. - */ - if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) + /* Visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags. 
+ */ + if (kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif - { - isect->prim = prim_addr; - isect->object = object; - isect->type = PRIMITIVE_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; - return true; - } - } - return false; + { + isect->prim = prim_addr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + return true; + } + } + return false; } #ifdef __KERNEL_AVX2__ -#define cross256(A,B, C,D) _mm256_fmsub_ps(A,B, _mm256_mul_ps(C,D)) -ccl_device_inline int ray_triangle_intersect8( - KernelGlobals *kg, - float3 ray_P, - float3 ray_dir, - Intersection **isect, - uint visibility, - int object, - __m256 *triA, - __m256 *triB, - __m256 *triC, - int prim_addr, - int prim_num, - uint *num_hits, - uint max_hits, - int *num_hits_in_instance, - float isect_t) +# define cross256(A, B, C, D) _mm256_fmsub_ps(A, B, _mm256_mul_ps(C, D)) +ccl_device_inline int ray_triangle_intersect8(KernelGlobals *kg, + float3 ray_P, + float3 ray_dir, + Intersection **isect, + uint visibility, + int object, + __m256 *triA, + __m256 *triB, + __m256 *triC, + int prim_addr, + int prim_num, + uint *num_hits, + uint max_hits, + int *num_hits_in_instance, + float isect_t) { - const unsigned char prim_num_mask = (1 << prim_num) - 1; - - const __m256i zero256 = _mm256_setzero_si256(); - - const __m256 Px256 = _mm256_set1_ps(ray_P.x); - const __m256 Py256 = _mm256_set1_ps(ray_P.y); - const __m256 Pz256 = _mm256_set1_ps(ray_P.z); - - const __m256 dirx256 = _mm256_set1_ps(ray_dir.x); - const __m256 diry256 = _mm256_set1_ps(ray_dir.y); - const __m256 dirz256 = _mm256_set1_ps(ray_dir.z); - - /* Calculate vertices relative to ray origin. 
*/ - __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256); - __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256); - __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256); - - __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256); - __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256); - __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256); - - __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256); - __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256); - __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256); - - __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256); - __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256); - __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256); - - __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256); - __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256); - __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256); - - __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256); - __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256); - __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256); - - /* Calculate triangle edges. */ - __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256); - __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256); - __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256); - - __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256); - __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256); - __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256); - - __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256); - __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256); - __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256); - - /* Perform edge tests. 
*/ - /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */ - __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256); - __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256); - __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256); - /* vertical dot */ - __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256); - U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256); - U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256); - - __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256); - __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256); - __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256); - /* vertical dot */ - __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256); - V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256); - V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256); - - __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256); - __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256); - __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256); - /* vertical dot */ - __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256); - W_256 = _mm256_fmadd_ps(W_y_256, diry256,W_256); - W_256 = _mm256_fmadd_ps(W_z_256, dirz256,W_256); - - __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31); - __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31); - __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31); - __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1); - - const __m256i one256 = _mm256_set1_epi32(1); - const __m256i two256 = _mm256_set1_epi32(2); - - __m256i mask_minmaxUVW_256 = _mm256_or_si256( - _mm256_cmpeq_epi32(one256, UVW_256_1), - _mm256_cmpeq_epi32(two256, UVW_256_1)); - - unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256)); - if((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set - return false; - } - - /* 
Calculate geometry normal and denominator. */ - __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256); - __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256); - __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256); - - Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256); - Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256); - Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256); - - /* vertical dot */ - __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256); - den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256,den_256); - den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256,den_256); - - /* Perform depth test. */ - __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256); - T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256,T_256); - T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256,T_256); - - const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000); - __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000); - - __m256 sign_T_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256)); - - unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256); - if(((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) { - return false; - } - - __m256 xor_signmask_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)); - - ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8]; - ccl_align(32) unsigned int mask_minmaxUVW8[8]; - - if(visibility == PATH_RAY_SHADOW_OPAQUE) { - __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256); - __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256); - __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256)); - __m256 rayt_256 = _mm256_set1_ps((*isect)->t); - __m256i mask1 = _mm256_cmpgt_epi32(_mm256_castps_si256(sign_T_256), - _mm256_castps_si256( - _mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256) - ) - 
); - mask0 = _mm256_or_si256(mask1, mask0); - mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) - mask_final_256 = _mm256_andnot_si256(maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden) - unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256)); - if((mask_final & prim_num_mask) == 0) { - return false; - } - const int i = __bsf(mask_final); - __m256 inv_den_256 = _mm256_rcp_ps(den_256); - U_256 = _mm256_mul_ps(U_256, inv_den_256); - V_256 = _mm256_mul_ps(V_256, inv_den_256); - T_256 = _mm256_mul_ps(T_256, inv_den_256); - _mm256_store_ps(U8, U_256); - _mm256_store_ps(V8, V_256); - _mm256_store_ps(T8, T_256); - /* NOTE: Here we assume visibility for all triangles in the node is - * the same. */ - (*isect)->u = U8[i]; - (*isect)->v = V8[i]; - (*isect)->t = T8[i]; - (*isect)->prim = (prim_addr + i); - (*isect)->object = object; - (*isect)->type = PRIMITIVE_TRIANGLE; - return true; - } - else { - _mm256_store_ps(den8, den_256); - _mm256_store_ps(U8, U_256); - _mm256_store_ps(V8, V_256); - _mm256_store_ps(T8, T_256); - - _mm256_store_ps(sign_T8, sign_T_256); - _mm256_store_ps(xor_signmask8, xor_signmask_256); - _mm256_store_si256((__m256i*)mask_minmaxUVW8, mask_minmaxUVW_256); - - int ret = false; - - if(visibility == PATH_RAY_SHADOW) { - for(int i = 0; i < prim_num; i++) { - if(mask_minmaxUVW8[i]) { - continue; - } -#ifdef __VISIBILITY_FLAG__ - if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) { - continue; - } -#endif - if((sign_T8[i] < 0.0f) || - (sign_T8[i] > (*isect)->t * xor_signmask8[i])) - { - continue; - } - if(!den8[i]) { - continue; - } - const float inv_den = 1.0f / den8[i]; - (*isect)->u = U8[i] * inv_den; - (*isect)->v = V8[i] * inv_den; - (*isect)->t = T8[i] * inv_den; - (*isect)->prim = (prim_addr + i); - (*isect)->object = object; - (*isect)->type = PRIMITIVE_TRIANGLE; - const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim); 
- int shader = 0; -#ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE) -#endif - { - shader = kernel_tex_fetch(__tri_shader, prim); - } -#ifdef __HAIR__ - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } -#endif - const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; - /* If no transparent shadows, all light is blocked. */ - if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { - return 2; - } - /* If maximum number of hits reached, block all light. */ - else if(num_hits == NULL || *num_hits == max_hits) { - return 2; - } - /* Move on to next entry in intersections array. */ - ret = true; - (*isect)++; - (*num_hits)++; - (*num_hits_in_instance)++; - (*isect)->t = isect_t; - } - } - else { - for(int i = 0; i < prim_num; i++) { - if(mask_minmaxUVW8[i]) { - continue; - } -#ifdef __VISIBILITY_FLAG__ - if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) { - continue; - } -#endif - if((sign_T8[i] < 0.0f) || - (sign_T8[i] > (*isect)->t * xor_signmask8[i])) - { - continue; - } - if(!den8[i]) { - continue; - } - const float inv_den = 1.0f / den8[i]; - (*isect)->u = U8[i] * inv_den; - (*isect)->v = V8[i] * inv_den; - (*isect)->t = T8[i] * inv_den; - (*isect)->prim = (prim_addr + i); - (*isect)->object = object; - (*isect)->type = PRIMITIVE_TRIANGLE; - ret = true; - } - } - return ret; - } + const unsigned char prim_num_mask = (1 << prim_num) - 1; + + const __m256i zero256 = _mm256_setzero_si256(); + + const __m256 Px256 = _mm256_set1_ps(ray_P.x); + const __m256 Py256 = _mm256_set1_ps(ray_P.y); + const __m256 Pz256 = _mm256_set1_ps(ray_P.z); + + const __m256 dirx256 = _mm256_set1_ps(ray_dir.x); + const __m256 diry256 = _mm256_set1_ps(ray_dir.y); + const __m256 dirz256 = _mm256_set1_ps(ray_dir.z); + + /* Calculate vertices relative to ray origin. 
*/ + __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256); + __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256); + __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256); + + __m256 v1_x_256 = _mm256_sub_ps(triA[0], Px256); + __m256 v1_y_256 = _mm256_sub_ps(triA[1], Py256); + __m256 v1_z_256 = _mm256_sub_ps(triA[2], Pz256); + + __m256 v2_x_256 = _mm256_sub_ps(triB[0], Px256); + __m256 v2_y_256 = _mm256_sub_ps(triB[1], Py256); + __m256 v2_z_256 = _mm256_sub_ps(triB[2], Pz256); + + __m256 v0_v1_x_256 = _mm256_add_ps(v0_x_256, v1_x_256); + __m256 v0_v1_y_256 = _mm256_add_ps(v0_y_256, v1_y_256); + __m256 v0_v1_z_256 = _mm256_add_ps(v0_z_256, v1_z_256); + + __m256 v0_v2_x_256 = _mm256_add_ps(v0_x_256, v2_x_256); + __m256 v0_v2_y_256 = _mm256_add_ps(v0_y_256, v2_y_256); + __m256 v0_v2_z_256 = _mm256_add_ps(v0_z_256, v2_z_256); + + __m256 v1_v2_x_256 = _mm256_add_ps(v1_x_256, v2_x_256); + __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256); + __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256); + + /* Calculate triangle edges. */ + __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256); + __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256); + __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256); + + __m256 e1_x_256 = _mm256_sub_ps(v0_x_256, v1_x_256); + __m256 e1_y_256 = _mm256_sub_ps(v0_y_256, v1_y_256); + __m256 e1_z_256 = _mm256_sub_ps(v0_z_256, v1_z_256); + + __m256 e2_x_256 = _mm256_sub_ps(v1_x_256, v2_x_256); + __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256); + __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256); + + /* Perform edge tests. 
*/ + /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */ + __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256); + __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256); + __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256); + /* vertical dot */ + __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256); + U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256); + U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256); + + __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256); + __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256); + __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256); + /* vertical dot */ + __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256); + V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256); + V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256); + + __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256); + __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256); + __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256); + /* vertical dot */ + __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256); + W_256 = _mm256_fmadd_ps(W_y_256, diry256, W_256); + W_256 = _mm256_fmadd_ps(W_z_256, dirz256, W_256); + + __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31); + __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31); + __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31); + __m256i UVW_256_1 = _mm256_add_epi32(_mm256_add_epi32(U_256_1, V_256_1), W_256_1); + + const __m256i one256 = _mm256_set1_epi32(1); + const __m256i two256 = _mm256_set1_epi32(2); + + __m256i mask_minmaxUVW_256 = _mm256_or_si256(_mm256_cmpeq_epi32(one256, UVW_256_1), + _mm256_cmpeq_epi32(two256, UVW_256_1)); + + unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256)); + if ((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set + return false; + } + + /* 
Calculate geometry normal and denominator. */ + __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256); + __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256); + __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256); + + Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256); + Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256); + Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256); + + /* vertical dot */ + __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256); + den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256, den_256); + den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256, den_256); + + /* Perform depth test. */ + __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256); + T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256, T_256); + T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256, T_256); + + const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000); + __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000); + + __m256 sign_T_256 = _mm256_castsi256_ps( + _mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256)); + + unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256); + if (((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) { + return false; + } + + __m256 xor_signmask_256 = _mm256_castsi256_ps( + _mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)); + + ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8]; + ccl_align(32) unsigned int mask_minmaxUVW8[8]; + + if (visibility == PATH_RAY_SHADOW_OPAQUE) { + __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256); + __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256); + __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256)); + __m256 rayt_256 = _mm256_set1_ps((*isect)->t); + __m256i mask1 = _mm256_cmpgt_epi32( + _mm256_castps_si256(sign_T_256), + _mm256_castps_si256(_mm256_mul_ps( + _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), 
+ rayt_256))); + mask0 = _mm256_or_si256(mask1, mask0); + mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) + mask_final_256 = _mm256_andnot_si256( + maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden) + unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256)); + if ((mask_final & prim_num_mask) == 0) { + return false; + } + const int i = __bsf(mask_final); + __m256 inv_den_256 = _mm256_rcp_ps(den_256); + U_256 = _mm256_mul_ps(U_256, inv_den_256); + V_256 = _mm256_mul_ps(V_256, inv_den_256); + T_256 = _mm256_mul_ps(T_256, inv_den_256); + _mm256_store_ps(U8, U_256); + _mm256_store_ps(V8, V_256); + _mm256_store_ps(T8, T_256); + /* NOTE: Here we assume visibility for all triangles in the node is + * the same. */ + (*isect)->u = U8[i]; + (*isect)->v = V8[i]; + (*isect)->t = T8[i]; + (*isect)->prim = (prim_addr + i); + (*isect)->object = object; + (*isect)->type = PRIMITIVE_TRIANGLE; + return true; + } + else { + _mm256_store_ps(den8, den_256); + _mm256_store_ps(U8, U_256); + _mm256_store_ps(V8, V_256); + _mm256_store_ps(T8, T_256); + + _mm256_store_ps(sign_T8, sign_T_256); + _mm256_store_ps(xor_signmask8, xor_signmask_256); + _mm256_store_si256((__m256i *)mask_minmaxUVW8, mask_minmaxUVW_256); + + int ret = false; + + if (visibility == PATH_RAY_SHADOW) { + for (int i = 0; i < prim_num; i++) { + if (mask_minmaxUVW8[i]) { + continue; + } +# ifdef __VISIBILITY_FLAG__ + if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) { + continue; + } +# endif + if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) { + continue; + } + if (!den8[i]) { + continue; + } + const float inv_den = 1.0f / den8[i]; + (*isect)->u = U8[i] * inv_den; + (*isect)->v = V8[i] * inv_den; + (*isect)->t = T8[i] * inv_den; + (*isect)->prim = (prim_addr + i); + (*isect)->object = object; + (*isect)->type = PRIMITIVE_TRIANGLE; + const int prim = 
kernel_tex_fetch(__prim_index, (*isect)->prim); + int shader = 0; +# ifdef __HAIR__ + if (kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE) +# endif + { + shader = kernel_tex_fetch(__tri_shader, prim); + } +# ifdef __HAIR__ + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +# endif + const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; + /* If no transparent shadows, all light is blocked. */ + if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return 2; + } + /* If maximum number of hits reached, block all light. */ + else if (num_hits == NULL || *num_hits == max_hits) { + return 2; + } + /* Move on to next entry in intersections array. */ + ret = true; + (*isect)++; + (*num_hits)++; + (*num_hits_in_instance)++; + (*isect)->t = isect_t; + } + } + else { + for (int i = 0; i < prim_num; i++) { + if (mask_minmaxUVW8[i]) { + continue; + } +# ifdef __VISIBILITY_FLAG__ + if ((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) { + continue; + } +# endif + if ((sign_T8[i] < 0.0f) || (sign_T8[i] > (*isect)->t * xor_signmask8[i])) { + continue; + } + if (!den8[i]) { + continue; + } + const float inv_den = 1.0f / den8[i]; + (*isect)->u = U8[i] * inv_den; + (*isect)->v = V8[i] * inv_den; + (*isect)->t = T8[i] * inv_den; + (*isect)->prim = (prim_addr + i); + (*isect)->object = object; + (*isect)->type = PRIMITIVE_TRIANGLE; + ret = true; + } + } + return ret; + } } -ccl_device_inline int triangle_intersect8( - KernelGlobals *kg, - Intersection **isect, - float3 P, - float3 dir, - uint visibility, - int object, - int prim_addr, - int prim_num, - uint *num_hits, - uint max_hits, - int *num_hits_in_instance, - float isect_t) - { - __m128 tri_a[8], tri_b[8], tri_c[8]; - __m256 tritmp[12], tri[12]; - __m256 triA[3], triB[3], triC[3]; - - int i, r; - - uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); - for(i = 0; i < prim_num; i++) { - tri_a[i] = 
*(__m128*)&kg->__prim_tri_verts.data[tri_vindex++]; - tri_b[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++]; - tri_c[i] = *(__m128*)&kg->__prim_tri_verts.data[tri_vindex++]; - } - //create 9 or 12 placeholders - tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256 - tri[1] = _mm256_castps128_ps256(tri_b[0]);//_mm256_zextps128_ps256 - tri[2] = _mm256_castps128_ps256(tri_c[0]);//_mm256_zextps128_ps256 - - tri[3] = _mm256_castps128_ps256(tri_a[1]); //_mm256_zextps128_ps256 - tri[4] = _mm256_castps128_ps256(tri_b[1]);//_mm256_zextps128_ps256 - tri[5] = _mm256_castps128_ps256(tri_c[1]);//_mm256_zextps128_ps256 - - tri[6] = _mm256_castps128_ps256(tri_a[2]); //_mm256_zextps128_ps256 - tri[7] = _mm256_castps128_ps256(tri_b[2]);//_mm256_zextps128_ps256 - tri[8] = _mm256_castps128_ps256(tri_c[2]);//_mm256_zextps128_ps256 - - if(prim_num > 3) { - tri[9] = _mm256_castps128_ps256(tri_a[3]); //_mm256_zextps128_ps256 - tri[10] = _mm256_castps128_ps256(tri_b[3]);//_mm256_zextps128_ps256 - tri[11] = _mm256_castps128_ps256(tri_c[3]);//_mm256_zextps128_ps256 - } - - for(i = 4, r = 0; i < prim_num; i ++, r += 3) { - tri[r] = _mm256_insertf128_ps(tri[r] , tri_a[i], 1); - tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1); - tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1); - } - - //------------------------------------------------ - //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1 - //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1 - //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1 - - //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1 - //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1 - //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1 - - //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1 - //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1 - //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1 - - //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1 - //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1 - //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1 - - //"transpose" - tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5 - tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! 
Za0 Za1 1 1 Za4 Za5 1 1 - - tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7 - tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1 - - tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5 - tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1 - - tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7 - tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1 - - tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5 - tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1 - - tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7 - tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! Zc2 Zc3 1 1 Zc6 Zc7 1 1 - - /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ - triA[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]), _mm256_castps_pd(tritmp[2]))); // Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7 - triA[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]), _mm256_castps_pd(tritmp[2]))); // Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7 - triA[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]), _mm256_castps_pd(tritmp[3]))); // Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7 - - triB[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]), _mm256_castps_pd(tritmp[6]))); // Xb0 Xb1 Xb2 Xb3 Xb4 Xb5 Xb5 Xb7 - triB[1] = _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]), _mm256_castps_pd(tritmp[6]))); // Yb0 Yb1 Yb2 Yb3 Yb4 Yb5 Yb5 Yb7 - triB[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]), _mm256_castps_pd(tritmp[7]))); // Zb0 Zb1 Zb2 Zb3 Zb4 Zb5 Zb5 Zb7 - - triC[0] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]), _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7 - triC[1] = 
_mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]), _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7 - triC[2] = _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]), _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7 - - /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ - - int result = ray_triangle_intersect8(kg, P, - dir, - isect, - visibility, object, - triA, - triB, - triC, - prim_addr, - prim_num, - num_hits, - max_hits, - num_hits_in_instance, - isect_t); - return result; +ccl_device_inline int triangle_intersect8(KernelGlobals *kg, + Intersection **isect, + float3 P, + float3 dir, + uint visibility, + int object, + int prim_addr, + int prim_num, + uint *num_hits, + uint max_hits, + int *num_hits_in_instance, + float isect_t) +{ + __m128 tri_a[8], tri_b[8], tri_c[8]; + __m256 tritmp[12], tri[12]; + __m256 triA[3], triB[3], triC[3]; + + int i, r; + + uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); + for (i = 0; i < prim_num; i++) { + tri_a[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++]; + tri_b[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++]; + tri_c[i] = *(__m128 *)&kg->__prim_tri_verts.data[tri_vindex++]; + } + //create 9 or 12 placeholders + tri[0] = _mm256_castps128_ps256(tri_a[0]); //_mm256_zextps128_ps256 + tri[1] = _mm256_castps128_ps256(tri_b[0]); //_mm256_zextps128_ps256 + tri[2] = _mm256_castps128_ps256(tri_c[0]); //_mm256_zextps128_ps256 + + tri[3] = _mm256_castps128_ps256(tri_a[1]); //_mm256_zextps128_ps256 + tri[4] = _mm256_castps128_ps256(tri_b[1]); //_mm256_zextps128_ps256 + tri[5] = _mm256_castps128_ps256(tri_c[1]); //_mm256_zextps128_ps256 + + tri[6] = _mm256_castps128_ps256(tri_a[2]); //_mm256_zextps128_ps256 + tri[7] = _mm256_castps128_ps256(tri_b[2]); //_mm256_zextps128_ps256 + tri[8] = _mm256_castps128_ps256(tri_c[2]); //_mm256_zextps128_ps256 + + if (prim_num > 3) { + tri[9] = _mm256_castps128_ps256(tri_a[3]); 
//_mm256_zextps128_ps256 + tri[10] = _mm256_castps128_ps256(tri_b[3]); //_mm256_zextps128_ps256 + tri[11] = _mm256_castps128_ps256(tri_c[3]); //_mm256_zextps128_ps256 + } + + for (i = 4, r = 0; i < prim_num; i++, r += 3) { + tri[r] = _mm256_insertf128_ps(tri[r], tri_a[i], 1); + tri[r + 1] = _mm256_insertf128_ps(tri[r + 1], tri_b[i], 1); + tri[r + 2] = _mm256_insertf128_ps(tri[r + 2], tri_c[i], 1); + } + + //------------------------------------------------ + //0! Xa0 Ya0 Za0 1 Xa4 Ya4 Za4 1 + //1! Xb0 Yb0 Zb0 1 Xb4 Yb4 Zb4 1 + //2! Xc0 Yc0 Zc0 1 Xc4 Yc4 Zc4 1 + + //3! Xa1 Ya1 Za1 1 Xa5 Ya5 Za5 1 + //4! Xb1 Yb1 Zb1 1 Xb5 Yb5 Zb5 1 + //5! Xc1 Yc1 Zc1 1 Xc5 Yc5 Zc5 1 + + //6! Xa2 Ya2 Za2 1 Xa6 Ya6 Za6 1 + //7! Xb2 Yb2 Zb2 1 Xb6 Yb6 Zb6 1 + //8! Xc2 Yc2 Zc2 1 Xc6 Yc6 Zc6 1 + + //9! Xa3 Ya3 Za3 1 Xa7 Ya7 Za7 1 + //10! Xb3 Yb3 Zb3 1 Xb7 Yb7 Zb7 1 + //11! Xc3 Yc3 Zc3 1 Xc7 Yc7 Zc7 1 + + //"transpose" + tritmp[0] = _mm256_unpacklo_ps(tri[0], tri[3]); //0! Xa0 Xa1 Ya0 Ya1 Xa4 Xa5 Ya4 Ya5 + tritmp[1] = _mm256_unpackhi_ps(tri[0], tri[3]); //1! Za0 Za1 1 1 Za4 Za5 1 1 + + tritmp[2] = _mm256_unpacklo_ps(tri[6], tri[9]); //2! Xa2 Xa3 Ya2 Ya3 Xa6 Xa7 Ya6 Ya7 + tritmp[3] = _mm256_unpackhi_ps(tri[6], tri[9]); //3! Za2 Za3 1 1 Za6 Za7 1 1 + + tritmp[4] = _mm256_unpacklo_ps(tri[1], tri[4]); //4! Xb0 Xb1 Yb0 Yb1 Xb4 Xb5 Yb4 Yb5 + tritmp[5] = _mm256_unpackhi_ps(tri[1], tri[4]); //5! Zb0 Zb1 1 1 Zb4 Zb5 1 1 + + tritmp[6] = _mm256_unpacklo_ps(tri[7], tri[10]); //6! Xb2 Xb3 Yb2 Yb3 Xb6 Xb7 Yb6 Yb7 + tritmp[7] = _mm256_unpackhi_ps(tri[7], tri[10]); //7! Zb2 Zb3 1 1 Zb6 Zb7 1 1 + + tritmp[8] = _mm256_unpacklo_ps(tri[2], tri[5]); //8! Xc0 Xc1 Yc0 Yc1 Xc4 Xc5 Yc4 Yc5 + tritmp[9] = _mm256_unpackhi_ps(tri[2], tri[5]); //9! Zc0 Zc1 1 1 Zc4 Zc5 1 1 + + tritmp[10] = _mm256_unpacklo_ps(tri[8], tri[11]); //10! Xc2 Xc3 Yc2 Yc3 Xc6 Xc7 Yc6 Yc7 + tritmp[11] = _mm256_unpackhi_ps(tri[8], tri[11]); //11! 
Zc2 Zc3 1 1 Zc6 Zc7 1 1 + + /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ + triA[0] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[0]), + _mm256_castps_pd(tritmp[2]))); // Xa0 Xa1 Xa2 Xa3 Xa4 Xa5 Xa6 Xa7 + triA[1] = _mm256_castpd_ps( + _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[0]), + _mm256_castps_pd(tritmp[2]))); // Ya0 Ya1 Ya2 Ya3 Ya4 Ya5 Ya6 Ya7 + triA[2] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[1]), + _mm256_castps_pd(tritmp[3]))); // Za0 Za1 Za2 Za3 Za4 Za5 Za6 Za7 + + triB[0] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[4]), + _mm256_castps_pd(tritmp[6]))); // Xb0 Xb1 Xb2 Xb3 Xb4 Xb5 Xb5 Xb7 + triB[1] = _mm256_castpd_ps( + _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[4]), + _mm256_castps_pd(tritmp[6]))); // Yb0 Yb1 Yb2 Yb3 Yb4 Yb5 Yb5 Yb7 + triB[2] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[5]), + _mm256_castps_pd(tritmp[7]))); // Zb0 Zb1 Zb2 Zb3 Zb4 Zb5 Zb5 Zb7 + + triC[0] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[8]), + _mm256_castps_pd(tritmp[10]))); //Xc0 Xc1 Xc2 Xc3 Xc4 Xc5 Xc6 Xc7 + triC[1] = _mm256_castpd_ps( + _mm256_unpackhi_pd(_mm256_castps_pd(tritmp[8]), + _mm256_castps_pd(tritmp[10]))); //Yc0 Yc1 Yc2 Yc3 Yc4 Yc5 Yc6 Yc7 + triC[2] = _mm256_castpd_ps( + _mm256_unpacklo_pd(_mm256_castps_pd(tritmp[9]), + _mm256_castps_pd(tritmp[11]))); //Zc0 Zc1 Zc2 Zc3 Zc4 Zc5 Zc6 Zc7 + + /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ + + int result = ray_triangle_intersect8(kg, + P, + dir, + isect, + visibility, + object, + triA, + triB, + triC, + prim_addr, + prim_num, + num_hits, + max_hits, + num_hits_in_instance, + isect_t); + return result; } -#endif /* __KERNEL_AVX2__ */ +#endif /* __KERNEL_AVX2__ */ /* Special ray intersection routines for subsurface scattering. 
In that case we * only want to intersect with primitives in the same object, and if case of @@ -479,106 +496,108 @@ ccl_device_inline int triangle_intersect8( */ #ifdef __BVH_LOCAL__ -ccl_device_inline bool triangle_intersect_local( - KernelGlobals *kg, - LocalIntersection *local_isect, - float3 P, - float3 dir, - int object, - int local_object, - int prim_addr, - float tmax, - uint *lcg_state, - int max_hits) +ccl_device_inline bool triangle_intersect_local(KernelGlobals *kg, + LocalIntersection *local_isect, + float3 P, + float3 dir, + int object, + int local_object, + int prim_addr, + float tmax, + uint *lcg_state, + int max_hits) { - /* Only intersect with matching object, for instanced objects we - * already know we are only intersecting the right object. */ - if(object == OBJECT_NONE) { - if(kernel_tex_fetch(__prim_object, prim_addr) != local_object) { - return false; - } - } - - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex]; -#else - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2)); -#endif - float t, u, v; - if(!ray_triangle_intersect(P, - dir, - tmax, -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - ssef_verts, -#else - tri_a, tri_b, tri_c, -#endif - &u, &v, &t)) - { - return false; - } - - /* If no actual hit information is requested, just return here. */ - if(max_hits == 0) { - return true; - } - - int hit; - if(lcg_state) { - /* Record up to max_hits intersections. 
*/ - for(int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { - if(local_isect->hits[i].t == t) { - return false; - } - } - - local_isect->num_hits++; - - if(local_isect->num_hits <= max_hits) { - hit = local_isect->num_hits - 1; - } - else { - /* reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it */ - hit = lcg_step_uint(lcg_state) % local_isect->num_hits; - - if(hit >= max_hits) - return false; - } - } - else { - /* Record closest intersection only. */ - if(local_isect->num_hits && t > local_isect->hits[0].t) { - return false; - } - - hit = 0; - local_isect->num_hits = 1; - } - - /* Record intersection. */ - Intersection *isect = &local_isect->hits[hit]; - isect->prim = prim_addr; - isect->object = object; - isect->type = PRIMITIVE_TRIANGLE; - isect->u = u; - isect->v = v; - isect->t = t; - - /* Record geometric normal. */ -#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)), - tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)), - tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2)); -#endif - local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); + /* Only intersect with matching object, for instanced objects we + * already know we are only intersecting the right object. 
*/ + if (object == OBJECT_NONE) { + if (kernel_tex_fetch(__prim_object, prim_addr) != local_object) { + return false; + } + } + + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const ssef *ssef_verts = (ssef *)&kg->__prim_tri_verts.data[tri_vindex]; +# else + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); +# endif + float t, u, v; + if (!ray_triangle_intersect(P, + dir, + tmax, +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + ssef_verts, +# else + tri_a, + tri_b, + tri_c, +# endif + &u, + &v, + &t)) { + return false; + } + + /* If no actual hit information is requested, just return here. */ + if (max_hits == 0) { + return true; + } + + int hit; + if (lcg_state) { + /* Record up to max_hits intersections. */ + for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { + if (local_isect->hits[i].t == t) { + return false; + } + } + + local_isect->num_hits++; + + if (local_isect->num_hits <= max_hits) { + hit = local_isect->num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % local_isect->num_hits; + + if (hit >= max_hits) + return false; + } + } + else { + /* Record closest intersection only. */ + if (local_isect->num_hits && t > local_isect->hits[0].t) { + return false; + } + + hit = 0; + local_isect->num_hits = 1; + } + + /* Record intersection. */ + Intersection *isect = &local_isect->hits[hit]; + isect->prim = prim_addr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + + /* Record geometric normal. 
*/ +# if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)), + tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)), + tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); +# endif + local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); - return false; + return false; } -#endif /* __BVH_LOCAL__ */ +#endif /* __BVH_LOCAL__ */ /* Refine triangle intersection to more precise hit point. For rays that travel * far the precision is often not so good, this reintersects the primitive from @@ -596,61 +615,61 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; #ifdef __INTERSECTION_REFINE__ - if(isect->object != OBJECT_NONE) { - if(UNLIKELY(t == 0.0f)) { - return P; - } + if (isect->object != OBJECT_NONE) { + if (UNLIKELY(t == 0.0f)) { + return P; + } # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; # else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); # endif - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - P = P + D*t; - - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); - float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); - float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); - float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - 
tri_c.z); - float3 qvec = cross(tvec, edge1); - float3 pvec = cross(D, edge2); - float det = dot(edge1, pvec); - if(det != 0.0f) { - /* If determinant is zero it means ray lies in the plane of - * the triangle. It is possible in theory due to watertight - * nature of triangle intersection. For such cases we simply - * don't refine intersection hoping it'll go all fine. - */ - float rt = dot(edge2, qvec) / det; - P = P + D*rt; - } - - if(isect->object != OBJECT_NONE) { + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D * t); + D = normalize_len(D, &t); + } + + P = P + D * t; + + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float det = dot(edge1, pvec); + if (det != 0.0f) { + /* If determinant is zero it means ray lies in the plane of + * the triangle. It is possible in theory due to watertight + * nature of triangle intersection. For such cases we simply + * don't refine intersection hoping it'll go all fine. 
+ */ + float rt = dot(edge2, qvec) / det; + P = P + D * rt; + } + + if (isect->object != OBJECT_NONE) { # ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; # else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); # endif - P = transform_point(&tfm, P); - } + P = transform_point(&tfm, P); + } - return P; + return P; #else - return P + D*t; + return P + D * t; #endif } @@ -662,61 +681,57 @@ ccl_device_inline float3 triangle_refine_local(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; - if(isect->object != OBJECT_NONE) { + if (isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; #else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_INVERSE_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); #endif - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } - P = P + D*t; + P = P + D * t; #ifdef __INTERSECTION_REFINE__ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); - const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), - tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), - tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); - float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); - float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); - float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); - float3 qvec = cross(tvec, edge1); - float3 pvec = cross(D, edge2); - float det = dot(edge1, pvec); - if(det 
!= 0.0f) { - /* If determinant is zero it means ray lies in the plane of - * the triangle. It is possible in theory due to watertight - * nature of triangle intersection. For such cases we simply - * don't refine intersection hoping it'll go all fine. - */ - float rt = dot(edge2, qvec) / det; - P = P + D*rt; - } -#endif /* __INTERSECTION_REFINE__ */ - - if(isect->object != OBJECT_NONE) { + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); + const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0), + tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1), + tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2); + float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z); + float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z); + float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z); + float3 qvec = cross(tvec, edge1); + float3 pvec = cross(D, edge2); + float det = dot(edge1, pvec); + if (det != 0.0f) { + /* If determinant is zero it means ray lies in the plane of + * the triangle. It is possible in theory due to watertight + * nature of triangle intersection. For such cases we simply + * don't refine intersection hoping it'll go all fine. 
+ */ + float rt = dot(edge2, qvec) / det; + P = P + D * rt; + } +#endif /* __INTERSECTION_REFINE__ */ + + if (isect->object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; #else - Transform tfm = object_fetch_transform(kg, - isect->object, - OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); #endif - P = transform_point(&tfm, P); - } + P = transform_point(&tfm, P); + } - return P; + return P; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 1977d263ece..96cf35a40dc 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -33,41 +33,47 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P) { - /* todo: optimize this so it's just a single matrix multiplication when - * possible (not motion blur), or perhaps even just translation + scale */ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM); + /* todo: optimize this so it's just a single matrix multiplication when + * possible (not motion blur), or perhaps even just translation + scale */ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM); - object_inverse_position_transform(kg, sd, &P); + object_inverse_position_transform(kg, sd, &P); - if(desc.offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, desc); - P = transform_point(&tfm, P); - } + if (desc.offset != ATTR_STD_NOT_FOUND) { + Transform tfm = primitive_attribute_matrix(kg, sd, desc); + P = transform_point(&tfm, P); + } - return P; + return P; } -ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) +ccl_device float volume_attribute_float(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) { - float3 P = 
volume_normalized_position(kg, sd, sd->P); - InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); - return average(float4_to_float3(r)); + float3 P = volume_normalized_position(kg, sd, sd->P); + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : + INTERPOLATION_NONE; + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); + return average(float4_to_float3(r)); } -ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) +ccl_device float3 volume_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) { - float3 P = volume_normalized_position(kg, sd, sd->P); - InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; - float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); + float3 P = volume_normalized_position(kg, sd, sd->P); + InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC) ? INTERPOLATION_CUBIC : + INTERPOLATION_NONE; + float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); - if(r.w > 1e-6f && r.w != 1.0f) { - /* For RGBA colors, unpremultiply after interpolation. */ - return float4_to_float3(r) / r.w; - } - else { - return float4_to_float3(r); - } + if (r.w > 1e-6f && r.w != 1.0f) { + /* For RGBA colors, unpremultiply after interpolation. 
*/ + return float4_to_float3(r) / r.w; + } + else { + return float4_to_float3(r); + } } #endif diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 1c8c91d15e6..dfdd8843f29 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -24,8 +24,8 @@ CCL_NAMESPACE_BEGIN -#define KERNEL_NAME_JOIN(x, y, z) x ## _ ## y ## _ ## z -#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name) +#define KERNEL_NAME_JOIN(x, y, z) x##_##y##_##z +#define KERNEL_NAME_EVAL(arch, name) KERNEL_NAME_JOIN(kernel, arch, name) #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name) struct KernelGlobals; @@ -38,10 +38,7 @@ void *kernel_osl_memory(KernelGlobals *kg); bool kernel_osl_use(KernelGlobals *kg); void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); -void kernel_tex_copy(KernelGlobals *kg, - const char *name, - void *mem, - size_t size); +void kernel_tex_copy(KernelGlobals *kg, const char *name, void *mem, size_t size); #define KERNEL_ARCH cpu #include "kernel/kernels/cpu/kernel_cpu.h" @@ -63,4 +60,4 @@ void kernel_tex_copy(KernelGlobals *kg, CCL_NAMESPACE_END -#endif /* __KERNEL_H__ */ +#endif /* __KERNEL_H__ */ diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 86ad6e1a061..b9d723222a1 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -21,149 +21,150 @@ CCL_NAMESPACE_BEGIN * BSDF evaluation result, split per BSDF type. This is used to accumulate * render passes separately. 
*/ -ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, - const ShaderData *sd); +ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd); -ccl_device_inline void bsdf_eval_init(BsdfEval *eval, ClosureType type, float3 value, int use_light_pass) +ccl_device_inline void bsdf_eval_init(BsdfEval *eval, + ClosureType type, + float3 value, + int use_light_pass) { #ifdef __PASSES__ - eval->use_light_pass = use_light_pass; - - if(eval->use_light_pass) { - eval->diffuse = make_float3(0.0f, 0.0f, 0.0f); - eval->glossy = make_float3(0.0f, 0.0f, 0.0f); - eval->transmission = make_float3(0.0f, 0.0f, 0.0f); - eval->transparent = make_float3(0.0f, 0.0f, 0.0f); - eval->subsurface = make_float3(0.0f, 0.0f, 0.0f); - eval->scatter = make_float3(0.0f, 0.0f, 0.0f); - - if(type == CLOSURE_BSDF_TRANSPARENT_ID) - eval->transparent = value; - else if(CLOSURE_IS_BSDF_DIFFUSE(type)) - eval->diffuse = value; - else if(CLOSURE_IS_BSDF_GLOSSY(type)) - eval->glossy = value; - else if(CLOSURE_IS_BSDF_TRANSMISSION(type)) - eval->transmission = value; - else if(CLOSURE_IS_BSDF_BSSRDF(type)) - eval->subsurface = value; - else if(CLOSURE_IS_PHASE(type)) - eval->scatter = value; - } - else -#endif - { - eval->diffuse = value; - } + eval->use_light_pass = use_light_pass; + + if (eval->use_light_pass) { + eval->diffuse = make_float3(0.0f, 0.0f, 0.0f); + eval->glossy = make_float3(0.0f, 0.0f, 0.0f); + eval->transmission = make_float3(0.0f, 0.0f, 0.0f); + eval->transparent = make_float3(0.0f, 0.0f, 0.0f); + eval->subsurface = make_float3(0.0f, 0.0f, 0.0f); + eval->scatter = make_float3(0.0f, 0.0f, 0.0f); + + if (type == CLOSURE_BSDF_TRANSPARENT_ID) + eval->transparent = value; + else if (CLOSURE_IS_BSDF_DIFFUSE(type)) + eval->diffuse = value; + else if (CLOSURE_IS_BSDF_GLOSSY(type)) + eval->glossy = value; + else if (CLOSURE_IS_BSDF_TRANSMISSION(type)) + eval->transmission = value; + else if (CLOSURE_IS_BSDF_BSSRDF(type)) + eval->subsurface = value; + else if 
(CLOSURE_IS_PHASE(type)) + eval->scatter = value; + } + else +#endif + { + eval->diffuse = value; + } #ifdef __SHADOW_TRICKS__ - eval->sum_no_mis = make_float3(0.0f, 0.0f, 0.0f); + eval->sum_no_mis = make_float3(0.0f, 0.0f, 0.0f); #endif } -ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, ClosureType type, float3 value, float mis_weight) +ccl_device_inline void bsdf_eval_accum(BsdfEval *eval, + ClosureType type, + float3 value, + float mis_weight) { #ifdef __SHADOW_TRICKS__ - eval->sum_no_mis += value; + eval->sum_no_mis += value; #endif - value *= mis_weight; + value *= mis_weight; #ifdef __PASSES__ - if(eval->use_light_pass) { - if(CLOSURE_IS_BSDF_DIFFUSE(type)) - eval->diffuse += value; - else if(CLOSURE_IS_BSDF_GLOSSY(type)) - eval->glossy += value; - else if(CLOSURE_IS_BSDF_TRANSMISSION(type)) - eval->transmission += value; - else if(CLOSURE_IS_BSDF_BSSRDF(type)) - eval->subsurface += value; - else if(CLOSURE_IS_PHASE(type)) - eval->scatter += value; - - /* skipping transparent, this function is used by for eval(), will be zero then */ - } - else -#endif - { - eval->diffuse += value; - } + if (eval->use_light_pass) { + if (CLOSURE_IS_BSDF_DIFFUSE(type)) + eval->diffuse += value; + else if (CLOSURE_IS_BSDF_GLOSSY(type)) + eval->glossy += value; + else if (CLOSURE_IS_BSDF_TRANSMISSION(type)) + eval->transmission += value; + else if (CLOSURE_IS_BSDF_BSSRDF(type)) + eval->subsurface += value; + else if (CLOSURE_IS_PHASE(type)) + eval->scatter += value; + + /* skipping transparent, this function is used by for eval(), will be zero then */ + } + else +#endif + { + eval->diffuse += value; + } } ccl_device_inline bool bsdf_eval_is_zero(BsdfEval *eval) { #ifdef __PASSES__ - if(eval->use_light_pass) { - return is_zero(eval->diffuse) - && is_zero(eval->glossy) - && is_zero(eval->transmission) - && is_zero(eval->transparent) - && is_zero(eval->subsurface) - && is_zero(eval->scatter); - } - else -#endif - { - return is_zero(eval->diffuse); - } + if 
(eval->use_light_pass) { + return is_zero(eval->diffuse) && is_zero(eval->glossy) && is_zero(eval->transmission) && + is_zero(eval->transparent) && is_zero(eval->subsurface) && is_zero(eval->scatter); + } + else +#endif + { + return is_zero(eval->diffuse); + } } ccl_device_inline void bsdf_eval_mis(BsdfEval *eval, float value) { #ifdef __PASSES__ - if(eval->use_light_pass) { - eval->diffuse *= value; - eval->glossy *= value; - eval->transmission *= value; - eval->subsurface *= value; - eval->scatter *= value; - - /* skipping transparent, this function is used by for eval(), will be zero then */ - } - else -#endif - { - eval->diffuse *= value; - } + if (eval->use_light_pass) { + eval->diffuse *= value; + eval->glossy *= value; + eval->transmission *= value; + eval->subsurface *= value; + eval->scatter *= value; + + /* skipping transparent, this function is used by for eval(), will be zero then */ + } + else +#endif + { + eval->diffuse *= value; + } } ccl_device_inline void bsdf_eval_mul(BsdfEval *eval, float value) { #ifdef __SHADOW_TRICKS__ - eval->sum_no_mis *= value; + eval->sum_no_mis *= value; #endif - bsdf_eval_mis(eval, value); + bsdf_eval_mis(eval, value); } ccl_device_inline void bsdf_eval_mul3(BsdfEval *eval, float3 value) { #ifdef __SHADOW_TRICKS__ - eval->sum_no_mis *= value; + eval->sum_no_mis *= value; #endif #ifdef __PASSES__ - if(eval->use_light_pass) { - eval->diffuse *= value; - eval->glossy *= value; - eval->transmission *= value; - eval->subsurface *= value; - eval->scatter *= value; - - /* skipping transparent, this function is used by for eval(), will be zero then */ - } - else - eval->diffuse *= value; + if (eval->use_light_pass) { + eval->diffuse *= value; + eval->glossy *= value; + eval->transmission *= value; + eval->subsurface *= value; + eval->scatter *= value; + + /* skipping transparent, this function is used by for eval(), will be zero then */ + } + else + eval->diffuse *= value; #else - eval->diffuse *= value; + eval->diffuse *= 
value; #endif } ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval) { #ifdef __PASSES__ - if(eval->use_light_pass) { - return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter; - } - else + if (eval->use_light_pass) { + return eval->diffuse + eval->glossy + eval->transmission + eval->subsurface + eval->scatter; + } + else #endif - return eval->diffuse; + return eval->diffuse; } /* Path Radiance @@ -175,115 +176,113 @@ ccl_device_inline float3 bsdf_eval_sum(const BsdfEval *eval) ccl_device_inline void path_radiance_init(PathRadiance *L, int use_light_pass) { - /* clear all */ + /* clear all */ #ifdef __PASSES__ - L->use_light_pass = use_light_pass; - - if(use_light_pass) { - L->indirect = make_float3(0.0f, 0.0f, 0.0f); - L->direct_emission = make_float3(0.0f, 0.0f, 0.0f); - - L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f); - L->color_glossy = make_float3(0.0f, 0.0f, 0.0f); - L->color_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f); - - L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f); - L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); - L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f); - - L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f); - - L->transparent = 0.0f; - L->emission = make_float3(0.0f, 0.0f, 0.0f); - L->background = make_float3(0.0f, 0.0f, 0.0f); - L->ao = make_float3(0.0f, 0.0f, 0.0f); - L->shadow = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - L->mist = 0.0f; - - L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f); - L->state.glossy = make_float3(0.0f, 0.0f, 0.0f); - L->state.transmission = make_float3(0.0f, 0.0f, 0.0f); - 
L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->state.scatter = make_float3(0.0f, 0.0f, 0.0f); - L->state.direct = make_float3(0.0f, 0.0f, 0.0f); - } - else -#endif - { - L->transparent = 0.0f; - L->emission = make_float3(0.0f, 0.0f, 0.0f); - } + L->use_light_pass = use_light_pass; + + if (use_light_pass) { + L->indirect = make_float3(0.0f, 0.0f, 0.0f); + L->direct_emission = make_float3(0.0f, 0.0f, 0.0f); + + L->color_diffuse = make_float3(0.0f, 0.0f, 0.0f); + L->color_glossy = make_float3(0.0f, 0.0f, 0.0f); + L->color_transmission = make_float3(0.0f, 0.0f, 0.0f); + L->color_subsurface = make_float3(0.0f, 0.0f, 0.0f); + + L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f); + L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); + L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f); + L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f); + + L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f); + + L->transparent = 0.0f; + L->emission = make_float3(0.0f, 0.0f, 0.0f); + L->background = make_float3(0.0f, 0.0f, 0.0f); + L->ao = make_float3(0.0f, 0.0f, 0.0f); + L->shadow = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + L->mist = 0.0f; + + L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f); + L->state.glossy = make_float3(0.0f, 0.0f, 0.0f); + L->state.transmission = make_float3(0.0f, 0.0f, 0.0f); + L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->state.scatter = make_float3(0.0f, 0.0f, 0.0f); + L->state.direct = make_float3(0.0f, 0.0f, 0.0f); + } + else +#endif + { + L->transparent = 0.0f; + L->emission = make_float3(0.0f, 0.0f, 0.0f); + } #ifdef __SHADOW_TRICKS__ - L->path_total = make_float3(0.0f, 0.0f, 0.0f); - L->path_total_shaded = make_float3(0.0f, 0.0f, 0.0f); - 
L->shadow_background_color = make_float3(0.0f, 0.0f, 0.0f); - L->shadow_throughput = 0.0f; - L->shadow_transparency = 1.0f; - L->has_shadow_catcher = 0; + L->path_total = make_float3(0.0f, 0.0f, 0.0f); + L->path_total_shaded = make_float3(0.0f, 0.0f, 0.0f); + L->shadow_background_color = make_float3(0.0f, 0.0f, 0.0f); + L->shadow_throughput = 0.0f; + L->shadow_transparency = 1.0f; + L->has_shadow_catcher = 0; #endif #ifdef __DENOISING_FEATURES__ - L->denoising_normal = make_float3(0.0f, 0.0f, 0.0f); - L->denoising_albedo = make_float3(0.0f, 0.0f, 0.0f); - L->denoising_depth = 0.0f; + L->denoising_normal = make_float3(0.0f, 0.0f, 0.0f); + L->denoising_albedo = make_float3(0.0f, 0.0f, 0.0f); + L->denoising_depth = 0.0f; #endif #ifdef __KERNEL_DEBUG__ - L->debug_data.num_bvh_traversed_nodes = 0; - L->debug_data.num_bvh_traversed_instances = 0; - L->debug_data.num_bvh_intersections = 0; - L->debug_data.num_ray_bounces = 0; + L->debug_data.num_bvh_traversed_nodes = 0; + L->debug_data.num_bvh_traversed_instances = 0; + L->debug_data.num_bvh_intersections = 0; + L->debug_data.num_ray_bounces = 0; #endif } -ccl_device_inline void path_radiance_bsdf_bounce( - KernelGlobals *kg, - PathRadianceState *L_state, - ccl_addr_space float3 *throughput, - BsdfEval *bsdf_eval, - float bsdf_pdf, int bounce, int bsdf_label) +ccl_device_inline void path_radiance_bsdf_bounce(KernelGlobals *kg, + PathRadianceState *L_state, + ccl_addr_space float3 *throughput, + BsdfEval *bsdf_eval, + float bsdf_pdf, + int bounce, + int bsdf_label) { - float inverse_pdf = 1.0f/bsdf_pdf; + float inverse_pdf = 1.0f / bsdf_pdf; #ifdef __PASSES__ - if(kernel_data.film.use_light_pass) { - if(bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) { - /* first on directly visible surface */ - float3 value = *throughput*inverse_pdf; - - L_state->diffuse = bsdf_eval->diffuse*value; - L_state->glossy = bsdf_eval->glossy*value; - L_state->transmission = bsdf_eval->transmission*value; - L_state->subsurface = 
bsdf_eval->subsurface*value; - L_state->scatter = bsdf_eval->scatter*value; - - *throughput = L_state->diffuse + - L_state->glossy + - L_state->transmission + - L_state->subsurface + - L_state->scatter; - - L_state->direct = *throughput; - } - else { - /* transparent bounce before first hit, or indirectly visible through BSDF */ - float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf; - *throughput *= sum; - } - } - else -#endif - { - *throughput *= bsdf_eval->diffuse*inverse_pdf; - } + if (kernel_data.film.use_light_pass) { + if (bounce == 0 && !(bsdf_label & LABEL_TRANSPARENT)) { + /* first on directly visible surface */ + float3 value = *throughput * inverse_pdf; + + L_state->diffuse = bsdf_eval->diffuse * value; + L_state->glossy = bsdf_eval->glossy * value; + L_state->transmission = bsdf_eval->transmission * value; + L_state->subsurface = bsdf_eval->subsurface * value; + L_state->scatter = bsdf_eval->scatter * value; + + *throughput = L_state->diffuse + L_state->glossy + L_state->transmission + + L_state->subsurface + L_state->scatter; + + L_state->direct = *throughput; + } + else { + /* transparent bounce before first hit, or indirectly visible through BSDF */ + float3 sum = (bsdf_eval_sum(bsdf_eval) + bsdf_eval->transparent) * inverse_pdf; + *throughput *= sum; + } + } + else +#endif + { + *throughput *= bsdf_eval->diffuse * inverse_pdf; + } } ccl_device_inline void path_radiance_accum_emission(PathRadiance *L, @@ -292,25 +291,25 @@ ccl_device_inline void path_radiance_accum_emission(PathRadiance *L, float3 value) { #ifdef __SHADOW_TRICKS__ - if(state->flag & PATH_RAY_SHADOW_CATCHER) { - return; - } + if (state->flag & PATH_RAY_SHADOW_CATCHER) { + return; + } #endif #ifdef __PASSES__ - if(L->use_light_pass) { - if(state->bounce == 0) - L->emission += throughput*value; - else if(state->bounce == 1) - L->direct_emission += throughput*value; - else - L->indirect += throughput*value; - } - else -#endif - { - L->emission += 
throughput*value; - } + if (L->use_light_pass) { + if (state->bounce == 0) + L->emission += throughput * value; + else if (state->bounce == 1) + L->direct_emission += throughput * value; + else + L->indirect += throughput * value; + } + else +#endif + { + L->emission += throughput * value; + } } ccl_device_inline void path_radiance_accum_ao(PathRadiance *L, @@ -320,57 +319,56 @@ ccl_device_inline void path_radiance_accum_ao(PathRadiance *L, float3 bsdf, float3 ao) { - /* Store AO pass. */ - if(L->use_light_pass && state->bounce == 0) { - L->ao += alpha*throughput*ao; - } + /* Store AO pass. */ + if (L->use_light_pass && state->bounce == 0) { + L->ao += alpha * throughput * ao; + } #ifdef __SHADOW_TRICKS__ - /* For shadow catcher, accumulate ratio. */ - if(state->flag & PATH_RAY_STORE_SHADOW_INFO) { - float3 light = throughput * bsdf; - L->path_total += light; - L->path_total_shaded += ao * light; + /* For shadow catcher, accumulate ratio. */ + if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { + float3 light = throughput * bsdf; + L->path_total += light; + L->path_total_shaded += ao * light; - if(state->flag & PATH_RAY_SHADOW_CATCHER) { - return; - } - } + if (state->flag & PATH_RAY_SHADOW_CATCHER) { + return; + } + } #endif #ifdef __PASSES__ - if(L->use_light_pass) { - if(state->bounce == 0) { - /* Directly visible lighting. */ - L->direct_diffuse += throughput*bsdf*ao; - } - else { - /* Indirectly visible lighting after BSDF bounce. */ - L->indirect += throughput*bsdf*ao; - } - } - else -#endif - { - L->emission += throughput*bsdf*ao; - } + if (L->use_light_pass) { + if (state->bounce == 0) { + /* Directly visible lighting. */ + L->direct_diffuse += throughput * bsdf * ao; + } + else { + /* Indirectly visible lighting after BSDF bounce. 
*/ + L->indirect += throughput * bsdf * ao; + } + } + else +#endif + { + L->emission += throughput * bsdf * ao; + } } -ccl_device_inline void path_radiance_accum_total_ao( - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - float3 bsdf) +ccl_device_inline void path_radiance_accum_total_ao(PathRadiance *L, + ccl_addr_space PathState *state, + float3 throughput, + float3 bsdf) { #ifdef __SHADOW_TRICKS__ - if(state->flag & PATH_RAY_STORE_SHADOW_INFO) { - L->path_total += throughput * bsdf; - } + if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { + L->path_total += throughput * bsdf; + } #else - (void) L; - (void) state; - (void) throughput; - (void) bsdf; + (void)L; + (void)state; + (void)throughput; + (void)bsdf; #endif } @@ -383,171 +381,166 @@ ccl_device_inline void path_radiance_accum_light(PathRadiance *L, bool is_lamp) { #ifdef __SHADOW_TRICKS__ - if(state->flag & PATH_RAY_STORE_SHADOW_INFO) { - float3 light = throughput * bsdf_eval->sum_no_mis; - L->path_total += light; - L->path_total_shaded += shadow * light; + if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { + float3 light = throughput * bsdf_eval->sum_no_mis; + L->path_total += light; + L->path_total_shaded += shadow * light; - if(state->flag & PATH_RAY_SHADOW_CATCHER) { - return; - } - } + if (state->flag & PATH_RAY_SHADOW_CATCHER) { + return; + } + } #endif #ifdef __PASSES__ - if(L->use_light_pass) { - if(state->bounce == 0) { - /* directly visible lighting */ - L->direct_diffuse += throughput*bsdf_eval->diffuse*shadow; - L->direct_glossy += throughput*bsdf_eval->glossy*shadow; - L->direct_transmission += throughput*bsdf_eval->transmission*shadow; - L->direct_subsurface += throughput*bsdf_eval->subsurface*shadow; - L->direct_scatter += throughput*bsdf_eval->scatter*shadow; - - if(is_lamp) { - L->shadow.x += shadow.x*shadow_fac; - L->shadow.y += shadow.y*shadow_fac; - L->shadow.z += shadow.z*shadow_fac; - } - } - else { - /* indirectly visible lighting after BSDF bounce */ - 
L->indirect += throughput*bsdf_eval_sum(bsdf_eval)*shadow; - } - } - else -#endif - { - L->emission += throughput*bsdf_eval->diffuse*shadow; - } + if (L->use_light_pass) { + if (state->bounce == 0) { + /* directly visible lighting */ + L->direct_diffuse += throughput * bsdf_eval->diffuse * shadow; + L->direct_glossy += throughput * bsdf_eval->glossy * shadow; + L->direct_transmission += throughput * bsdf_eval->transmission * shadow; + L->direct_subsurface += throughput * bsdf_eval->subsurface * shadow; + L->direct_scatter += throughput * bsdf_eval->scatter * shadow; + + if (is_lamp) { + L->shadow.x += shadow.x * shadow_fac; + L->shadow.y += shadow.y * shadow_fac; + L->shadow.z += shadow.z * shadow_fac; + } + } + else { + /* indirectly visible lighting after BSDF bounce */ + L->indirect += throughput * bsdf_eval_sum(bsdf_eval) * shadow; + } + } + else +#endif + { + L->emission += throughput * bsdf_eval->diffuse * shadow; + } } -ccl_device_inline void path_radiance_accum_total_light( - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - const BsdfEval *bsdf_eval) +ccl_device_inline void path_radiance_accum_total_light(PathRadiance *L, + ccl_addr_space PathState *state, + float3 throughput, + const BsdfEval *bsdf_eval) { #ifdef __SHADOW_TRICKS__ - if(state->flag & PATH_RAY_STORE_SHADOW_INFO) { - L->path_total += throughput * bsdf_eval->sum_no_mis; - } + if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { + L->path_total += throughput * bsdf_eval->sum_no_mis; + } #else - (void) L; - (void) state; - (void) throughput; - (void) bsdf_eval; + (void)L; + (void)state; + (void)throughput; + (void)bsdf_eval; #endif } -ccl_device_inline void path_radiance_accum_background( - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput, - float3 value) +ccl_device_inline void path_radiance_accum_background(PathRadiance *L, + ccl_addr_space PathState *state, + float3 throughput, + float3 value) { #ifdef __SHADOW_TRICKS__ - if(state->flag & 
PATH_RAY_STORE_SHADOW_INFO) { - L->path_total += throughput * value; - L->path_total_shaded += throughput * value * L->shadow_transparency; + if (state->flag & PATH_RAY_STORE_SHADOW_INFO) { + L->path_total += throughput * value; + L->path_total_shaded += throughput * value * L->shadow_transparency; - if(state->flag & PATH_RAY_SHADOW_CATCHER) { - return; - } - } + if (state->flag & PATH_RAY_SHADOW_CATCHER) { + return; + } + } #endif #ifdef __PASSES__ - if(L->use_light_pass) { - if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) - L->background += throughput*value; - else if(state->bounce == 1) - L->direct_emission += throughput*value; - else - L->indirect += throughput*value; - } - else -#endif - { - L->emission += throughput*value; - } + if (L->use_light_pass) { + if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) + L->background += throughput * value; + else if (state->bounce == 1) + L->direct_emission += throughput * value; + else + L->indirect += throughput * value; + } + else +#endif + { + L->emission += throughput * value; + } #ifdef __DENOISING_FEATURES__ - L->denoising_albedo += state->denoising_feature_weight * value; -#endif /* __DENOISING_FEATURES__ */ + L->denoising_albedo += state->denoising_feature_weight * value; +#endif /* __DENOISING_FEATURES__ */ } -ccl_device_inline void path_radiance_accum_transparent( - PathRadiance *L, - ccl_addr_space PathState *state, - float3 throughput) +ccl_device_inline void path_radiance_accum_transparent(PathRadiance *L, + ccl_addr_space PathState *state, + float3 throughput) { - L->transparent += average(throughput); + L->transparent += average(throughput); } #ifdef __SHADOW_TRICKS__ -ccl_device_inline void path_radiance_accum_shadowcatcher( - PathRadiance *L, - float3 throughput, - float3 background) +ccl_device_inline void path_radiance_accum_shadowcatcher(PathRadiance *L, + float3 throughput, + float3 background) { - L->shadow_throughput += average(throughput); - L->shadow_background_color += throughput * background; 
- L->has_shadow_catcher = 1; + L->shadow_throughput += average(throughput); + L->shadow_background_color += throughput * background; + L->has_shadow_catcher = 1; } #endif ccl_device_inline void path_radiance_sum_indirect(PathRadiance *L) { #ifdef __PASSES__ - /* this division is a bit ugly, but means we only have to keep track of - * only a single throughput further along the path, here we recover just - * the indirect path that is not influenced by any particular BSDF type */ - if(L->use_light_pass) { - L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct); - L->direct_diffuse += L->state.diffuse*L->direct_emission; - L->direct_glossy += L->state.glossy*L->direct_emission; - L->direct_transmission += L->state.transmission*L->direct_emission; - L->direct_subsurface += L->state.subsurface*L->direct_emission; - L->direct_scatter += L->state.scatter*L->direct_emission; - - L->indirect = safe_divide_color(L->indirect, L->state.direct); - L->indirect_diffuse += L->state.diffuse*L->indirect; - L->indirect_glossy += L->state.glossy*L->indirect; - L->indirect_transmission += L->state.transmission*L->indirect; - L->indirect_subsurface += L->state.subsurface*L->indirect; - L->indirect_scatter += L->state.scatter*L->indirect; - } + /* this division is a bit ugly, but means we only have to keep track of + * only a single throughput further along the path, here we recover just + * the indirect path that is not influenced by any particular BSDF type */ + if (L->use_light_pass) { + L->direct_emission = safe_divide_color(L->direct_emission, L->state.direct); + L->direct_diffuse += L->state.diffuse * L->direct_emission; + L->direct_glossy += L->state.glossy * L->direct_emission; + L->direct_transmission += L->state.transmission * L->direct_emission; + L->direct_subsurface += L->state.subsurface * L->direct_emission; + L->direct_scatter += L->state.scatter * L->direct_emission; + + L->indirect = safe_divide_color(L->indirect, L->state.direct); + 
L->indirect_diffuse += L->state.diffuse * L->indirect; + L->indirect_glossy += L->state.glossy * L->indirect; + L->indirect_transmission += L->state.transmission * L->indirect; + L->indirect_subsurface += L->state.subsurface * L->indirect; + L->indirect_scatter += L->state.scatter * L->indirect; + } #endif } ccl_device_inline void path_radiance_reset_indirect(PathRadiance *L) { #ifdef __PASSES__ - if(L->use_light_pass) { - L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f); - L->state.glossy = make_float3(0.0f, 0.0f, 0.0f); - L->state.transmission = make_float3(0.0f, 0.0f, 0.0f); - L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->state.scatter = make_float3(0.0f, 0.0f, 0.0f); + if (L->use_light_pass) { + L->state.diffuse = make_float3(0.0f, 0.0f, 0.0f); + L->state.glossy = make_float3(0.0f, 0.0f, 0.0f); + L->state.transmission = make_float3(0.0f, 0.0f, 0.0f); + L->state.subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->state.scatter = make_float3(0.0f, 0.0f, 0.0f); - L->direct_emission = make_float3(0.0f, 0.0f, 0.0f); - L->indirect = make_float3(0.0f, 0.0f, 0.0f); - } + L->direct_emission = make_float3(0.0f, 0.0f, 0.0f); + L->indirect = make_float3(0.0f, 0.0f, 0.0f); + } #endif } -ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L, - const PathRadiance *L_src) +ccl_device_inline void path_radiance_copy_indirect(PathRadiance *L, const PathRadiance *L_src) { #ifdef __PASSES__ - if(L->use_light_pass) { - L->state = L_src->state; + if (L->use_light_pass) { + L->state = L_src->state; - L->direct_emission = L_src->direct_emission; - L->indirect = L_src->indirect; - } + L->direct_emission = L_src->direct_emission; + L->indirect = L_src->indirect; + } #endif } @@ -557,213 +550,219 @@ ccl_device_inline void path_radiance_sum_shadowcatcher(KernelGlobals *kg, float3 *L_sum, float *alpha) { - /* Calculate current shadow of the path. 
*/ - float path_total = average(L->path_total); - float shadow; - - if(UNLIKELY(!isfinite_safe(path_total))) { - kernel_assert(!"Non-finite total radiance along the path"); - shadow = 0.0f; - } - else if(path_total == 0.0f) { - shadow = L->shadow_transparency; - } - else { - float path_total_shaded = average(L->path_total_shaded); - shadow = path_total_shaded / path_total; - } - - /* Calculate final light sum and transparency for shadow catcher object. */ - if(kernel_data.background.transparent) { - *alpha -= L->shadow_throughput * shadow; - } - else { - L->shadow_background_color *= shadow; - *L_sum += L->shadow_background_color; - } + /* Calculate current shadow of the path. */ + float path_total = average(L->path_total); + float shadow; + + if (UNLIKELY(!isfinite_safe(path_total))) { + kernel_assert(!"Non-finite total radiance along the path"); + shadow = 0.0f; + } + else if (path_total == 0.0f) { + shadow = L->shadow_transparency; + } + else { + float path_total_shaded = average(L->path_total_shaded); + shadow = path_total_shaded / path_total; + } + + /* Calculate final light sum and transparency for shadow catcher object. 
*/ + if (kernel_data.background.transparent) { + *alpha -= L->shadow_throughput * shadow; + } + else { + L->shadow_background_color *= shadow; + *L_sum += L->shadow_background_color; + } } #endif -ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadiance *L, float *alpha) +ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, + PathRadiance *L, + float *alpha) { - float3 L_sum; - /* Light Passes are used */ + float3 L_sum; + /* Light Passes are used */ #ifdef __PASSES__ - float3 L_direct, L_indirect; - float clamp_direct = kernel_data.integrator.sample_clamp_direct; - float clamp_indirect = kernel_data.integrator.sample_clamp_indirect; - if(L->use_light_pass) { - path_radiance_sum_indirect(L); - - L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + L->direct_subsurface + L->direct_scatter + L->emission; - L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + L->indirect_subsurface + L->indirect_scatter; - - if(!kernel_data.background.transparent) - L_direct += L->background; - - L_sum = L_direct + L_indirect; - float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z); - - /* Reject invalid value */ - if(!isfinite_safe(sum)) { - kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!"); - L_sum = make_float3(0.0f, 0.0f, 0.0f); - - L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f); - L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); - L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f); - - L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f); - L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f); - - L->emission = make_float3(0.0f, 0.0f, 0.0f); - } - - /* Clamp direct and 
indirect samples */ -#ifdef __CLAMP_SAMPLE__ - else if(sum > clamp_direct || sum > clamp_indirect) { - float scale; - - /* Direct */ - float sum_direct = fabsf(L_direct.x) + fabsf(L_direct.y) + fabsf(L_direct.z); - if(sum_direct > clamp_direct) { - scale = clamp_direct/sum_direct; - L_direct *= scale; - - L->direct_diffuse *= scale; - L->direct_glossy *= scale; - L->direct_transmission *= scale; - L->direct_subsurface *= scale; - L->direct_scatter *= scale; - L->emission *= scale; - L->background *= scale; - } - - /* Indirect */ - float sum_indirect = fabsf(L_indirect.x) + fabsf(L_indirect.y) + fabsf(L_indirect.z); - if(sum_indirect > clamp_indirect) { - scale = clamp_indirect/sum_indirect; - L_indirect *= scale; - - L->indirect_diffuse *= scale; - L->indirect_glossy *= scale; - L->indirect_transmission *= scale; - L->indirect_subsurface *= scale; - L->indirect_scatter *= scale; - } - - /* Sum again, after clamping */ - L_sum = L_direct + L_indirect; - } -#endif - } - - /* No Light Passes */ - else -#endif - { - L_sum = L->emission; - - /* Reject invalid value */ - float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z); - if(!isfinite_safe(sum)) { - kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!"); - L_sum = make_float3(0.0f, 0.0f, 0.0f); - } - } - - /* Compute alpha. */ - *alpha = 1.0f - L->transparent; - - /* Add shadow catcher contributions. 
*/ + float3 L_direct, L_indirect; + float clamp_direct = kernel_data.integrator.sample_clamp_direct; + float clamp_indirect = kernel_data.integrator.sample_clamp_indirect; + if (L->use_light_pass) { + path_radiance_sum_indirect(L); + + L_direct = L->direct_diffuse + L->direct_glossy + L->direct_transmission + + L->direct_subsurface + L->direct_scatter + L->emission; + L_indirect = L->indirect_diffuse + L->indirect_glossy + L->indirect_transmission + + L->indirect_subsurface + L->indirect_scatter; + + if (!kernel_data.background.transparent) + L_direct += L->background; + + L_sum = L_direct + L_indirect; + float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z); + + /* Reject invalid value */ + if (!isfinite_safe(sum)) { + kernel_assert(!"Non-finite sum in path_radiance_clamp_and_sum!"); + L_sum = make_float3(0.0f, 0.0f, 0.0f); + + L->direct_diffuse = make_float3(0.0f, 0.0f, 0.0f); + L->direct_glossy = make_float3(0.0f, 0.0f, 0.0f); + L->direct_transmission = make_float3(0.0f, 0.0f, 0.0f); + L->direct_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->direct_scatter = make_float3(0.0f, 0.0f, 0.0f); + + L->indirect_diffuse = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_glossy = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_transmission = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_subsurface = make_float3(0.0f, 0.0f, 0.0f); + L->indirect_scatter = make_float3(0.0f, 0.0f, 0.0f); + + L->emission = make_float3(0.0f, 0.0f, 0.0f); + } + + /* Clamp direct and indirect samples */ +# ifdef __CLAMP_SAMPLE__ + else if (sum > clamp_direct || sum > clamp_indirect) { + float scale; + + /* Direct */ + float sum_direct = fabsf(L_direct.x) + fabsf(L_direct.y) + fabsf(L_direct.z); + if (sum_direct > clamp_direct) { + scale = clamp_direct / sum_direct; + L_direct *= scale; + + L->direct_diffuse *= scale; + L->direct_glossy *= scale; + L->direct_transmission *= scale; + L->direct_subsurface *= scale; + L->direct_scatter *= scale; + L->emission *= scale; + L->background *= 
scale; + } + + /* Indirect */ + float sum_indirect = fabsf(L_indirect.x) + fabsf(L_indirect.y) + fabsf(L_indirect.z); + if (sum_indirect > clamp_indirect) { + scale = clamp_indirect / sum_indirect; + L_indirect *= scale; + + L->indirect_diffuse *= scale; + L->indirect_glossy *= scale; + L->indirect_transmission *= scale; + L->indirect_subsurface *= scale; + L->indirect_scatter *= scale; + } + + /* Sum again, after clamping */ + L_sum = L_direct + L_indirect; + } +# endif + } + + /* No Light Passes */ + else +#endif + { + L_sum = L->emission; + + /* Reject invalid value */ + float sum = fabsf((L_sum).x) + fabsf((L_sum).y) + fabsf((L_sum).z); + if (!isfinite_safe(sum)) { + kernel_assert(!"Non-finite final sum in path_radiance_clamp_and_sum!"); + L_sum = make_float3(0.0f, 0.0f, 0.0f); + } + } + + /* Compute alpha. */ + *alpha = 1.0f - L->transparent; + + /* Add shadow catcher contributions. */ #ifdef __SHADOW_TRICKS__ - if(L->has_shadow_catcher) { - path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha); - } -#endif /* __SHADOW_TRICKS__ */ + if (L->has_shadow_catcher) { + path_radiance_sum_shadowcatcher(kg, L, &L_sum, alpha); + } +#endif /* __SHADOW_TRICKS__ */ - return L_sum; + return L_sum; } -ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg, PathRadiance *L, float3 *noisy, float3 *clean) +ccl_device_inline void path_radiance_split_denoising(KernelGlobals *kg, + PathRadiance *L, + float3 *noisy, + float3 *clean) { #ifdef __PASSES__ - kernel_assert(L->use_light_pass); - - *clean = L->emission + L->background; - *noisy = L->direct_scatter + L->indirect_scatter; - -# define ADD_COMPONENT(flag, component) \ - if(kernel_data.film.denoising_flags & flag) \ - *clean += component; \ - else \ - *noisy += component; - - ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR, L->direct_diffuse); - ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND, L->indirect_diffuse); - ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR, L->direct_glossy); - ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, 
L->indirect_glossy); - ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission); - ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission); - ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR, L->direct_subsurface); - ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND, L->indirect_subsurface); + kernel_assert(L->use_light_pass); + + *clean = L->emission + L->background; + *noisy = L->direct_scatter + L->indirect_scatter; + +# define ADD_COMPONENT(flag, component) \ + if (kernel_data.film.denoising_flags & flag) \ + *clean += component; \ + else \ + *noisy += component; + + ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_DIR, L->direct_diffuse); + ADD_COMPONENT(DENOISING_CLEAN_DIFFUSE_IND, L->indirect_diffuse); + ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_DIR, L->direct_glossy); + ADD_COMPONENT(DENOISING_CLEAN_GLOSSY_IND, L->indirect_glossy); + ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_DIR, L->direct_transmission); + ADD_COMPONENT(DENOISING_CLEAN_TRANSMISSION_IND, L->indirect_transmission); + ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_DIR, L->direct_subsurface); + ADD_COMPONENT(DENOISING_CLEAN_SUBSURFACE_IND, L->indirect_subsurface); # undef ADD_COMPONENT #else - *noisy = L->emission; - *clean = make_float3(0.0f, 0.0f, 0.0f); + *noisy = L->emission; + *clean = make_float3(0.0f, 0.0f, 0.0f); #endif #ifdef __SHADOW_TRICKS__ - if(L->has_shadow_catcher) { - *noisy += L->shadow_background_color; - } + if (L->has_shadow_catcher) { + *noisy += L->shadow_background_color; + } #endif - *noisy = ensure_finite3(*noisy); - *clean = ensure_finite3(*clean); + *noisy = ensure_finite3(*noisy); + *clean = ensure_finite3(*clean); } ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample) { #ifdef __SPLIT_KERNEL__ # define safe_float3_add(f, v) \ - do { \ - ccl_global float *p = (ccl_global float*)(&(f)); \ - atomic_add_and_fetch_float(p+0, (v).x); \ - atomic_add_and_fetch_float(p+1, (v).y); \ - atomic_add_and_fetch_float(p+2, (v).z); \ - } 
while(0) -# define safe_float_add(f, v) \ - atomic_add_and_fetch_float(&(f), (v)) + do { \ + ccl_global float *p = (ccl_global float *)(&(f)); \ + atomic_add_and_fetch_float(p + 0, (v).x); \ + atomic_add_and_fetch_float(p + 1, (v).y); \ + atomic_add_and_fetch_float(p + 2, (v).z); \ + } while (0) +# define safe_float_add(f, v) atomic_add_and_fetch_float(&(f), (v)) #else # define safe_float3_add(f, v) (f) += (v) # define safe_float_add(f, v) (f) += (v) -#endif /* __SPLIT_KERNEL__ */ +#endif /* __SPLIT_KERNEL__ */ #ifdef __PASSES__ - safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse); - safe_float3_add(L->direct_glossy, L_sample->direct_glossy); - safe_float3_add(L->direct_transmission, L_sample->direct_transmission); - safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface); - safe_float3_add(L->direct_scatter, L_sample->direct_scatter); - - safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse); - safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy); - safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission); - safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface); - safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter); - - safe_float3_add(L->background, L_sample->background); - safe_float3_add(L->ao, L_sample->ao); - safe_float3_add(L->shadow, L_sample->shadow); - safe_float_add(L->mist, L_sample->mist); -#endif /* __PASSES__ */ - safe_float3_add(L->emission, L_sample->emission); + safe_float3_add(L->direct_diffuse, L_sample->direct_diffuse); + safe_float3_add(L->direct_glossy, L_sample->direct_glossy); + safe_float3_add(L->direct_transmission, L_sample->direct_transmission); + safe_float3_add(L->direct_subsurface, L_sample->direct_subsurface); + safe_float3_add(L->direct_scatter, L_sample->direct_scatter); + + safe_float3_add(L->indirect_diffuse, L_sample->indirect_diffuse); + safe_float3_add(L->indirect_glossy, L_sample->indirect_glossy); + 
safe_float3_add(L->indirect_transmission, L_sample->indirect_transmission); + safe_float3_add(L->indirect_subsurface, L_sample->indirect_subsurface); + safe_float3_add(L->indirect_scatter, L_sample->indirect_scatter); + + safe_float3_add(L->background, L_sample->background); + safe_float3_add(L->ao, L_sample->ao); + safe_float3_add(L->shadow, L_sample->shadow); + safe_float_add(L->mist, L_sample->mist); +#endif /* __PASSES__ */ + safe_float3_add(L->emission, L_sample->emission); #undef safe_float_add #undef safe_float3_add diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 37c163f2538..10b71bc6bdf 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -18,191 +18,172 @@ CCL_NAMESPACE_BEGIN #ifdef __BAKING__ -ccl_device_inline void compute_light_pass(KernelGlobals *kg, - ShaderData *sd, - PathRadiance *L, - uint rng_hash, - int pass_filter, - int sample) +ccl_device_inline void compute_light_pass( + KernelGlobals *kg, ShaderData *sd, PathRadiance *L, uint rng_hash, int pass_filter, int sample) { - kernel_assert(kernel_data.film.use_light_pass); - - PathRadiance L_sample; - PathState state; - Ray ray; - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - - /* emission and indirect shader data memory used by various functions */ - ShaderData emission_sd, indirect_sd; - - ray.P = sd->P + sd->Ng; - ray.D = -sd->Ng; - ray.t = FLT_MAX; -#ifdef __CAMERA_MOTION__ - ray.time = 0.5f; -#endif - - /* init radiance */ - path_radiance_init(&L_sample, kernel_data.film.use_light_pass); - - /* init path state */ - path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL); - - /* evaluate surface shader */ - shader_eval_surface(kg, sd, &state, state.flag); - - /* TODO, disable more closures we don't need besides transparent */ - shader_bsdf_disable_transparency(kg, sd); - -#ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched) { - /* regular path tracer */ -#endif - - /* sample ambient 
occlusion */ - if(pass_filter & BAKE_FILTER_AO) { - kernel_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd)); - } - - /* sample emission */ - if((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) { - float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); - path_radiance_accum_emission(&L_sample, &state, throughput, emission); - } - - bool is_sss_sample = false; - -#ifdef __SUBSURFACE__ - /* sample subsurface scattering */ - if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { - /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ - SubsurfaceIndirectRays ss_indirect; - kernel_path_subsurface_init_indirect(&ss_indirect); - if(kernel_path_subsurface_scatter(kg, - sd, - &emission_sd, - &L_sample, - &state, - &ray, - &throughput, - &ss_indirect)) - { - while(ss_indirect.num_rays) { - kernel_path_subsurface_setup_indirect(kg, - &ss_indirect, - &state, - &ray, - &L_sample, - &throughput); - kernel_path_indirect(kg, - &indirect_sd, - &emission_sd, - &ray, - throughput, - &state, - &L_sample); - } - is_sss_sample = true; - } - } -#endif - - /* sample light and BSDF */ - if(!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) { - kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample); - - if(kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) { -#ifdef __LAMP_MIS__ - state.ray_t = 0.0f; -#endif - /* compute indirect light */ - kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample); - - /* sum and reset indirect light pass variables for the next samples */ - path_radiance_sum_indirect(&L_sample); - path_radiance_reset_indirect(&L_sample); - } - } -#ifdef __BRANCHED_PATH__ - } - else { - /* branched path tracer */ - - /* sample ambient occlusion */ - if(pass_filter & BAKE_FILTER_AO) { - 
kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput); - } - - /* sample emission */ - if((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) { - float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); - path_radiance_accum_emission(&L_sample, &state, throughput, emission); - } - -#ifdef __SUBSURFACE__ - /* sample subsurface scattering */ - if((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { - /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ - kernel_branched_path_subsurface_scatter(kg, sd, &indirect_sd, - &emission_sd, &L_sample, &state, &ray, throughput); - } -#endif - - /* sample light and BSDF */ - if(pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT)) { -#if defined(__EMISSION__) - /* direct light */ - if(kernel_data.integrator.use_direct_light) { - int all = kernel_data.integrator.sample_all_lights_direct; - kernel_branched_path_surface_connect_light(kg, - sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all); - } -#endif - - /* indirect light */ - kernel_branched_path_surface_indirect_light(kg, - sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample); - } - } -#endif - - /* accumulate into master L */ - path_radiance_accum_sample(L, &L_sample); + kernel_assert(kernel_data.film.use_light_pass); + + PathRadiance L_sample; + PathState state; + Ray ray; + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + + /* emission and indirect shader data memory used by various functions */ + ShaderData emission_sd, indirect_sd; + + ray.P = sd->P + sd->Ng; + ray.D = -sd->Ng; + ray.t = FLT_MAX; +# ifdef __CAMERA_MOTION__ + ray.time = 0.5f; +# endif + + /* init radiance */ + path_radiance_init(&L_sample, kernel_data.film.use_light_pass); + + /* init path state */ + path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL); + + /* evaluate surface shader */ + shader_eval_surface(kg, sd, &state, 
state.flag); + + /* TODO, disable more closures we don't need besides transparent */ + shader_bsdf_disable_transparency(kg, sd); + +# ifdef __BRANCHED_PATH__ + if (!kernel_data.integrator.branched) { + /* regular path tracer */ +# endif + + /* sample ambient occlusion */ + if (pass_filter & BAKE_FILTER_AO) { + kernel_path_ao( + kg, sd, &emission_sd, &L_sample, &state, throughput, shader_bsdf_alpha(kg, sd)); + } + + /* sample emission */ + if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) { + float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); + path_radiance_accum_emission(&L_sample, &state, throughput, emission); + } + + bool is_sss_sample = false; + +# ifdef __SUBSURFACE__ + /* sample subsurface scattering */ + if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { + /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ + SubsurfaceIndirectRays ss_indirect; + kernel_path_subsurface_init_indirect(&ss_indirect); + if (kernel_path_subsurface_scatter( + kg, sd, &emission_sd, &L_sample, &state, &ray, &throughput, &ss_indirect)) { + while (ss_indirect.num_rays) { + kernel_path_subsurface_setup_indirect( + kg, &ss_indirect, &state, &ray, &L_sample, &throughput); + kernel_path_indirect( + kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample); + } + is_sss_sample = true; + } + } +# endif + + /* sample light and BSDF */ + if (!is_sss_sample && (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT))) { + kernel_path_surface_connect_light(kg, sd, &emission_sd, throughput, &state, &L_sample); + + if (kernel_path_surface_bounce(kg, sd, &throughput, &state, &L_sample.state, &ray)) { +# ifdef __LAMP_MIS__ + state.ray_t = 0.0f; +# endif + /* compute indirect light */ + kernel_path_indirect(kg, &indirect_sd, &emission_sd, &ray, throughput, &state, &L_sample); + + /* sum and reset indirect light pass variables for the next samples */ + 
path_radiance_sum_indirect(&L_sample); + path_radiance_reset_indirect(&L_sample); + } + } +# ifdef __BRANCHED_PATH__ + } + else { + /* branched path tracer */ + + /* sample ambient occlusion */ + if (pass_filter & BAKE_FILTER_AO) { + kernel_branched_path_ao(kg, sd, &emission_sd, &L_sample, &state, throughput); + } + + /* sample emission */ + if ((pass_filter & BAKE_FILTER_EMISSION) && (sd->flag & SD_EMISSION)) { + float3 emission = indirect_primitive_emission(kg, sd, 0.0f, state.flag, state.ray_pdf); + path_radiance_accum_emission(&L_sample, &state, throughput, emission); + } + +# ifdef __SUBSURFACE__ + /* sample subsurface scattering */ + if ((pass_filter & BAKE_FILTER_SUBSURFACE) && (sd->flag & SD_BSSRDF)) { + /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ + kernel_branched_path_subsurface_scatter( + kg, sd, &indirect_sd, &emission_sd, &L_sample, &state, &ray, throughput); + } +# endif + + /* sample light and BSDF */ + if (pass_filter & (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT)) { +# if defined(__EMISSION__) + /* direct light */ + if (kernel_data.integrator.use_direct_light) { + int all = kernel_data.integrator.sample_all_lights_direct; + kernel_branched_path_surface_connect_light( + kg, sd, &emission_sd, &state, throughput, 1.0f, &L_sample, all); + } +# endif + + /* indirect light */ + kernel_branched_path_surface_indirect_light( + kg, sd, &indirect_sd, &emission_sd, throughput, 1.0f, &state, &L_sample); + } + } +# endif + + /* accumulate into master L */ + path_radiance_accum_sample(L, &L_sample); } /* this helps with AA but it's not the real solution as it does not AA the geometry * but it's better than nothing, thus committed */ ccl_device_inline float bake_clamp_mirror_repeat(float u, float max) { - /* use mirror repeat (like opengl texture) so that if the barycentric - * coordinate goes past the end of the triangle it is not always clamped - * to the same value, gives ugly patterns */ - u /= max; - 
float fu = floorf(u); - u = u - fu; - - return ((((int)fu) & 1)? 1.0f - u: u) * max; + /* use mirror repeat (like opengl texture) so that if the barycentric + * coordinate goes past the end of the triangle it is not always clamped + * to the same value, gives ugly patterns */ + u /= max; + float fu = floorf(u); + u = u - fu; + + return ((((int)fu) & 1) ? 1.0f - u : u) * max; } ccl_device_inline float3 kernel_bake_shader_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderEvalType type) { - switch(type) { - case SHADER_EVAL_DIFFUSE: - return shader_bsdf_diffuse(kg, sd); - case SHADER_EVAL_GLOSSY: - return shader_bsdf_glossy(kg, sd); - case SHADER_EVAL_TRANSMISSION: - return shader_bsdf_transmission(kg, sd); -#ifdef __SUBSURFACE__ - case SHADER_EVAL_SUBSURFACE: - return shader_bsdf_subsurface(kg, sd); -#endif - default: - kernel_assert(!"Unknown bake type passed to BSDF evaluate"); - return make_float3(0.0f, 0.0f, 0.0f); - } + switch (type) { + case SHADER_EVAL_DIFFUSE: + return shader_bsdf_diffuse(kg, sd); + case SHADER_EVAL_GLOSSY: + return shader_bsdf_glossy(kg, sd); + case SHADER_EVAL_TRANSMISSION: + return shader_bsdf_transmission(kg, sd); +# ifdef __SUBSURFACE__ + case SHADER_EVAL_SUBSURFACE: + return shader_bsdf_subsurface(kg, sd); +# endif + default: + kernel_assert(!"Unknown bake type passed to BSDF evaluate"); + return make_float3(0.0f, 0.0f, 0.0f); + } } ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg, @@ -213,316 +194,301 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg, const ShaderEvalType type, const int pass_filter) { - float3 color; - const bool is_color = (pass_filter & BAKE_FILTER_COLOR) != 0; - const bool is_direct = (pass_filter & BAKE_FILTER_DIRECT) != 0; - const bool is_indirect = (pass_filter & BAKE_FILTER_INDIRECT) != 0; - float3 out = make_float3(0.0f, 0.0f, 0.0f); - - if(is_color) { - if(is_direct || is_indirect) { - /* Leave direct and diffuse channel colored. 
*/ - color = make_float3(1.0f, 1.0f, 1.0f); - } - else { - /* surface color of the pass only */ - shader_eval_surface(kg, sd, state, 0); - return kernel_bake_shader_bsdf(kg, sd, type); - } - } - else { - shader_eval_surface(kg, sd, state, 0); - color = kernel_bake_shader_bsdf(kg, sd, type); - } - - if(is_direct) { - out += safe_divide_even_color(direct, color); - } - - if(is_indirect) { - out += safe_divide_even_color(indirect, color); - } - - return out; + float3 color; + const bool is_color = (pass_filter & BAKE_FILTER_COLOR) != 0; + const bool is_direct = (pass_filter & BAKE_FILTER_DIRECT) != 0; + const bool is_indirect = (pass_filter & BAKE_FILTER_INDIRECT) != 0; + float3 out = make_float3(0.0f, 0.0f, 0.0f); + + if (is_color) { + if (is_direct || is_indirect) { + /* Leave direct and diffuse channel colored. */ + color = make_float3(1.0f, 1.0f, 1.0f); + } + else { + /* surface color of the pass only */ + shader_eval_surface(kg, sd, state, 0); + return kernel_bake_shader_bsdf(kg, sd, type); + } + } + else { + shader_eval_surface(kg, sd, state, 0); + color = kernel_bake_shader_bsdf(kg, sd, type); + } + + if (is_direct) { + out += safe_divide_even_color(direct, color); + } + + if (is_indirect) { + out += safe_divide_even_color(indirect, color); + } + + return out; } -ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, - ShaderEvalType type, int pass_filter, int i, int offset, int sample) +ccl_device void kernel_bake_evaluate(KernelGlobals *kg, + ccl_global uint4 *input, + ccl_global float4 *output, + ShaderEvalType type, + int pass_filter, + int i, + int offset, + int sample) { - ShaderData sd; - PathState state = {0}; - uint4 in = input[i * 2]; - uint4 diff = input[i * 2 + 1]; - - float3 out = make_float3(0.0f, 0.0f, 0.0f); - - int object = in.x; - int prim = in.y; - - if(prim == -1) - return; - - float u = __uint_as_float(in.z); - float v = __uint_as_float(in.w); - - float dudx = __uint_as_float(diff.x); - 
float dudy = __uint_as_float(diff.y); - float dvdx = __uint_as_float(diff.z); - float dvdy = __uint_as_float(diff.w); - - int num_samples = kernel_data.integrator.aa_samples; - - /* random number generator */ - uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed); - - float filter_x, filter_y; - if(sample == 0) { - filter_x = filter_y = 0.5f; - } - else { - path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y); - } - - /* subpixel u/v offset */ - if(sample > 0) { - u = bake_clamp_mirror_repeat(u + dudx*(filter_x - 0.5f) + dudy*(filter_y - 0.5f), 1.0f); - v = bake_clamp_mirror_repeat(v + dvdx*(filter_x - 0.5f) + dvdy*(filter_y - 0.5f), 1.0f - u); - } - - /* triangle */ - int shader; - float3 P, Ng; - - triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); - - /* light passes */ - PathRadiance L; - path_radiance_init(&L, kernel_data.film.use_light_pass); - - shader_setup_from_sample(kg, &sd, - P, Ng, Ng, - shader, object, prim, - u, v, 1.0f, 0.5f, - !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), - LAMP_NONE); - sd.I = sd.N; - - /* update differentials */ - sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx; - sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy; - sd.du.dx = dudx; - sd.du.dy = dudy; - sd.dv.dx = dvdx; - sd.dv.dy = dvdy; - - /* set RNG state for shaders that use sampling */ - state.rng_hash = rng_hash; - state.rng_offset = 0; - state.sample = sample; - state.num_samples = num_samples; - state.min_ray_pdf = FLT_MAX; - - /* light passes if we need more than color */ - if(pass_filter & ~BAKE_FILTER_COLOR) - compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample); - - switch(type) { - /* data passes */ - case SHADER_EVAL_NORMAL: - case SHADER_EVAL_ROUGHNESS: - case SHADER_EVAL_EMISSION: - { - if(type != SHADER_EVAL_NORMAL || (sd.flag & SD_HAS_BUMP)) { - int path_flag = (type == SHADER_EVAL_EMISSION) ? 
PATH_RAY_EMISSION : 0; - shader_eval_surface(kg, &sd, &state, path_flag); - } - - if(type == SHADER_EVAL_NORMAL) { - float3 N = sd.N; - if(sd.flag & SD_HAS_BUMP) { - N = shader_bsdf_average_normal(kg, &sd); - } - - /* encoding: normal = (2 * color) - 1 */ - out = N * 0.5f + make_float3(0.5f, 0.5f, 0.5f); - } - else if(type == SHADER_EVAL_ROUGHNESS) { - float roughness = shader_bsdf_average_roughness(&sd); - out = make_float3(roughness, roughness, roughness); - } - else { - out = shader_emissive_eval(&sd); - } - break; - } - case SHADER_EVAL_UV: - { - out = primitive_uv(kg, &sd); - break; - } -#ifdef __PASSES__ - /* light passes */ - case SHADER_EVAL_AO: - { - out = L.ao; - break; - } - case SHADER_EVAL_COMBINED: - { - if((pass_filter & BAKE_FILTER_COMBINED) == BAKE_FILTER_COMBINED) { - float alpha; - out = path_radiance_clamp_and_sum(kg, &L, &alpha); - break; - } - - if((pass_filter & BAKE_FILTER_DIFFUSE_DIRECT) == BAKE_FILTER_DIFFUSE_DIRECT) - out += L.direct_diffuse; - if((pass_filter & BAKE_FILTER_DIFFUSE_INDIRECT) == BAKE_FILTER_DIFFUSE_INDIRECT) - out += L.indirect_diffuse; - - if((pass_filter & BAKE_FILTER_GLOSSY_DIRECT) == BAKE_FILTER_GLOSSY_DIRECT) - out += L.direct_glossy; - if((pass_filter & BAKE_FILTER_GLOSSY_INDIRECT) == BAKE_FILTER_GLOSSY_INDIRECT) - out += L.indirect_glossy; - - if((pass_filter & BAKE_FILTER_TRANSMISSION_DIRECT) == BAKE_FILTER_TRANSMISSION_DIRECT) - out += L.direct_transmission; - if((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT) - out += L.indirect_transmission; - - if((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT) - out += L.direct_subsurface; - if((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT) - out += L.indirect_subsurface; - - if((pass_filter & BAKE_FILTER_EMISSION) != 0) - out += L.emission; - - break; - } - case SHADER_EVAL_SHADOW: - { - out = make_float3(L.shadow.x, L.shadow.y, L.shadow.z); - break; - } - case 
SHADER_EVAL_DIFFUSE: - { - out = kernel_bake_evaluate_direct_indirect(kg, - &sd, - &state, - L.direct_diffuse, - L.indirect_diffuse, - type, - pass_filter); - break; - } - case SHADER_EVAL_GLOSSY: - { - out = kernel_bake_evaluate_direct_indirect(kg, - &sd, - &state, - L.direct_glossy, - L.indirect_glossy, - type, - pass_filter); - break; - } - case SHADER_EVAL_TRANSMISSION: - { - out = kernel_bake_evaluate_direct_indirect(kg, - &sd, - &state, - L.direct_transmission, - L.indirect_transmission, - type, - pass_filter); - break; - } - case SHADER_EVAL_SUBSURFACE: - { -#ifdef __SUBSURFACE__ - out = kernel_bake_evaluate_direct_indirect(kg, - &sd, - &state, - L.direct_subsurface, - L.indirect_subsurface, - type, - pass_filter); -#endif - break; - } -#endif - - /* extra */ - case SHADER_EVAL_ENVIRONMENT: - { - /* setup ray */ - Ray ray; - - ray.P = make_float3(0.0f, 0.0f, 0.0f); - ray.D = normalize(P); - ray.t = 0.0f; -#ifdef __CAMERA_MOTION__ - ray.time = 0.5f; -#endif - -#ifdef __RAY_DIFFERENTIALS__ - ray.dD = differential3_zero(); - ray.dP = differential3_zero(); -#endif - - /* setup shader data */ - shader_setup_from_background(kg, &sd, &ray); - - /* evaluate */ - int path_flag = 0; /* we can't know which type of BSDF this is for */ - shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION); - out = shader_background_eval(&sd); - break; - } - default: - { - /* no real shader, returning the position of the verts for debugging */ - out = normalize(P); - break; - } - } - - /* write output */ - const float output_fac = 1.0f/num_samples; - const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac; - - output[i] = (sample == 0)? 
scaled_result: output[i] + scaled_result; + ShaderData sd; + PathState state = {0}; + uint4 in = input[i * 2]; + uint4 diff = input[i * 2 + 1]; + + float3 out = make_float3(0.0f, 0.0f, 0.0f); + + int object = in.x; + int prim = in.y; + + if (prim == -1) + return; + + float u = __uint_as_float(in.z); + float v = __uint_as_float(in.w); + + float dudx = __uint_as_float(diff.x); + float dudy = __uint_as_float(diff.y); + float dvdx = __uint_as_float(diff.z); + float dvdy = __uint_as_float(diff.w); + + int num_samples = kernel_data.integrator.aa_samples; + + /* random number generator */ + uint rng_hash = cmj_hash(offset + i, kernel_data.integrator.seed); + + float filter_x, filter_y; + if (sample == 0) { + filter_x = filter_y = 0.5f; + } + else { + path_rng_2D(kg, rng_hash, sample, num_samples, PRNG_FILTER_U, &filter_x, &filter_y); + } + + /* subpixel u/v offset */ + if (sample > 0) { + u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f); + v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f), + 1.0f - u); + } + + /* triangle */ + int shader; + float3 P, Ng; + + triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); + + /* light passes */ + PathRadiance L; + path_radiance_init(&L, kernel_data.film.use_light_pass); + + shader_setup_from_sample( + kg, + &sd, + P, + Ng, + Ng, + shader, + object, + prim, + u, + v, + 1.0f, + 0.5f, + !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), + LAMP_NONE); + sd.I = sd.N; + + /* update differentials */ + sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx; + sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy; + sd.du.dx = dudx; + sd.du.dy = dudy; + sd.dv.dx = dvdx; + sd.dv.dy = dvdy; + + /* set RNG state for shaders that use sampling */ + state.rng_hash = rng_hash; + state.rng_offset = 0; + state.sample = sample; + state.num_samples = num_samples; + state.min_ray_pdf = FLT_MAX; + + /* light passes if we need more than color */ + if (pass_filter & 
~BAKE_FILTER_COLOR) + compute_light_pass(kg, &sd, &L, rng_hash, pass_filter, sample); + + switch (type) { + /* data passes */ + case SHADER_EVAL_NORMAL: + case SHADER_EVAL_ROUGHNESS: + case SHADER_EVAL_EMISSION: { + if (type != SHADER_EVAL_NORMAL || (sd.flag & SD_HAS_BUMP)) { + int path_flag = (type == SHADER_EVAL_EMISSION) ? PATH_RAY_EMISSION : 0; + shader_eval_surface(kg, &sd, &state, path_flag); + } + + if (type == SHADER_EVAL_NORMAL) { + float3 N = sd.N; + if (sd.flag & SD_HAS_BUMP) { + N = shader_bsdf_average_normal(kg, &sd); + } + + /* encoding: normal = (2 * color) - 1 */ + out = N * 0.5f + make_float3(0.5f, 0.5f, 0.5f); + } + else if (type == SHADER_EVAL_ROUGHNESS) { + float roughness = shader_bsdf_average_roughness(&sd); + out = make_float3(roughness, roughness, roughness); + } + else { + out = shader_emissive_eval(&sd); + } + break; + } + case SHADER_EVAL_UV: { + out = primitive_uv(kg, &sd); + break; + } +# ifdef __PASSES__ + /* light passes */ + case SHADER_EVAL_AO: { + out = L.ao; + break; + } + case SHADER_EVAL_COMBINED: { + if ((pass_filter & BAKE_FILTER_COMBINED) == BAKE_FILTER_COMBINED) { + float alpha; + out = path_radiance_clamp_and_sum(kg, &L, &alpha); + break; + } + + if ((pass_filter & BAKE_FILTER_DIFFUSE_DIRECT) == BAKE_FILTER_DIFFUSE_DIRECT) + out += L.direct_diffuse; + if ((pass_filter & BAKE_FILTER_DIFFUSE_INDIRECT) == BAKE_FILTER_DIFFUSE_INDIRECT) + out += L.indirect_diffuse; + + if ((pass_filter & BAKE_FILTER_GLOSSY_DIRECT) == BAKE_FILTER_GLOSSY_DIRECT) + out += L.direct_glossy; + if ((pass_filter & BAKE_FILTER_GLOSSY_INDIRECT) == BAKE_FILTER_GLOSSY_INDIRECT) + out += L.indirect_glossy; + + if ((pass_filter & BAKE_FILTER_TRANSMISSION_DIRECT) == BAKE_FILTER_TRANSMISSION_DIRECT) + out += L.direct_transmission; + if ((pass_filter & BAKE_FILTER_TRANSMISSION_INDIRECT) == BAKE_FILTER_TRANSMISSION_INDIRECT) + out += L.indirect_transmission; + + if ((pass_filter & BAKE_FILTER_SUBSURFACE_DIRECT) == BAKE_FILTER_SUBSURFACE_DIRECT) + out += 
L.direct_subsurface; + if ((pass_filter & BAKE_FILTER_SUBSURFACE_INDIRECT) == BAKE_FILTER_SUBSURFACE_INDIRECT) + out += L.indirect_subsurface; + + if ((pass_filter & BAKE_FILTER_EMISSION) != 0) + out += L.emission; + + break; + } + case SHADER_EVAL_SHADOW: { + out = make_float3(L.shadow.x, L.shadow.y, L.shadow.z); + break; + } + case SHADER_EVAL_DIFFUSE: { + out = kernel_bake_evaluate_direct_indirect( + kg, &sd, &state, L.direct_diffuse, L.indirect_diffuse, type, pass_filter); + break; + } + case SHADER_EVAL_GLOSSY: { + out = kernel_bake_evaluate_direct_indirect( + kg, &sd, &state, L.direct_glossy, L.indirect_glossy, type, pass_filter); + break; + } + case SHADER_EVAL_TRANSMISSION: { + out = kernel_bake_evaluate_direct_indirect( + kg, &sd, &state, L.direct_transmission, L.indirect_transmission, type, pass_filter); + break; + } + case SHADER_EVAL_SUBSURFACE: { +# ifdef __SUBSURFACE__ + out = kernel_bake_evaluate_direct_indirect( + kg, &sd, &state, L.direct_subsurface, L.indirect_subsurface, type, pass_filter); +# endif + break; + } +# endif + + /* extra */ + case SHADER_EVAL_ENVIRONMENT: { + /* setup ray */ + Ray ray; + + ray.P = make_float3(0.0f, 0.0f, 0.0f); + ray.D = normalize(P); + ray.t = 0.0f; +# ifdef __CAMERA_MOTION__ + ray.time = 0.5f; +# endif + +# ifdef __RAY_DIFFERENTIALS__ + ray.dD = differential3_zero(); + ray.dP = differential3_zero(); +# endif + + /* setup shader data */ + shader_setup_from_background(kg, &sd, &ray); + + /* evaluate */ + int path_flag = 0; /* we can't know which type of BSDF this is for */ + shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION); + out = shader_background_eval(&sd); + break; + } + default: { + /* no real shader, returning the position of the verts for debugging */ + out = normalize(P); + break; + } + } + + /* write output */ + const float output_fac = 1.0f / num_samples; + const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac; + + output[i] = (sample == 0) ? 
scaled_result : output[i] + scaled_result; } -#endif /* __BAKING__ */ +#endif /* __BAKING__ */ ccl_device void kernel_displace_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, int i) { - ShaderData sd; - PathState state = {0}; - uint4 in = input[i]; + ShaderData sd; + PathState state = {0}; + uint4 in = input[i]; - /* setup shader data */ - int object = in.x; - int prim = in.y; - float u = __uint_as_float(in.z); - float v = __uint_as_float(in.w); + /* setup shader data */ + int object = in.x; + int prim = in.y; + float u = __uint_as_float(in.z); + float v = __uint_as_float(in.w); - shader_setup_from_displace(kg, &sd, object, prim, u, v); + shader_setup_from_displace(kg, &sd, object, prim, u, v); - /* evaluate */ - float3 P = sd.P; - shader_eval_displacement(kg, &sd, &state); - float3 D = sd.P - P; + /* evaluate */ + float3 P = sd.P; + shader_eval_displacement(kg, &sd, &state); + float3 D = sd.P - P; - object_inverse_dir_transform(kg, &sd, &D); + object_inverse_dir_transform(kg, &sd, &D); - /* write output */ - output[i] += make_float4(D.x, D.y, D.z, 0.0f); + /* write output */ + output[i] += make_float4(D.x, D.y, D.z, 0.0f); } ccl_device void kernel_background_evaluate(KernelGlobals *kg, @@ -530,37 +496,37 @@ ccl_device void kernel_background_evaluate(KernelGlobals *kg, ccl_global float4 *output, int i) { - ShaderData sd; - PathState state = {0}; - uint4 in = input[i]; - - /* setup ray */ - Ray ray; - float u = __uint_as_float(in.x); - float v = __uint_as_float(in.y); - - ray.P = make_float3(0.0f, 0.0f, 0.0f); - ray.D = equirectangular_to_direction(u, v); - ray.t = 0.0f; + ShaderData sd; + PathState state = {0}; + uint4 in = input[i]; + + /* setup ray */ + Ray ray; + float u = __uint_as_float(in.x); + float v = __uint_as_float(in.y); + + ray.P = make_float3(0.0f, 0.0f, 0.0f); + ray.D = equirectangular_to_direction(u, v); + ray.t = 0.0f; #ifdef __CAMERA_MOTION__ - ray.time = 0.5f; + ray.time = 0.5f; #endif #ifdef __RAY_DIFFERENTIALS__ 
- ray.dD = differential3_zero(); - ray.dP = differential3_zero(); + ray.dD = differential3_zero(); + ray.dP = differential3_zero(); #endif - /* setup shader data */ - shader_setup_from_background(kg, &sd, &ray); + /* setup shader data */ + shader_setup_from_background(kg, &sd, &ray); - /* evaluate */ - int path_flag = 0; /* we can't know which type of BSDF this is for */ - shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION); - float3 color = shader_background_eval(&sd); + /* evaluate */ + int path_flag = 0; /* we can't know which type of BSDF this is for */ + shader_eval_surface(kg, &sd, &state, path_flag | PATH_RAY_EMISSION); + float3 color = shader_background_eval(&sd); - /* write output */ - output[i] += make_float4(color.x, color.y, color.z, 0.0f); + /* write output */ + output[i] += make_float4(color.x, color.y, color.z, 0.0f); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index b73ad47dad3..1085930c33a 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -20,209 +20,217 @@ CCL_NAMESPACE_BEGIN ccl_device float2 camera_sample_aperture(ccl_constant KernelCamera *cam, float u, float v) { - float blades = cam->blades; - float2 bokeh; - - if(blades == 0.0f) { - /* sample disk */ - bokeh = concentric_sample_disk(u, v); - } - else { - /* sample polygon */ - float rotation = cam->bladesrotation; - bokeh = regular_polygon_sample(blades, rotation, u, v); - } - - /* anamorphic lens bokeh */ - bokeh.x *= cam->inv_aperture_ratio; - - return bokeh; + float blades = cam->blades; + float2 bokeh; + + if (blades == 0.0f) { + /* sample disk */ + bokeh = concentric_sample_disk(u, v); + } + else { + /* sample polygon */ + float rotation = cam->bladesrotation; + bokeh = regular_polygon_sample(blades, rotation, u, v); + } + + /* anamorphic lens bokeh */ + bokeh.x *= cam->inv_aperture_ratio; + + return bokeh; } -ccl_device void 
camera_sample_perspective(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray) +ccl_device void camera_sample_perspective(KernelGlobals *kg, + float raster_x, + float raster_y, + float lens_u, + float lens_v, + ccl_addr_space Ray *ray) { - /* create ray form raster position */ - ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera; - float3 raster = make_float3(raster_x, raster_y, 0.0f); - float3 Pcamera = transform_perspective(&rastertocamera, raster); + /* create ray form raster position */ + ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera; + float3 raster = make_float3(raster_x, raster_y, 0.0f); + float3 Pcamera = transform_perspective(&rastertocamera, raster); #ifdef __CAMERA_MOTION__ - if(kernel_data.cam.have_perspective_motion) { - /* TODO(sergey): Currently we interpolate projected coordinate which - * gives nice looking result and which is simple, but is in fact a bit - * different comparing to constructing projective matrix from an - * interpolated field of view. - */ - if(ray->time < 0.5f) { - ProjectionTransform rastertocamera_pre = kernel_data.cam.perspective_pre; - float3 Pcamera_pre = - transform_perspective(&rastertocamera_pre, raster); - Pcamera = interp(Pcamera_pre, Pcamera, ray->time * 2.0f); - } - else { - ProjectionTransform rastertocamera_post = kernel_data.cam.perspective_post; - float3 Pcamera_post = - transform_perspective(&rastertocamera_post, raster); - Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f); - } - } + if (kernel_data.cam.have_perspective_motion) { + /* TODO(sergey): Currently we interpolate projected coordinate which + * gives nice looking result and which is simple, but is in fact a bit + * different comparing to constructing projective matrix from an + * interpolated field of view. 
+ */ + if (ray->time < 0.5f) { + ProjectionTransform rastertocamera_pre = kernel_data.cam.perspective_pre; + float3 Pcamera_pre = transform_perspective(&rastertocamera_pre, raster); + Pcamera = interp(Pcamera_pre, Pcamera, ray->time * 2.0f); + } + else { + ProjectionTransform rastertocamera_post = kernel_data.cam.perspective_post; + float3 Pcamera_post = transform_perspective(&rastertocamera_post, raster); + Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f); + } + } #endif - float3 P = make_float3(0.0f, 0.0f, 0.0f); - float3 D = Pcamera; + float3 P = make_float3(0.0f, 0.0f, 0.0f); + float3 D = Pcamera; - /* modify ray for depth of field */ - float aperturesize = kernel_data.cam.aperturesize; + /* modify ray for depth of field */ + float aperturesize = kernel_data.cam.aperturesize; - if(aperturesize > 0.0f) { - /* sample point on aperture */ - float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v)*aperturesize; + if (aperturesize > 0.0f) { + /* sample point on aperture */ + float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v) * aperturesize; - /* compute point on plane of focus */ - float ft = kernel_data.cam.focaldistance/D.z; - float3 Pfocus = D*ft; + /* compute point on plane of focus */ + float ft = kernel_data.cam.focaldistance / D.z; + float3 Pfocus = D * ft; - /* update ray for effect of lens */ - P = make_float3(lensuv.x, lensuv.y, 0.0f); - D = normalize(Pfocus - P); - } + /* update ray for effect of lens */ + P = make_float3(lensuv.x, lensuv.y, 0.0f); + D = normalize(Pfocus - P); + } - /* transform ray from camera to world */ - Transform cameratoworld = kernel_data.cam.cameratoworld; + /* transform ray from camera to world */ + Transform cameratoworld = kernel_data.cam.cameratoworld; #ifdef __CAMERA_MOTION__ - if(kernel_data.cam.num_motion_steps) { - transform_motion_array_interpolate( - &cameratoworld, - kernel_tex_array(__camera_motion), - kernel_data.cam.num_motion_steps, - ray->time); - } + if 
(kernel_data.cam.num_motion_steps) { + transform_motion_array_interpolate(&cameratoworld, + kernel_tex_array(__camera_motion), + kernel_data.cam.num_motion_steps, + ray->time); + } #endif - P = transform_point(&cameratoworld, P); - D = normalize(transform_direction(&cameratoworld, D)); + P = transform_point(&cameratoworld, P); + D = normalize(transform_direction(&cameratoworld, D)); - bool use_stereo = kernel_data.cam.interocular_offset != 0.0f; - if(!use_stereo) { - /* No stereo */ - ray->P = P; - ray->D = D; + bool use_stereo = kernel_data.cam.interocular_offset != 0.0f; + if (!use_stereo) { + /* No stereo */ + ray->P = P; + ray->D = D; #ifdef __RAY_DIFFERENTIALS__ - float3 Dcenter = transform_direction(&cameratoworld, Pcamera); + float3 Dcenter = transform_direction(&cameratoworld, Pcamera); - ray->dP = differential3_zero(); - ray->dD.dx = normalize(Dcenter + float4_to_float3(kernel_data.cam.dx)) - normalize(Dcenter); - ray->dD.dy = normalize(Dcenter + float4_to_float3(kernel_data.cam.dy)) - normalize(Dcenter); + ray->dP = differential3_zero(); + ray->dD.dx = normalize(Dcenter + float4_to_float3(kernel_data.cam.dx)) - normalize(Dcenter); + ray->dD.dy = normalize(Dcenter + float4_to_float3(kernel_data.cam.dy)) - normalize(Dcenter); #endif - } - else { - /* Spherical stereo */ - spherical_stereo_transform(&kernel_data.cam, &P, &D); - ray->P = P; - ray->D = D; + } + else { + /* Spherical stereo */ + spherical_stereo_transform(&kernel_data.cam, &P, &D); + ray->P = P; + ray->D = D; #ifdef __RAY_DIFFERENTIALS__ - /* Ray differentials, computed from scratch using the raster coordinates - * because we don't want to be affected by depth of field. We compute - * ray origin and direction for the center and two neighbouring pixels - * and simply take their differences. 
*/ - float3 Pnostereo = transform_point(&cameratoworld, make_float3(0.0f, 0.0f, 0.0f)); - - float3 Pcenter = Pnostereo; - float3 Dcenter = Pcamera; - Dcenter = normalize(transform_direction(&cameratoworld, Dcenter)); - spherical_stereo_transform(&kernel_data.cam, &Pcenter, &Dcenter); - - float3 Px = Pnostereo; - float3 Dx = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f)); - Dx = normalize(transform_direction(&cameratoworld, Dx)); - spherical_stereo_transform(&kernel_data.cam, &Px, &Dx); - - ray->dP.dx = Px - Pcenter; - ray->dD.dx = Dx - Dcenter; - - float3 Py = Pnostereo; - float3 Dy = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f)); - Dy = normalize(transform_direction(&cameratoworld, Dy)); - spherical_stereo_transform(&kernel_data.cam, &Py, &Dy); - - ray->dP.dy = Py - Pcenter; - ray->dD.dy = Dy - Dcenter; + /* Ray differentials, computed from scratch using the raster coordinates + * because we don't want to be affected by depth of field. We compute + * ray origin and direction for the center and two neighbouring pixels + * and simply take their differences. 
*/ + float3 Pnostereo = transform_point(&cameratoworld, make_float3(0.0f, 0.0f, 0.0f)); + + float3 Pcenter = Pnostereo; + float3 Dcenter = Pcamera; + Dcenter = normalize(transform_direction(&cameratoworld, Dcenter)); + spherical_stereo_transform(&kernel_data.cam, &Pcenter, &Dcenter); + + float3 Px = Pnostereo; + float3 Dx = transform_perspective(&rastertocamera, + make_float3(raster_x + 1.0f, raster_y, 0.0f)); + Dx = normalize(transform_direction(&cameratoworld, Dx)); + spherical_stereo_transform(&kernel_data.cam, &Px, &Dx); + + ray->dP.dx = Px - Pcenter; + ray->dD.dx = Dx - Dcenter; + + float3 Py = Pnostereo; + float3 Dy = transform_perspective(&rastertocamera, + make_float3(raster_x, raster_y + 1.0f, 0.0f)); + Dy = normalize(transform_direction(&cameratoworld, Dy)); + spherical_stereo_transform(&kernel_data.cam, &Py, &Dy); + + ray->dP.dy = Py - Pcenter; + ray->dD.dy = Dy - Dcenter; #endif - } + } #ifdef __CAMERA_CLIPPING__ - /* clipping */ - float z_inv = 1.0f / normalize(Pcamera).z; - float nearclip = kernel_data.cam.nearclip * z_inv; - ray->P += nearclip * ray->D; - ray->dP.dx += nearclip * ray->dD.dx; - ray->dP.dy += nearclip * ray->dD.dy; - ray->t = kernel_data.cam.cliplength * z_inv; + /* clipping */ + float z_inv = 1.0f / normalize(Pcamera).z; + float nearclip = kernel_data.cam.nearclip * z_inv; + ray->P += nearclip * ray->D; + ray->dP.dx += nearclip * ray->dD.dx; + ray->dP.dy += nearclip * ray->dD.dy; + ray->t = kernel_data.cam.cliplength * z_inv; #else - ray->t = FLT_MAX; + ray->t = FLT_MAX; #endif } /* Orthographic Camera */ -ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray) +ccl_device void camera_sample_orthographic(KernelGlobals *kg, + float raster_x, + float raster_y, + float lens_u, + float lens_v, + ccl_addr_space Ray *ray) { - /* create ray form raster position */ - ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera; - float3 Pcamera = 
transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); - - float3 P; - float3 D = make_float3(0.0f, 0.0f, 1.0f); - - /* modify ray for depth of field */ - float aperturesize = kernel_data.cam.aperturesize; - - if(aperturesize > 0.0f) { - /* sample point on aperture */ - float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v)*aperturesize; - - /* compute point on plane of focus */ - float3 Pfocus = D * kernel_data.cam.focaldistance; - - /* update ray for effect of lens */ - float3 lensuvw = make_float3(lensuv.x, lensuv.y, 0.0f); - P = Pcamera + lensuvw; - D = normalize(Pfocus - lensuvw); - } - else { - P = Pcamera; - } - /* transform ray from camera to world */ - Transform cameratoworld = kernel_data.cam.cameratoworld; + /* create ray form raster position */ + ProjectionTransform rastertocamera = kernel_data.cam.rastertocamera; + float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); + + float3 P; + float3 D = make_float3(0.0f, 0.0f, 1.0f); + + /* modify ray for depth of field */ + float aperturesize = kernel_data.cam.aperturesize; + + if (aperturesize > 0.0f) { + /* sample point on aperture */ + float2 lensuv = camera_sample_aperture(&kernel_data.cam, lens_u, lens_v) * aperturesize; + + /* compute point on plane of focus */ + float3 Pfocus = D * kernel_data.cam.focaldistance; + + /* update ray for effect of lens */ + float3 lensuvw = make_float3(lensuv.x, lensuv.y, 0.0f); + P = Pcamera + lensuvw; + D = normalize(Pfocus - lensuvw); + } + else { + P = Pcamera; + } + /* transform ray from camera to world */ + Transform cameratoworld = kernel_data.cam.cameratoworld; #ifdef __CAMERA_MOTION__ - if(kernel_data.cam.num_motion_steps) { - transform_motion_array_interpolate( - &cameratoworld, - kernel_tex_array(__camera_motion), - kernel_data.cam.num_motion_steps, - ray->time); - } + if (kernel_data.cam.num_motion_steps) { + transform_motion_array_interpolate(&cameratoworld, + 
kernel_tex_array(__camera_motion), + kernel_data.cam.num_motion_steps, + ray->time); + } #endif - ray->P = transform_point(&cameratoworld, P); - ray->D = normalize(transform_direction(&cameratoworld, D)); + ray->P = transform_point(&cameratoworld, P); + ray->D = normalize(transform_direction(&cameratoworld, D)); #ifdef __RAY_DIFFERENTIALS__ - /* ray differential */ - ray->dP.dx = float4_to_float3(kernel_data.cam.dx); - ray->dP.dy = float4_to_float3(kernel_data.cam.dy); + /* ray differential */ + ray->dP.dx = float4_to_float3(kernel_data.cam.dx); + ray->dP.dy = float4_to_float3(kernel_data.cam.dy); - ray->dD = differential3_zero(); + ray->dD = differential3_zero(); #endif #ifdef __CAMERA_CLIPPING__ - /* clipping */ - ray->t = kernel_data.cam.cliplength; + /* clipping */ + ray->t = kernel_data.cam.cliplength; #else - ray->t = FLT_MAX; + ray->t = FLT_MAX; #endif } @@ -230,242 +238,244 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam, const ccl_global DecomposedTransform *cam_motion, - float raster_x, float raster_y, - float lens_u, float lens_v, + float raster_x, + float raster_y, + float lens_u, + float lens_v, ccl_addr_space Ray *ray) { - ProjectionTransform rastertocamera = cam->rastertocamera; - float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); + ProjectionTransform rastertocamera = cam->rastertocamera; + float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f)); - /* create ray form raster position */ - float3 P = make_float3(0.0f, 0.0f, 0.0f); - float3 D = panorama_to_direction(cam, Pcamera.x, Pcamera.y); + /* create ray form raster position */ + float3 P = make_float3(0.0f, 0.0f, 0.0f); + float3 D = panorama_to_direction(cam, Pcamera.x, Pcamera.y); - /* indicates ray should not receive any light, outside of the lens */ - if(is_zero(D)) { - ray->t = 0.0f; - return; - } + 
/* indicates ray should not receive any light, outside of the lens */ + if (is_zero(D)) { + ray->t = 0.0f; + return; + } - /* modify ray for depth of field */ - float aperturesize = cam->aperturesize; + /* modify ray for depth of field */ + float aperturesize = cam->aperturesize; - if(aperturesize > 0.0f) { - /* sample point on aperture */ - float2 lensuv = camera_sample_aperture(cam, lens_u, lens_v)*aperturesize; + if (aperturesize > 0.0f) { + /* sample point on aperture */ + float2 lensuv = camera_sample_aperture(cam, lens_u, lens_v) * aperturesize; - /* compute point on plane of focus */ - float3 Dfocus = normalize(D); - float3 Pfocus = Dfocus * cam->focaldistance; + /* compute point on plane of focus */ + float3 Dfocus = normalize(D); + float3 Pfocus = Dfocus * cam->focaldistance; - /* calculate orthonormal coordinates perpendicular to Dfocus */ - float3 U, V; - U = normalize(make_float3(1.0f, 0.0f, 0.0f) - Dfocus.x * Dfocus); - V = normalize(cross(Dfocus, U)); + /* calculate orthonormal coordinates perpendicular to Dfocus */ + float3 U, V; + U = normalize(make_float3(1.0f, 0.0f, 0.0f) - Dfocus.x * Dfocus); + V = normalize(cross(Dfocus, U)); - /* update ray for effect of lens */ - P = U * lensuv.x + V * lensuv.y; - D = normalize(Pfocus - P); - } + /* update ray for effect of lens */ + P = U * lensuv.x + V * lensuv.y; + D = normalize(Pfocus - P); + } - /* transform ray from camera to world */ - Transform cameratoworld = cam->cameratoworld; + /* transform ray from camera to world */ + Transform cameratoworld = cam->cameratoworld; #ifdef __CAMERA_MOTION__ - if(cam->num_motion_steps) { - transform_motion_array_interpolate( - &cameratoworld, - cam_motion, - cam->num_motion_steps, - ray->time); - } + if (cam->num_motion_steps) { + transform_motion_array_interpolate( + &cameratoworld, cam_motion, cam->num_motion_steps, ray->time); + } #endif - P = transform_point(&cameratoworld, P); - D = normalize(transform_direction(&cameratoworld, D)); + P = 
transform_point(&cameratoworld, P); + D = normalize(transform_direction(&cameratoworld, D)); - /* Stereo transform */ - bool use_stereo = cam->interocular_offset != 0.0f; - if(use_stereo) { - spherical_stereo_transform(cam, &P, &D); - } + /* Stereo transform */ + bool use_stereo = cam->interocular_offset != 0.0f; + if (use_stereo) { + spherical_stereo_transform(cam, &P, &D); + } - ray->P = P; - ray->D = D; + ray->P = P; + ray->D = D; #ifdef __RAY_DIFFERENTIALS__ - /* Ray differentials, computed from scratch using the raster coordinates - * because we don't want to be affected by depth of field. We compute - * ray origin and direction for the center and two neighbouring pixels - * and simply take their differences. */ - float3 Pcenter = Pcamera; - float3 Dcenter = panorama_to_direction(cam, Pcenter.x, Pcenter.y); - Pcenter = transform_point(&cameratoworld, Pcenter); - Dcenter = normalize(transform_direction(&cameratoworld, Dcenter)); - if(use_stereo) { - spherical_stereo_transform(cam, &Pcenter, &Dcenter); - } - - float3 Px = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f)); - float3 Dx = panorama_to_direction(cam, Px.x, Px.y); - Px = transform_point(&cameratoworld, Px); - Dx = normalize(transform_direction(&cameratoworld, Dx)); - if(use_stereo) { - spherical_stereo_transform(cam, &Px, &Dx); - } - - ray->dP.dx = Px - Pcenter; - ray->dD.dx = Dx - Dcenter; - - float3 Py = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f)); - float3 Dy = panorama_to_direction(cam, Py.x, Py.y); - Py = transform_point(&cameratoworld, Py); - Dy = normalize(transform_direction(&cameratoworld, Dy)); - if(use_stereo) { - spherical_stereo_transform(cam, &Py, &Dy); - } - - ray->dP.dy = Py - Pcenter; - ray->dD.dy = Dy - Dcenter; + /* Ray differentials, computed from scratch using the raster coordinates + * because we don't want to be affected by depth of field. 
We compute + * ray origin and direction for the center and two neighbouring pixels + * and simply take their differences. */ + float3 Pcenter = Pcamera; + float3 Dcenter = panorama_to_direction(cam, Pcenter.x, Pcenter.y); + Pcenter = transform_point(&cameratoworld, Pcenter); + Dcenter = normalize(transform_direction(&cameratoworld, Dcenter)); + if (use_stereo) { + spherical_stereo_transform(cam, &Pcenter, &Dcenter); + } + + float3 Px = transform_perspective(&rastertocamera, make_float3(raster_x + 1.0f, raster_y, 0.0f)); + float3 Dx = panorama_to_direction(cam, Px.x, Px.y); + Px = transform_point(&cameratoworld, Px); + Dx = normalize(transform_direction(&cameratoworld, Dx)); + if (use_stereo) { + spherical_stereo_transform(cam, &Px, &Dx); + } + + ray->dP.dx = Px - Pcenter; + ray->dD.dx = Dx - Dcenter; + + float3 Py = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y + 1.0f, 0.0f)); + float3 Dy = panorama_to_direction(cam, Py.x, Py.y); + Py = transform_point(&cameratoworld, Py); + Dy = normalize(transform_direction(&cameratoworld, Dy)); + if (use_stereo) { + spherical_stereo_transform(cam, &Py, &Dy); + } + + ray->dP.dy = Py - Pcenter; + ray->dD.dy = Dy - Dcenter; #endif #ifdef __CAMERA_CLIPPING__ - /* clipping */ - float nearclip = cam->nearclip; - ray->P += nearclip * ray->D; - ray->dP.dx += nearclip * ray->dD.dx; - ray->dP.dy += nearclip * ray->dD.dy; - ray->t = cam->cliplength; + /* clipping */ + float nearclip = cam->nearclip; + ray->P += nearclip * ray->D; + ray->dP.dx += nearclip * ray->dD.dx; + ray->dP.dy += nearclip * ray->dD.dy; + ray->t = cam->cliplength; #else - ray->t = FLT_MAX; + ray->t = FLT_MAX; #endif } /* Common */ ccl_device_inline void camera_sample(KernelGlobals *kg, - int x, int y, - float filter_u, float filter_v, - float lens_u, float lens_v, + int x, + int y, + float filter_u, + float filter_v, + float lens_u, + float lens_v, float time, ccl_addr_space Ray *ray) { - /* pixel filter */ - int filter_table_offset = 
kernel_data.film.filter_table_offset; - float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE); - float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE); + /* pixel filter */ + int filter_table_offset = kernel_data.film.filter_table_offset; + float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE); + float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE); #ifdef __CAMERA_MOTION__ - /* motion blur */ - if(kernel_data.cam.shuttertime == -1.0f) { - ray->time = 0.5f; - } - else { - /* TODO(sergey): Such lookup is unneeded when there's rolling shutter - * effect in use but rolling shutter duration is set to 0.0. - */ - const int shutter_table_offset = kernel_data.cam.shutter_table_offset; - ray->time = lookup_table_read(kg, time, shutter_table_offset, SHUTTER_TABLE_SIZE); - /* TODO(sergey): Currently single rolling shutter effect type only - * where scanlines are acquired from top to bottom and whole scanline - * is acquired at once (no delay in acquisition happens between pixels - * of single scanline). - * - * Might want to support more models in the future. - */ - if(kernel_data.cam.rolling_shutter_type) { - /* Time corresponding to a fully rolling shutter only effect: - * top of the frame is time 0.0, bottom of the frame is time 1.0. - */ - const float time = 1.0f - (float)y / kernel_data.cam.height; - const float duration = kernel_data.cam.rolling_shutter_duration; - if(duration != 0.0f) { - /* This isn't fully physical correct, but lets us to have simple - * controls in the interface. The idea here is basically sort of - * linear interpolation between how much rolling shutter effect - * exist on the frame and how much of it is a motion blur effect. 
- */ - ray->time = (ray->time - 0.5f) * duration; - ray->time += (time - 0.5f) * (1.0f - duration) + 0.5f; - } - else { - ray->time = time; - } - } - } + /* motion blur */ + if (kernel_data.cam.shuttertime == -1.0f) { + ray->time = 0.5f; + } + else { + /* TODO(sergey): Such lookup is unneeded when there's rolling shutter + * effect in use but rolling shutter duration is set to 0.0. + */ + const int shutter_table_offset = kernel_data.cam.shutter_table_offset; + ray->time = lookup_table_read(kg, time, shutter_table_offset, SHUTTER_TABLE_SIZE); + /* TODO(sergey): Currently single rolling shutter effect type only + * where scanlines are acquired from top to bottom and whole scanline + * is acquired at once (no delay in acquisition happens between pixels + * of single scanline). + * + * Might want to support more models in the future. + */ + if (kernel_data.cam.rolling_shutter_type) { + /* Time corresponding to a fully rolling shutter only effect: + * top of the frame is time 0.0, bottom of the frame is time 1.0. + */ + const float time = 1.0f - (float)y / kernel_data.cam.height; + const float duration = kernel_data.cam.rolling_shutter_duration; + if (duration != 0.0f) { + /* This isn't fully physical correct, but lets us to have simple + * controls in the interface. The idea here is basically sort of + * linear interpolation between how much rolling shutter effect + * exist on the frame and how much of it is a motion blur effect. 
+ */ + ray->time = (ray->time - 0.5f) * duration; + ray->time += (time - 0.5f) * (1.0f - duration) + 0.5f; + } + else { + ray->time = time; + } + } + } #endif - /* sample */ - if(kernel_data.cam.type == CAMERA_PERSPECTIVE) { - camera_sample_perspective(kg, raster_x, raster_y, lens_u, lens_v, ray); - } - else if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { - camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray); - } - else { - const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion); - camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray); - } + /* sample */ + if (kernel_data.cam.type == CAMERA_PERSPECTIVE) { + camera_sample_perspective(kg, raster_x, raster_y, lens_u, lens_v, ray); + } + else if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { + camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray); + } + else { + const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion); + camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray); + } } /* Utilities */ ccl_device_inline float3 camera_position(KernelGlobals *kg) { - Transform cameratoworld = kernel_data.cam.cameratoworld; - return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w); + Transform cameratoworld = kernel_data.cam.cameratoworld; + return make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w); } ccl_device_inline float camera_distance(KernelGlobals *kg, float3 P) { - Transform cameratoworld = kernel_data.cam.cameratoworld; - float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w); - - if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { - float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z); - return fabsf(dot((P - camP), camD)); - } - else - return len(P - camP); + Transform cameratoworld = kernel_data.cam.cameratoworld; + float3 camP = 
make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w); + + if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { + float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z); + return fabsf(dot((P - camP), camD)); + } + else + return len(P - camP); } ccl_device_inline float3 camera_direction_from_point(KernelGlobals *kg, float3 P) { - Transform cameratoworld = kernel_data.cam.cameratoworld; - - if(kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { - float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z); - return -camD; - } - else { - float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w); - return normalize(camP - P); - } + Transform cameratoworld = kernel_data.cam.cameratoworld; + + if (kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { + float3 camD = make_float3(cameratoworld.x.z, cameratoworld.y.z, cameratoworld.z.z); + return -camD; + } + else { + float3 camP = make_float3(cameratoworld.x.w, cameratoworld.y.w, cameratoworld.z.w); + return normalize(camP - P); + } } ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, float3 P) { - if(kernel_data.cam.type != CAMERA_PANORAMA) { - /* perspective / ortho */ - if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE) - P += camera_position(kg); - - ProjectionTransform tfm = kernel_data.cam.worldtondc; - return transform_perspective(&tfm, P); - } - else { - /* panorama */ - Transform tfm = kernel_data.cam.worldtocamera; - - if(sd->object != OBJECT_NONE) - P = normalize(transform_point(&tfm, P)); - else - P = normalize(transform_direction(&tfm, P)); - - float2 uv = direction_to_panorama(&kernel_data.cam, P); - - return make_float3(uv.x, uv.y, 0.0f); - } + if (kernel_data.cam.type != CAMERA_PANORAMA) { + /* perspective / ortho */ + if (sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE) + P += camera_position(kg); + + ProjectionTransform tfm = kernel_data.cam.worldtondc; + 
return transform_perspective(&tfm, P); + } + else { + /* panorama */ + Transform tfm = kernel_data.cam.worldtocamera; + + if (sd->object != OBJECT_NONE) + P = normalize(transform_point(&tfm, P)); + else + P = normalize(transform_direction(&tfm, P)); + + float2 uv = direction_to_panorama(&kernel_data.cam, P); + + return make_float3(uv.x, uv.y, 0.0f); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_color.h b/intern/cycles/kernel/kernel_color.h index ea478a8a5d3..5eb1bdad02e 100644 --- a/intern/cycles/kernel/kernel_color.h +++ b/intern/cycles/kernel/kernel_color.h @@ -23,16 +23,16 @@ CCL_NAMESPACE_BEGIN ccl_device float3 xyz_to_rgb(KernelGlobals *kg, float3 xyz) { - return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz), - dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz), - dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz)); + return make_float3(dot(float4_to_float3(kernel_data.film.xyz_to_r), xyz), + dot(float4_to_float3(kernel_data.film.xyz_to_g), xyz), + dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz)); } ccl_device float linear_rgb_to_gray(KernelGlobals *kg, float3 c) { - return dot(c, float4_to_float3(kernel_data.film.rgb_to_y)); + return dot(c, float4_to_float3(kernel_data.film.rgb_to_y)); } CCL_NAMESPACE_END -#endif /* __KERNEL_COLOR_H__ */ +#endif /* __KERNEL_COLOR_H__ */ diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 4ee80850402..e8fedca4489 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -55,9 +55,9 @@ /* On x86_64, versions of glibc < 2.16 have an issue where expf is * much slower than the double version. This was fixed in glibc 2.16. 
*/ -#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \ - defined(__GNU_LIBRARY__) && defined(__GLIBC__ ) && defined(__GLIBC_MINOR__) && \ - (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16) +#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \ + defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \ + (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16) # define expf(x) ((float)exp((double)(x))) #endif @@ -71,41 +71,41 @@ CCL_NAMESPACE_BEGIN /* Texture types to be compatible with CUDA textures. These are really just * simple arrays and after inlining fetch hopefully revert to being a simple * pointer lookup. */ -template<typename T> struct texture { - ccl_always_inline const T& fetch(int index) - { - kernel_assert(index >= 0 && index < width); - return data[index]; - } +template<typename T> struct texture { + ccl_always_inline const T &fetch(int index) + { + kernel_assert(index >= 0 && index < width); + return data[index]; + } #if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) - /* Reads 256 bytes but indexes in blocks of 128 bytes to maintain - * compatibility with existing indicies and data structures. - */ - ccl_always_inline avxf fetch_avxf(const int index) - { - kernel_assert(index >= 0 && (index+1) < width); - ssef *ssef_data = (ssef*)data; - ssef *ssef_node_data = &ssef_data[index]; - return _mm256_loadu_ps((float *)ssef_node_data); - } + /* Reads 256 bytes but indexes in blocks of 128 bytes to maintain + * compatibility with existing indicies and data structures. 
+ */ + ccl_always_inline avxf fetch_avxf(const int index) + { + kernel_assert(index >= 0 && (index + 1) < width); + ssef *ssef_data = (ssef *)data; + ssef *ssef_node_data = &ssef_data[index]; + return _mm256_loadu_ps((float *)ssef_node_data); + } #endif #ifdef __KERNEL_SSE2__ - ccl_always_inline ssef fetch_ssef(int index) - { - kernel_assert(index >= 0 && index < width); - return ((ssef*)data)[index]; - } - - ccl_always_inline ssei fetch_ssei(int index) - { - kernel_assert(index >= 0 && index < width); - return ((ssei*)data)[index]; - } + ccl_always_inline ssef fetch_ssef(int index) + { + kernel_assert(index >= 0 && index < width); + return ((ssef *)data)[index]; + } + + ccl_always_inline ssei fetch_ssei(int index) + { + kernel_assert(index >= 0 && index < width); + return ((ssei *)data)[index]; + } #endif - T *data; - int width; + T *data; + int width; }; /* Macros to handle different memory storage on different devices */ @@ -124,33 +124,33 @@ typedef vector3<sseb> sse3b; typedef vector3<ssef> sse3f; typedef vector3<ssei> sse3i; -ccl_device_inline void print_sse3b(const char *label, sse3b& a) +ccl_device_inline void print_sse3b(const char *label, sse3b &a) { - print_sseb(label, a.x); - print_sseb(label, a.y); - print_sseb(label, a.z); + print_sseb(label, a.x); + print_sseb(label, a.y); + print_sseb(label, a.z); } -ccl_device_inline void print_sse3f(const char *label, sse3f& a) +ccl_device_inline void print_sse3f(const char *label, sse3f &a) { - print_ssef(label, a.x); - print_ssef(label, a.y); - print_ssef(label, a.z); + print_ssef(label, a.x); + print_ssef(label, a.y); + print_ssef(label, a.z); } -ccl_device_inline void print_sse3i(const char *label, sse3i& a) +ccl_device_inline void print_sse3i(const char *label, sse3i &a) { - print_ssei(label, a.x); - print_ssei(label, a.y); - print_ssei(label, a.z); + print_ssei(label, a.x); + print_ssei(label, a.y); + print_ssei(label, a.z); } -#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +# if 
defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) typedef vector3<avxf> avx3f; -#endif +# endif #endif CCL_NAMESPACE_END -#endif /* __KERNEL_COMPAT_CPU_H__ */ +#endif /* __KERNEL_COMPAT_CPU_H__ */ diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 8ed96bbae64..469b81d120b 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -42,22 +42,22 @@ typedef unsigned long long CUtexObject; __device__ half __float2half(const float f) { - half val; - asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f)); - return val; + half val; + asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f)); + return val; } /* Qualifier wrappers for different names on different devices */ -#define ccl_device __device__ __inline__ +#define ccl_device __device__ __inline__ #if __CUDA_ARCH__ < 500 -# define ccl_device_inline __device__ __forceinline__ -# define ccl_device_forceinline __device__ __forceinline__ +# define ccl_device_inline __device__ __forceinline__ +# define ccl_device_forceinline __device__ __forceinline__ #else -# define ccl_device_inline __device__ __inline__ -# define ccl_device_forceinline __device__ __forceinline__ +# define ccl_device_inline __device__ __inline__ +# define ccl_device_forceinline __device__ __forceinline__ #endif -#define ccl_device_noinline __device__ __noinline__ +#define ccl_device_noinline __device__ __noinline__ #define ccl_global #define ccl_static_constant __constant__ #define ccl_constant const @@ -75,8 +75,7 @@ __device__ half __float2half(const float f) #define ATTR_FALLTHROUGH -#define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH) - +#define CCL_MAX_LOCAL_SIZE (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH) /* No assert supported for CUDA */ @@ -91,46 +90,62 @@ __device__ half __float2half(const float f) ccl_device_inline uint ccl_local_id(uint d) { - switch(d) { - case 0: return threadIdx.x; - case 1: return 
threadIdx.y; - case 2: return threadIdx.z; - default: return 0; - } + switch (d) { + case 0: + return threadIdx.x; + case 1: + return threadIdx.y; + case 2: + return threadIdx.z; + default: + return 0; + } } #define ccl_global_id(d) (ccl_group_id(d) * ccl_local_size(d) + ccl_local_id(d)) ccl_device_inline uint ccl_local_size(uint d) { - switch(d) { - case 0: return blockDim.x; - case 1: return blockDim.y; - case 2: return blockDim.z; - default: return 0; - } + switch (d) { + case 0: + return blockDim.x; + case 1: + return blockDim.y; + case 2: + return blockDim.z; + default: + return 0; + } } #define ccl_global_size(d) (ccl_num_groups(d) * ccl_local_size(d)) ccl_device_inline uint ccl_group_id(uint d) { - switch(d) { - case 0: return blockIdx.x; - case 1: return blockIdx.y; - case 2: return blockIdx.z; - default: return 0; - } + switch (d) { + case 0: + return blockIdx.x; + case 1: + return blockIdx.y; + case 2: + return blockIdx.z; + default: + return 0; + } } ccl_device_inline uint ccl_num_groups(uint d) { - switch(d) { - case 0: return gridDim.x; - case 1: return gridDim.y; - case 2: return gridDim.z; - default: return 0; - } + switch (d) { + case 0: + return gridDim.x; + case 1: + return gridDim.y; + case 2: + return gridDim.z; + default: + return 0; + } } /* Textures */ @@ -150,4 +165,4 @@ ccl_device_inline uint ccl_num_groups(uint d) #define logf(x) __logf(((float)(x))) #define expf(x) __expf(((float)(x))) -#endif /* __KERNEL_COMPAT_CUDA_H__ */ +#endif /* __KERNEL_COMPAT_CUDA_H__ */ diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index d3d0934a626..e040ea88d7c 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -134,7 +134,7 @@ # define expf(x) native_exp(((float)(x))) # define sqrtf(x) native_sqrt(((float)(x))) # define logf(x) native_log(((float)(x))) -# define rcp(x) native_recip(x) +# define rcp(x) native_recip(x) #else # define sinf(x) 
sin(((float)(x))) # define cosf(x) cos(((float)(x))) @@ -142,12 +142,13 @@ # define expf(x) exp(((float)(x))) # define sqrtf(x) sqrt(((float)(x))) # define logf(x) log(((float)(x))) -# define rcp(x) recip(x) +# define rcp(x) recip(x) #endif /* data lookup defines */ #define kernel_data (*kg->data) -#define kernel_tex_array(tex) ((const ccl_global tex##_t*)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data)) +#define kernel_tex_array(tex) \ + ((const ccl_global tex##_t *)(kg->buffers[kg->tex.cl_buffer] + kg->tex.data)) #define kernel_tex_fetch(tex, index) kernel_tex_array(tex)[(index)] /* define NULL */ @@ -155,10 +156,10 @@ /* enable extensions */ #ifdef __KERNEL_CL_KHR_FP16__ -#pragma OPENCL EXTENSION cl_khr_fp16 : enable +# pragma OPENCL EXTENSION cl_khr_fp16 : enable #endif #include "util/util_half.h" #include "util/util_types.h" -#endif /* __KERNEL_COMPAT_OPENCL_H__ */ +#endif /* __KERNEL_COMPAT_OPENCL_H__ */ diff --git a/intern/cycles/kernel/kernel_differential.h b/intern/cycles/kernel/kernel_differential.h index 924d96c72e5..8513de0d843 100644 --- a/intern/cycles/kernel/kernel_differential.h +++ b/intern/cycles/kernel/kernel_differential.h @@ -18,88 +18,98 @@ CCL_NAMESPACE_BEGIN /* See "Tracing Ray Differentials", Homan Igehy, 1999. 
*/ -ccl_device void differential_transfer(ccl_addr_space differential3 *dP_, const differential3 dP, float3 D, const differential3 dD, float3 Ng, float t) +ccl_device void differential_transfer(ccl_addr_space differential3 *dP_, + const differential3 dP, + float3 D, + const differential3 dD, + float3 Ng, + float t) { - /* ray differential transfer through homogeneous medium, to - * compute dPdx/dy at a shading point from the incoming ray */ + /* ray differential transfer through homogeneous medium, to + * compute dPdx/dy at a shading point from the incoming ray */ - float3 tmp = D/dot(D, Ng); - float3 tmpx = dP.dx + t*dD.dx; - float3 tmpy = dP.dy + t*dD.dy; + float3 tmp = D / dot(D, Ng); + float3 tmpx = dP.dx + t * dD.dx; + float3 tmpy = dP.dy + t * dD.dy; - dP_->dx = tmpx - dot(tmpx, Ng)*tmp; - dP_->dy = tmpy - dot(tmpy, Ng)*tmp; + dP_->dx = tmpx - dot(tmpx, Ng) * tmp; + dP_->dy = tmpy - dot(tmpy, Ng) * tmp; } ccl_device void differential_incoming(ccl_addr_space differential3 *dI, const differential3 dD) { - /* compute dIdx/dy at a shading point, we just need to negate the - * differential of the ray direction */ + /* compute dIdx/dy at a shading point, we just need to negate the + * differential of the ray direction */ - dI->dx = -dD.dx; - dI->dy = -dD.dy; + dI->dx = -dD.dx; + dI->dy = -dD.dy; } -ccl_device void differential_dudv(ccl_addr_space differential *du, ccl_addr_space differential *dv, float3 dPdu, float3 dPdv, differential3 dP, float3 Ng) +ccl_device void differential_dudv(ccl_addr_space differential *du, + ccl_addr_space differential *dv, + float3 dPdu, + float3 dPdv, + differential3 dP, + float3 Ng) { - /* now we have dPdx/dy from the ray differential transfer, and dPdu/dv - * from the primitive, we can compute dudx/dy and dvdx/dy. these are - * mainly used for differentials of arbitrary mesh attributes. 
*/ - - /* find most stable axis to project to 2D */ - float xn = fabsf(Ng.x); - float yn = fabsf(Ng.y); - float zn = fabsf(Ng.z); - - if(zn < xn || zn < yn) { - if(yn < xn || yn < zn) { - dPdu.x = dPdu.y; - dPdv.x = dPdv.y; - dP.dx.x = dP.dx.y; - dP.dy.x = dP.dy.y; - } - - dPdu.y = dPdu.z; - dPdv.y = dPdv.z; - dP.dx.y = dP.dx.z; - dP.dy.y = dP.dy.z; - } - - /* using Cramer's rule, we solve for dudx and dvdx in a 2x2 linear system, - * and the same for dudy and dvdy. the denominator is the same for both - * solutions, so we compute it only once. - * - * dP.dx = dPdu * dudx + dPdv * dvdx; - * dP.dy = dPdu * dudy + dPdv * dvdy; */ - - float det = (dPdu.x*dPdv.y - dPdv.x*dPdu.y); - - if(det != 0.0f) - det = 1.0f/det; - - du->dx = (dP.dx.x*dPdv.y - dP.dx.y*dPdv.x)*det; - dv->dx = (dP.dx.y*dPdu.x - dP.dx.x*dPdu.y)*det; - - du->dy = (dP.dy.x*dPdv.y - dP.dy.y*dPdv.x)*det; - dv->dy = (dP.dy.y*dPdu.x - dP.dy.x*dPdu.y)*det; + /* now we have dPdx/dy from the ray differential transfer, and dPdu/dv + * from the primitive, we can compute dudx/dy and dvdx/dy. these are + * mainly used for differentials of arbitrary mesh attributes. */ + + /* find most stable axis to project to 2D */ + float xn = fabsf(Ng.x); + float yn = fabsf(Ng.y); + float zn = fabsf(Ng.z); + + if (zn < xn || zn < yn) { + if (yn < xn || yn < zn) { + dPdu.x = dPdu.y; + dPdv.x = dPdv.y; + dP.dx.x = dP.dx.y; + dP.dy.x = dP.dy.y; + } + + dPdu.y = dPdu.z; + dPdv.y = dPdv.z; + dP.dx.y = dP.dx.z; + dP.dy.y = dP.dy.z; + } + + /* using Cramer's rule, we solve for dudx and dvdx in a 2x2 linear system, + * and the same for dudy and dvdy. the denominator is the same for both + * solutions, so we compute it only once. 
+ * + * dP.dx = dPdu * dudx + dPdv * dvdx; + * dP.dy = dPdu * dudy + dPdv * dvdy; */ + + float det = (dPdu.x * dPdv.y - dPdv.x * dPdu.y); + + if (det != 0.0f) + det = 1.0f / det; + + du->dx = (dP.dx.x * dPdv.y - dP.dx.y * dPdv.x) * det; + dv->dx = (dP.dx.y * dPdu.x - dP.dx.x * dPdu.y) * det; + + du->dy = (dP.dy.x * dPdv.y - dP.dy.y * dPdv.x) * det; + dv->dy = (dP.dy.y * dPdu.x - dP.dy.x * dPdu.y) * det; } ccl_device differential differential_zero() { - differential d; - d.dx = 0.0f; - d.dy = 0.0f; + differential d; + d.dx = 0.0f; + d.dy = 0.0f; - return d; + return d; } ccl_device differential3 differential3_zero() { - differential3 d; - d.dx = make_float3(0.0f, 0.0f, 0.0f); - d.dy = make_float3(0.0f, 0.0f, 0.0f); + differential3 d; + d.dx = make_float3(0.0f, 0.0f, 0.0f); + d.dy = make_float3(0.0f, 0.0f, 0.0f); - return d; + return d; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 80bb8d48caf..f2eaa7b50a5 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -26,61 +26,71 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float t, float time) { - /* setup shading at emitter */ - float3 eval; - - if(shader_constant_emission_eval(kg, ls->shader, &eval)) { - if((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) { - ls->Ng = -ls->Ng; - } - } - else { - /* Setup shader data and call shader_eval_surface once, better - * for GPU coherence and compile times. */ + /* setup shading at emitter */ + float3 eval; + + if (shader_constant_emission_eval(kg, ls->shader, &eval)) { + if ((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) { + ls->Ng = -ls->Ng; + } + } + else { + /* Setup shader data and call shader_eval_surface once, better + * for GPU coherence and compile times. 
*/ #ifdef __BACKGROUND_MIS__ - if(ls->type == LIGHT_BACKGROUND) { - Ray ray; - ray.D = ls->D; - ray.P = ls->P; - ray.t = 1.0f; - ray.time = time; - ray.dP = differential3_zero(); - ray.dD = dI; - - shader_setup_from_background(kg, emission_sd, &ray); - } - else + if (ls->type == LIGHT_BACKGROUND) { + Ray ray; + ray.D = ls->D; + ray.P = ls->P; + ray.t = 1.0f; + ray.time = time; + ray.dP = differential3_zero(); + ray.dD = dI; + + shader_setup_from_background(kg, emission_sd, &ray); + } + else #endif - { - shader_setup_from_sample(kg, emission_sd, - ls->P, ls->Ng, I, - ls->shader, ls->object, ls->prim, - ls->u, ls->v, t, time, false, ls->lamp); - - ls->Ng = emission_sd->Ng; - } - - /* No proper path flag, we're evaluating this for all closures. that's - * weak but we'd have to do multiple evaluations otherwise. */ - path_state_modify_bounce(state, true); - shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION); - path_state_modify_bounce(state, false); - - /* Evaluate closures. */ + { + shader_setup_from_sample(kg, + emission_sd, + ls->P, + ls->Ng, + I, + ls->shader, + ls->object, + ls->prim, + ls->u, + ls->v, + t, + time, + false, + ls->lamp); + + ls->Ng = emission_sd->Ng; + } + + /* No proper path flag, we're evaluating this for all closures. that's + * weak but we'd have to do multiple evaluations otherwise. */ + path_state_modify_bounce(state, true); + shader_eval_surface(kg, emission_sd, state, PATH_RAY_EMISSION); + path_state_modify_bounce(state, false); + + /* Evaluate closures. 
*/ #ifdef __BACKGROUND_MIS__ - if (ls->type == LIGHT_BACKGROUND) { - eval = shader_background_eval(emission_sd); - } - else + if (ls->type == LIGHT_BACKGROUND) { + eval = shader_background_eval(emission_sd); + } + else #endif - { - eval = shader_emissive_eval(emission_sd); - } - } + { + eval = shader_emissive_eval(emission_sd); + } + } - eval *= ls->eval_fac; + eval *= ls->eval_fac; - return eval; + return eval; } ccl_device_noinline bool direct_emission(KernelGlobals *kg, @@ -93,132 +103,128 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, bool *is_lamp, float rand_terminate) { - if(ls->pdf == 0.0f) - return false; + if (ls->pdf == 0.0f) + return false; - /* todo: implement */ - differential3 dD = differential3_zero(); + /* todo: implement */ + differential3 dD = differential3_zero(); - /* evaluate closure */ + /* evaluate closure */ - float3 light_eval = direct_emissive_eval(kg, - emission_sd, - ls, - state, - -ls->D, - dD, - ls->t, - sd->time); + float3 light_eval = direct_emissive_eval( + kg, emission_sd, ls, state, -ls->D, dD, ls->t, sd->time); - if(is_zero(light_eval)) - return false; + if (is_zero(light_eval)) + return false; - /* evaluate BSDF at shading point */ + /* evaluate BSDF at shading point */ #ifdef __VOLUME__ - if(sd->prim != PRIM_NONE) - shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS); - else { - float bsdf_pdf; - shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf); - if(ls->shader & SHADER_USE_MIS) { - /* Multiple importance sampling. */ - float mis_weight = power_heuristic(ls->pdf, bsdf_pdf); - light_eval *= mis_weight; - } - } + if (sd->prim != PRIM_NONE) + shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS); + else { + float bsdf_pdf; + shader_volume_phase_eval(kg, sd, ls->D, eval, &bsdf_pdf); + if (ls->shader & SHADER_USE_MIS) { + /* Multiple importance sampling. 
*/ + float mis_weight = power_heuristic(ls->pdf, bsdf_pdf); + light_eval *= mis_weight; + } + } #else - shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS); + shader_bsdf_eval(kg, sd, ls->D, eval, ls->pdf, ls->shader & SHADER_USE_MIS); #endif - bsdf_eval_mul3(eval, light_eval/ls->pdf); + bsdf_eval_mul3(eval, light_eval / ls->pdf); #ifdef __PASSES__ - /* use visibility flag to skip lights */ - if(ls->shader & SHADER_EXCLUDE_ANY) { - if(ls->shader & SHADER_EXCLUDE_DIFFUSE) { - eval->diffuse = make_float3(0.0f, 0.0f, 0.0f); - eval->subsurface = make_float3(0.0f, 0.0f, 0.0f); - } - if(ls->shader & SHADER_EXCLUDE_GLOSSY) - eval->glossy = make_float3(0.0f, 0.0f, 0.0f); - if(ls->shader & SHADER_EXCLUDE_TRANSMIT) - eval->transmission = make_float3(0.0f, 0.0f, 0.0f); - if(ls->shader & SHADER_EXCLUDE_SCATTER) - eval->scatter = make_float3(0.0f, 0.0f, 0.0f); - } + /* use visibility flag to skip lights */ + if (ls->shader & SHADER_EXCLUDE_ANY) { + if (ls->shader & SHADER_EXCLUDE_DIFFUSE) { + eval->diffuse = make_float3(0.0f, 0.0f, 0.0f); + eval->subsurface = make_float3(0.0f, 0.0f, 0.0f); + } + if (ls->shader & SHADER_EXCLUDE_GLOSSY) + eval->glossy = make_float3(0.0f, 0.0f, 0.0f); + if (ls->shader & SHADER_EXCLUDE_TRANSMIT) + eval->transmission = make_float3(0.0f, 0.0f, 0.0f); + if (ls->shader & SHADER_EXCLUDE_SCATTER) + eval->scatter = make_float3(0.0f, 0.0f, 0.0f); + } #endif - if(bsdf_eval_is_zero(eval)) - return false; + if (bsdf_eval_is_zero(eval)) + return false; - if(kernel_data.integrator.light_inv_rr_threshold > 0.0f + if (kernel_data.integrator.light_inv_rr_threshold > 0.0f #ifdef __SHADOW_TRICKS__ - && (state->flag & PATH_RAY_SHADOW_CATCHER) == 0 + && (state->flag & PATH_RAY_SHADOW_CATCHER) == 0 #endif - ) - { - float probability = max3(fabs(bsdf_eval_sum(eval))) * kernel_data.integrator.light_inv_rr_threshold; - if(probability < 1.0f) { - if(rand_terminate >= probability) { - return false; - } - bsdf_eval_mul(eval, 1.0f / probability); - } - 
} - - if(ls->shader & SHADER_CAST_SHADOW) { - /* setup ray */ - bool transmit = (dot(sd->Ng, ls->D) < 0.0f); - ray->P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng); - - if(ls->t == FLT_MAX) { - /* distant light */ - ray->D = ls->D; - ray->t = ls->t; - } - else { - /* other lights, avoid self-intersection */ - ray->D = ray_offset(ls->P, ls->Ng) - ray->P; - ray->D = normalize_len(ray->D, &ray->t); - } - - ray->dP = sd->dP; - ray->dD = differential3_zero(); - } - else { - /* signal to not cast shadow ray */ - ray->t = 0.0f; - } - - /* return if it's a lamp for shadow pass */ - *is_lamp = (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND); - - return true; + ) { + float probability = max3(fabs(bsdf_eval_sum(eval))) * + kernel_data.integrator.light_inv_rr_threshold; + if (probability < 1.0f) { + if (rand_terminate >= probability) { + return false; + } + bsdf_eval_mul(eval, 1.0f / probability); + } + } + + if (ls->shader & SHADER_CAST_SHADOW) { + /* setup ray */ + bool transmit = (dot(sd->Ng, ls->D) < 0.0f); + ray->P = ray_offset(sd->P, (transmit) ? 
-sd->Ng : sd->Ng); + + if (ls->t == FLT_MAX) { + /* distant light */ + ray->D = ls->D; + ray->t = ls->t; + } + else { + /* other lights, avoid self-intersection */ + ray->D = ray_offset(ls->P, ls->Ng) - ray->P; + ray->D = normalize_len(ray->D, &ray->t); + } + + ray->dP = sd->dP; + ray->dD = differential3_zero(); + } + else { + /* signal to not cast shadow ray */ + ray->t = 0.0f; + } + + /* return if it's a lamp for shadow pass */ + *is_lamp = (ls->prim == PRIM_NONE && ls->type != LIGHT_BACKGROUND); + + return true; } /* Indirect Primitive Emission */ -ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf) +ccl_device_noinline float3 indirect_primitive_emission( + KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf) { - /* evaluate emissive closure */ - float3 L = shader_emissive_eval(sd); + /* evaluate emissive closure */ + float3 L = shader_emissive_eval(sd); #ifdef __HAIR__ - if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE)) + if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && + (sd->type & PRIMITIVE_ALL_TRIANGLE)) #else - if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) + if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) #endif - { - /* multiple importance sampling, get triangle light pdf, - * and compute weight with respect to BSDF pdf */ - float pdf = triangle_light_pdf(kg, sd, t); - float mis_weight = power_heuristic(bsdf_pdf, pdf); + { + /* multiple importance sampling, get triangle light pdf, + * and compute weight with respect to BSDF pdf */ + float pdf = triangle_light_pdf(kg, sd, t); + float mis_weight = power_heuristic(bsdf_pdf, pdf); - return L*mis_weight; - } + return L * mis_weight; + } - return L; + return L; } /* Indirect Lamp Emission */ @@ -229,60 +235,55 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, Ray *ray, float3 *emission) { 
- bool hit_lamp = false; + bool hit_lamp = false; - *emission = make_float3(0.0f, 0.0f, 0.0f); + *emission = make_float3(0.0f, 0.0f, 0.0f); - for(int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { - LightSample ls; + for (int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { + LightSample ls; - if(!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls)) - continue; + if (!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls)) + continue; #ifdef __PASSES__ - /* use visibility flag to skip lights */ - if(ls.shader & SHADER_EXCLUDE_ANY) { - if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || - ((ls.shader & SHADER_EXCLUDE_GLOSSY) && - ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) || - ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || - ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER))) - continue; - } + /* use visibility flag to skip lights */ + if (ls.shader & SHADER_EXCLUDE_ANY) { + if (((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || + ((ls.shader & SHADER_EXCLUDE_GLOSSY) && + ((state->flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == + (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || + ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || + ((ls.shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER))) + continue; + } #endif - float3 L = direct_emissive_eval(kg, - emission_sd, - &ls, - state, - -ray->D, - ray->dD, - ls.t, - ray->time); + float3 L = direct_emissive_eval( + kg, emission_sd, &ls, state, -ray->D, ray->dD, ls.t, ray->time); #ifdef __VOLUME__ - if(state->volume_stack[0].shader != SHADER_NONE) { - /* shadow attenuation */ - Ray volume_ray = *ray; - volume_ray.t = ls.t; - float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f); - kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp); - L *= volume_tp; - } + if 
(state->volume_stack[0].shader != SHADER_NONE) { + /* shadow attenuation */ + Ray volume_ray = *ray; + volume_ray.t = ls.t; + float3 volume_tp = make_float3(1.0f, 1.0f, 1.0f); + kernel_volume_shadow(kg, emission_sd, state, &volume_ray, &volume_tp); + L *= volume_tp; + } #endif - if(!(state->flag & PATH_RAY_MIS_SKIP)) { - /* multiple importance sampling, get regular light pdf, - * and compute weight with respect to BSDF pdf */ - float mis_weight = power_heuristic(state->ray_pdf, ls.pdf); - L *= mis_weight; - } + if (!(state->flag & PATH_RAY_MIS_SKIP)) { + /* multiple importance sampling, get regular light pdf, + * and compute weight with respect to BSDF pdf */ + float mis_weight = power_heuristic(state->ray_pdf, ls.pdf); + L *= mis_weight; + } - *emission += L; - hit_lamp = true; - } + *emission += L; + hit_lamp = true; + } - return hit_lamp; + return hit_lamp; } /* Indirect Background */ @@ -293,55 +294,55 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, ccl_addr_space Ray *ray) { #ifdef __BACKGROUND__ - int shader = kernel_data.background.surface_shader; - - /* Use visibility flag to skip lights. */ - if(shader & SHADER_EXCLUDE_ANY) { - if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || - ((shader & SHADER_EXCLUDE_GLOSSY) && - ((state->flag & (PATH_RAY_GLOSSY|PATH_RAY_REFLECT)) == (PATH_RAY_GLOSSY|PATH_RAY_REFLECT))) || - ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || - ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) || - ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER))) - return make_float3(0.0f, 0.0f, 0.0f); - } - - - /* Evaluate background shader. */ - float3 L; - if(!shader_constant_emission_eval(kg, shader, &L)) { + int shader = kernel_data.background.surface_shader; + + /* Use visibility flag to skip lights. 
*/ + if (shader & SHADER_EXCLUDE_ANY) { + if (((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || + ((shader & SHADER_EXCLUDE_GLOSSY) && + ((state->flag & (PATH_RAY_GLOSSY | PATH_RAY_REFLECT)) == + (PATH_RAY_GLOSSY | PATH_RAY_REFLECT))) || + ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || + ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)) || + ((shader & SHADER_EXCLUDE_SCATTER) && (state->flag & PATH_RAY_VOLUME_SCATTER))) + return make_float3(0.0f, 0.0f, 0.0f); + } + + /* Evaluate background shader. */ + float3 L; + if (!shader_constant_emission_eval(kg, shader, &L)) { # ifdef __SPLIT_KERNEL__ - Ray priv_ray = *ray; - shader_setup_from_background(kg, emission_sd, &priv_ray); + Ray priv_ray = *ray; + shader_setup_from_background(kg, emission_sd, &priv_ray); # else - shader_setup_from_background(kg, emission_sd, ray); + shader_setup_from_background(kg, emission_sd, ray); # endif - path_state_modify_bounce(state, true); - shader_eval_surface(kg, emission_sd, state, state->flag | PATH_RAY_EMISSION); - path_state_modify_bounce(state, false); + path_state_modify_bounce(state, true); + shader_eval_surface(kg, emission_sd, state, state->flag | PATH_RAY_EMISSION); + path_state_modify_bounce(state, false); - L = shader_background_eval(emission_sd); - } + L = shader_background_eval(emission_sd); + } - /* Background MIS weights. */ -#ifdef __BACKGROUND_MIS__ - /* Check if background light exists or if we should skip pdf. */ - int res_x = kernel_data.integrator.pdf_background_res_x; + /* Background MIS weights. */ +# ifdef __BACKGROUND_MIS__ + /* Check if background light exists or if we should skip pdf. 
*/ + int res_x = kernel_data.integrator.pdf_background_res_x; - if(!(state->flag & PATH_RAY_MIS_SKIP) && res_x) { - /* multiple importance sampling, get background light pdf for ray - * direction, and compute weight with respect to BSDF pdf */ - float pdf = background_light_pdf(kg, ray->P, ray->D); - float mis_weight = power_heuristic(state->ray_pdf, pdf); + if (!(state->flag & PATH_RAY_MIS_SKIP) && res_x) { + /* multiple importance sampling, get background light pdf for ray + * direction, and compute weight with respect to BSDF pdf */ + float pdf = background_light_pdf(kg, ray->P, ray->D); + float mis_weight = power_heuristic(state->ray_pdf, pdf); - return L*mis_weight; - } -#endif + return L * mis_weight; + } +# endif - return L; + return L; #else - return make_float3(0.8f, 0.8f, 0.8f); + return make_float3(0.8f, 0.8f, 0.8f); #endif } diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index b5f151d8663..d20f1adf663 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -18,72 +18,82 @@ CCL_NAMESPACE_BEGIN ccl_device float4 film_map(KernelGlobals *kg, float4 irradiance, float scale) { - float exposure = kernel_data.film.exposure; - float4 result = irradiance*scale; + float exposure = kernel_data.film.exposure; + float4 result = irradiance * scale; - /* conversion to srgb */ - result.x = color_linear_to_srgb(result.x*exposure); - result.y = color_linear_to_srgb(result.y*exposure); - result.z = color_linear_to_srgb(result.z*exposure); + /* conversion to srgb */ + result.x = color_linear_to_srgb(result.x * exposure); + result.y = color_linear_to_srgb(result.y * exposure); + result.z = color_linear_to_srgb(result.z * exposure); - /* clamp since alpha might be > 1.0 due to russian roulette */ - result.w = saturate(result.w); + /* clamp since alpha might be > 1.0 due to russian roulette */ + result.w = saturate(result.w); - return result; + return result; } ccl_device uchar4 film_float_to_byte(float4 
color) { - uchar4 result; + uchar4 result; - /* simple float to byte conversion */ - result.x = (uchar)(saturate(color.x)*255.0f); - result.y = (uchar)(saturate(color.y)*255.0f); - result.z = (uchar)(saturate(color.z)*255.0f); - result.w = (uchar)(saturate(color.w)*255.0f); + /* simple float to byte conversion */ + result.x = (uchar)(saturate(color.x) * 255.0f); + result.y = (uchar)(saturate(color.y) * 255.0f); + result.z = (uchar)(saturate(color.z) * 255.0f); + result.w = (uchar)(saturate(color.w) * 255.0f); - return result; + return result; } ccl_device void kernel_film_convert_to_byte(KernelGlobals *kg, - ccl_global uchar4 *rgba, ccl_global float *buffer, - float sample_scale, int x, int y, int offset, int stride) + ccl_global uchar4 *rgba, + ccl_global float *buffer, + float sample_scale, + int x, + int y, + int offset, + int stride) { - /* buffer offset */ - int index = offset + x + y*stride; + /* buffer offset */ + int index = offset + x + y * stride; - rgba += index; - buffer += index*kernel_data.film.pass_stride; + rgba += index; + buffer += index * kernel_data.film.pass_stride; - /* map colors */ - float4 irradiance = *((ccl_global float4*)buffer); - float4 float_result = film_map(kg, irradiance, sample_scale); - uchar4 byte_result = film_float_to_byte(float_result); + /* map colors */ + float4 irradiance = *((ccl_global float4 *)buffer); + float4 float_result = film_map(kg, irradiance, sample_scale); + uchar4 byte_result = film_float_to_byte(float_result); - *rgba = byte_result; + *rgba = byte_result; } ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg, - ccl_global uchar4 *rgba, ccl_global float *buffer, - float sample_scale, int x, int y, int offset, int stride) + ccl_global uchar4 *rgba, + ccl_global float *buffer, + float sample_scale, + int x, + int y, + int offset, + int stride) { - /* buffer offset */ - int index = offset + x + y*stride; + /* buffer offset */ + int index = offset + x + y * stride; - ccl_global float4 *in = 
(ccl_global float4*)(buffer + index*kernel_data.film.pass_stride); - ccl_global half *out = (ccl_global half*)rgba + index*4; + ccl_global float4 *in = (ccl_global float4 *)(buffer + index * kernel_data.film.pass_stride); + ccl_global half *out = (ccl_global half *)rgba + index * 4; - float exposure = kernel_data.film.exposure; + float exposure = kernel_data.film.exposure; - float4 rgba_in = *in; + float4 rgba_in = *in; - if(exposure != 1.0f) { - rgba_in.x *= exposure; - rgba_in.y *= exposure; - rgba_in.z *= exposure; - } + if (exposure != 1.0f) { + rgba_in.x *= exposure; + rgba_in.y *= exposure; + rgba_in.z *= exposure; + } - float4_store_half(out, rgba_in, sample_scale); + float4_store_half(out, rgba_in, sample_scale); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index 59f1e252d21..9dbf3b7ea2e 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -54,41 +54,41 @@ typedef struct KernelGlobals { # define KERNEL_TEX(type, name) texture<type> name; # include "kernel/kernel_textures.h" - KernelData __data; + KernelData __data; # ifdef __OSL__ - /* On the CPU, we also have the OSL globals here. Most data structures are shared - * with SVM, the difference is in the shaders and object/mesh attributes. */ - OSLGlobals *osl; - OSLShadingSystem *osl_ss; - OSLThreadData *osl_tdata; + /* On the CPU, we also have the OSL globals here. Most data structures are shared + * with SVM, the difference is in the shaders and object/mesh attributes. */ + OSLGlobals *osl; + OSLShadingSystem *osl_ss; + OSLThreadData *osl_tdata; # endif - /* **** Run-time data **** */ + /* **** Run-time data **** */ - /* Heap-allocated storage for transparent shadows intersections. */ - Intersection *transparent_shadow_intersections; + /* Heap-allocated storage for transparent shadows intersections. */ + Intersection *transparent_shadow_intersections; - /* Storage for decoupled volume steps. 
*/ - VolumeStep *decoupled_volume_steps[2]; - int decoupled_volume_steps_index; + /* Storage for decoupled volume steps. */ + VolumeStep *decoupled_volume_steps[2]; + int decoupled_volume_steps_index; - /* A buffer for storing per-pixel coverage for Cryptomatte. */ - CoverageMap *coverage_object; - CoverageMap *coverage_material; - CoverageMap *coverage_asset; + /* A buffer for storing per-pixel coverage for Cryptomatte. */ + CoverageMap *coverage_object; + CoverageMap *coverage_material; + CoverageMap *coverage_asset; - /* split kernel */ - SplitData split_data; - SplitParams split_param_data; + /* split kernel */ + SplitData split_data; + SplitParams split_param_data; - int2 global_size; - int2 global_id; + int2 global_size; + int2 global_id; - ProfilingState profiler; + ProfilingState profiler; } KernelGlobals; -#endif /* __KERNEL_CPU__ */ +#endif /* __KERNEL_CPU__ */ /* For CUDA, constant memory textures must be globals, so we can't put them * into a struct. As a result we don't actually use this struct and use actual @@ -99,124 +99,117 @@ typedef struct KernelGlobals { __constant__ KernelData __data; typedef struct KernelGlobals { - /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. */ - Intersection hits_stack[64]; + /* NOTE: Keep the size in sync with SHADOW_STACK_MAX_HITS. 
*/ + Intersection hits_stack[64]; } KernelGlobals; # define KERNEL_TEX(type, name) const __constant__ __device__ type *name; # include "kernel/kernel_textures.h" -#endif /* __KERNEL_CUDA__ */ +#endif /* __KERNEL_CUDA__ */ /* OpenCL */ #ifdef __KERNEL_OPENCL__ -# define KERNEL_TEX(type, name) \ -typedef type name##_t; +# define KERNEL_TEX(type, name) typedef type name##_t; # include "kernel/kernel_textures.h" typedef ccl_addr_space struct KernelGlobals { - ccl_constant KernelData *data; - ccl_global char *buffers[8]; + ccl_constant KernelData *data; + ccl_global char *buffers[8]; -# define KERNEL_TEX(type, name) \ - TextureInfo name; +# define KERNEL_TEX(type, name) TextureInfo name; # include "kernel/kernel_textures.h" # ifdef __SPLIT_KERNEL__ - SplitData split_data; - SplitParams split_param_data; + SplitData split_data; + SplitParams split_param_data; # endif } KernelGlobals; -#define KERNEL_BUFFER_PARAMS \ - ccl_global char *buffer0, \ - ccl_global char *buffer1, \ - ccl_global char *buffer2, \ - ccl_global char *buffer3, \ - ccl_global char *buffer4, \ - ccl_global char *buffer5, \ - ccl_global char *buffer6, \ - ccl_global char *buffer7 +# define KERNEL_BUFFER_PARAMS \ + ccl_global char *buffer0, ccl_global char *buffer1, ccl_global char *buffer2, \ + ccl_global char *buffer3, ccl_global char *buffer4, ccl_global char *buffer5, \ + ccl_global char *buffer6, ccl_global char *buffer7 -#define KERNEL_BUFFER_ARGS buffer0, buffer1, buffer2, buffer3, buffer4, buffer5, buffer6, buffer7 +# define KERNEL_BUFFER_ARGS buffer0, buffer1, buffer2, buffer3, buffer4, buffer5, buffer6, buffer7 ccl_device_inline void kernel_set_buffer_pointers(KernelGlobals *kg, KERNEL_BUFFER_PARAMS) { -#ifdef __SPLIT_KERNEL__ - if(ccl_local_id(0) + ccl_local_id(1) == 0) -#endif - { - kg->buffers[0] = buffer0; - kg->buffers[1] = buffer1; - kg->buffers[2] = buffer2; - kg->buffers[3] = buffer3; - kg->buffers[4] = buffer4; - kg->buffers[5] = buffer5; - kg->buffers[6] = buffer6; - kg->buffers[7] = 
buffer7; - } +# ifdef __SPLIT_KERNEL__ + if (ccl_local_id(0) + ccl_local_id(1) == 0) +# endif + { + kg->buffers[0] = buffer0; + kg->buffers[1] = buffer1; + kg->buffers[2] = buffer2; + kg->buffers[3] = buffer3; + kg->buffers[4] = buffer4; + kg->buffers[5] = buffer5; + kg->buffers[6] = buffer6; + kg->buffers[7] = buffer7; + } # ifdef __SPLIT_KERNEL__ - ccl_barrier(CCL_LOCAL_MEM_FENCE); + ccl_barrier(CCL_LOCAL_MEM_FENCE); # endif } ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg) { # ifdef __SPLIT_KERNEL__ - if(ccl_local_id(0) + ccl_local_id(1) == 0) + if (ccl_local_id(0) + ccl_local_id(1) == 0) # endif - { - ccl_global TextureInfo *info = (ccl_global TextureInfo*)kg->buffers[0]; + { + ccl_global TextureInfo *info = (ccl_global TextureInfo *)kg->buffers[0]; -# define KERNEL_TEX(type, name) \ - kg->name = *(info++); +# define KERNEL_TEX(type, name) kg->name = *(info++); # include "kernel/kernel_textures.h" - } + } # ifdef __SPLIT_KERNEL__ - ccl_barrier(CCL_LOCAL_MEM_FENCE); + ccl_barrier(CCL_LOCAL_MEM_FENCE); # endif } -#endif /* __KERNEL_OPENCL__ */ +#endif /* __KERNEL_OPENCL__ */ /* Interpolated lookup table access */ ccl_device float lookup_table_read(KernelGlobals *kg, float x, int offset, int size) { - x = saturate(x)*(size-1); + x = saturate(x) * (size - 1); - int index = min(float_to_int(x), size-1); - int nindex = min(index+1, size-1); - float t = x - index; + int index = min(float_to_int(x), size - 1); + int nindex = min(index + 1, size - 1); + float t = x - index; - float data0 = kernel_tex_fetch(__lookup_table, index + offset); - if(t == 0.0f) - return data0; + float data0 = kernel_tex_fetch(__lookup_table, index + offset); + if (t == 0.0f) + return data0; - float data1 = kernel_tex_fetch(__lookup_table, nindex + offset); - return (1.0f - t)*data0 + t*data1; + float data1 = kernel_tex_fetch(__lookup_table, nindex + offset); + return (1.0f - t) * data0 + t * data1; } -ccl_device float lookup_table_read_2D(KernelGlobals *kg, float x, float y, 
int offset, int xsize, int ysize) +ccl_device float lookup_table_read_2D( + KernelGlobals *kg, float x, float y, int offset, int xsize, int ysize) { - y = saturate(y)*(ysize-1); + y = saturate(y) * (ysize - 1); - int index = min(float_to_int(y), ysize-1); - int nindex = min(index+1, ysize-1); - float t = y - index; + int index = min(float_to_int(y), ysize - 1); + int nindex = min(index + 1, ysize - 1); + float t = y - index; - float data0 = lookup_table_read(kg, x, offset + xsize*index, xsize); - if(t == 0.0f) - return data0; + float data0 = lookup_table_read(kg, x, offset + xsize * index, xsize); + if (t == 0.0f) + return data0; - float data1 = lookup_table_read(kg, x, offset + xsize*nindex, xsize); - return (1.0f - t)*data0 + t*data1; + float data1 = lookup_table_read(kg, x, offset + xsize * nindex, xsize); + return (1.0f - t) * data0 + t * data1; } CCL_NAMESPACE_END -#endif /* __KERNEL_GLOBALS_H__ */ +#endif /* __KERNEL_GLOBALS_H__ */ diff --git a/intern/cycles/kernel/kernel_id_passes.h b/intern/cycles/kernel/kernel_id_passes.h index 0cd65b1f2e8..c1f4e39e5e7 100644 --- a/intern/cycles/kernel/kernel_id_passes.h +++ b/intern/cycles/kernel/kernel_id_passes.h @@ -16,78 +16,83 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, int num_slots, float id, float weight) +ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer, + int num_slots, + float id, + float weight) { - kernel_assert(id != ID_NONE); - if(weight == 0.0f) { - return; - } + kernel_assert(id != ID_NONE); + if (weight == 0.0f) { + return; + } - for(int slot = 0; slot < num_slots; slot++) { - ccl_global float2 *id_buffer = (ccl_global float2*)buffer; + for (int slot = 0; slot < num_slots; slot++) { + ccl_global float2 *id_buffer = (ccl_global float2 *)buffer; #ifdef __ATOMIC_PASS_WRITE__ - /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ - if(id_buffer[slot].x == ID_NONE) { - /* Use an atomic to claim this slot. 
- * If a different thread got here first, try again from this slot on. */ - float old_id = atomic_compare_and_swap_float(buffer+slot*2, ID_NONE, id); - if(old_id != ID_NONE && old_id != id) { - continue; - } - atomic_add_and_fetch_float(buffer+slot*2+1, weight); - break; - } - /* If there already is a slot for that ID, add the weight. - * If no slot was found, add it to the last. */ - else if(id_buffer[slot].x == id || slot == num_slots - 1) { - atomic_add_and_fetch_float(buffer+slot*2+1, weight); - break; - } + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if (id_buffer[slot].x == ID_NONE) { + /* Use an atomic to claim this slot. + * If a different thread got here first, try again from this slot on. */ + float old_id = atomic_compare_and_swap_float(buffer + slot * 2, ID_NONE, id); + if (old_id != ID_NONE && old_id != id) { + continue; + } + atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. */ + else if (id_buffer[slot].x == id || slot == num_slots - 1) { + atomic_add_and_fetch_float(buffer + slot * 2 + 1, weight); + break; + } #else /* __ATOMIC_PASS_WRITE__ */ - /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ - if(id_buffer[slot].x == ID_NONE) { - id_buffer[slot].x = id; - id_buffer[slot].y = weight; - break; - } - /* If there already is a slot for that ID, add the weight. - * If no slot was found, add it to the last. */ - else if(id_buffer[slot].x == id || slot == num_slots - 1) { - id_buffer[slot].y += weight; - break; - } -#endif /* __ATOMIC_PASS_WRITE__ */ - } + /* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */ + if (id_buffer[slot].x == ID_NONE) { + id_buffer[slot].x = id; + id_buffer[slot].y = weight; + break; + } + /* If there already is a slot for that ID, add the weight. + * If no slot was found, add it to the last. 
*/ + else if (id_buffer[slot].x == id || slot == num_slots - 1) { + id_buffer[slot].y += weight; + break; + } +#endif /* __ATOMIC_PASS_WRITE__ */ + } } ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots) { - ccl_global float2 *id_buffer = (ccl_global float2*)buffer; - for(int slot = 1; slot < num_slots; ++slot) { - if(id_buffer[slot].x == ID_NONE) { - return; - } - /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ - int i = slot; - while(i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { - float2 swap = id_buffer[i]; - id_buffer[i] = id_buffer[i - 1]; - id_buffer[i - 1] = swap; - --i; - } - } + ccl_global float2 *id_buffer = (ccl_global float2 *)buffer; + for (int slot = 1; slot < num_slots; ++slot) { + if (id_buffer[slot].x == ID_NONE) { + return; + } + /* Since we're dealing with a tiny number of elements, insertion sort should be fine. */ + int i = slot; + while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) { + float2 swap = id_buffer[i]; + id_buffer[i] = id_buffer[i - 1]; + id_buffer[i - 1] = swap; + --i; + } + } } #ifdef __KERNEL_GPU__ /* post-sorting for Cryptomatte */ -ccl_device void kernel_cryptomatte_post(KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride) +ccl_device void kernel_cryptomatte_post( + KernelGlobals *kg, ccl_global float *buffer, uint sample, int x, int y, int offset, int stride) { - if(sample - 1 == kernel_data.integrator.aa_samples) { - int index = offset + x + y * stride; - int pass_stride = kernel_data.film.pass_stride; - ccl_global float *cryptomatte_buffer = buffer + index * pass_stride + kernel_data.film.pass_cryptomatte; - kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); - } + if (sample - 1 == kernel_data.integrator.aa_samples) { + int index = offset + x + y * stride; + int pass_stride = kernel_data.film.pass_stride; + ccl_global float *cryptomatte_buffer = buffer + index * pass_stride + 
+ kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); + } } #endif diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h index 3bde96b078c..f7270a14940 100644 --- a/intern/cycles/kernel/kernel_jitter.h +++ b/intern/cycles/kernel/kernel_jitter.h @@ -26,202 +26,202 @@ CCL_NAMESPACE_BEGIN ccl_device_inline bool cmj_is_pow2(int i) { - return (i > 1) && ((i & (i - 1)) == 0); + return (i > 1) && ((i & (i - 1)) == 0); } ccl_device_inline int cmj_fast_mod_pow2(int a, int b) { - return (a & (b - 1)); + return (a & (b - 1)); } /* b must be > 1 */ ccl_device_inline int cmj_fast_div_pow2(int a, int b) { - kernel_assert(b > 1); + kernel_assert(b > 1); #if defined(__KERNEL_SSE2__) # ifdef _MSC_VER - unsigned long ctz; - _BitScanForward(&ctz, b); - return a >> ctz; + unsigned long ctz; + _BitScanForward(&ctz, b); + return a >> ctz; # else - return a >> __builtin_ctz(b); + return a >> __builtin_ctz(b); # endif #elif defined(__KERNEL_CUDA__) - return a >> (__ffs(b) - 1); + return a >> (__ffs(b) - 1); #else - return a/b; + return a / b; #endif } ccl_device_inline uint cmj_w_mask(uint w) { - kernel_assert(w > 1); + kernel_assert(w > 1); #if defined(__KERNEL_SSE2__) # ifdef _MSC_VER - unsigned long leading_zero; - _BitScanReverse(&leading_zero, w); - return ((1 << (1 + leading_zero)) - 1); + unsigned long leading_zero; + _BitScanReverse(&leading_zero, w); + return ((1 << (1 + leading_zero)) - 1); # else - return ((1 << (32 - __builtin_clz(w))) - 1); + return ((1 << (32 - __builtin_clz(w))) - 1); # endif #elif defined(__KERNEL_CUDA__) - return ((1 << (32 - __clz(w))) - 1); + return ((1 << (32 - __clz(w))) - 1); #else - w |= w >> 1; - w |= w >> 2; - w |= w >> 4; - w |= w >> 8; - w |= w >> 16; + w |= w >> 1; + w |= w >> 2; + w |= w >> 4; + w |= w >> 8; + w |= w >> 16; - return w; + return w; #endif } ccl_device_inline uint cmj_permute(uint i, uint l, uint p) { - uint w = l - 1; - - if((l 
& w) == 0) { - /* l is a power of two (fast) */ - i ^= p; - i *= 0xe170893d; - i ^= p >> 16; - i ^= (i & w) >> 4; - i ^= p >> 8; - i *= 0x0929eb3f; - i ^= p >> 23; - i ^= (i & w) >> 1; - i *= 1 | p >> 27; - i *= 0x6935fa69; - i ^= (i & w) >> 11; - i *= 0x74dcb303; - i ^= (i & w) >> 2; - i *= 0x9e501cc3; - i ^= (i & w) >> 2; - i *= 0xc860a3df; - i &= w; - i ^= i >> 5; - - return (i + p) & w; - } - else { - /* l is not a power of two (slow) */ - w = cmj_w_mask(w); - - do { - i ^= p; - i *= 0xe170893d; - i ^= p >> 16; - i ^= (i & w) >> 4; - i ^= p >> 8; - i *= 0x0929eb3f; - i ^= p >> 23; - i ^= (i & w) >> 1; - i *= 1 | p >> 27; - i *= 0x6935fa69; - i ^= (i & w) >> 11; - i *= 0x74dcb303; - i ^= (i & w) >> 2; - i *= 0x9e501cc3; - i ^= (i & w) >> 2; - i *= 0xc860a3df; - i &= w; - i ^= i >> 5; - } while(i >= l); - - return (i + p) % l; - } + uint w = l - 1; + + if ((l & w) == 0) { + /* l is a power of two (fast) */ + i ^= p; + i *= 0xe170893d; + i ^= p >> 16; + i ^= (i & w) >> 4; + i ^= p >> 8; + i *= 0x0929eb3f; + i ^= p >> 23; + i ^= (i & w) >> 1; + i *= 1 | p >> 27; + i *= 0x6935fa69; + i ^= (i & w) >> 11; + i *= 0x74dcb303; + i ^= (i & w) >> 2; + i *= 0x9e501cc3; + i ^= (i & w) >> 2; + i *= 0xc860a3df; + i &= w; + i ^= i >> 5; + + return (i + p) & w; + } + else { + /* l is not a power of two (slow) */ + w = cmj_w_mask(w); + + do { + i ^= p; + i *= 0xe170893d; + i ^= p >> 16; + i ^= (i & w) >> 4; + i ^= p >> 8; + i *= 0x0929eb3f; + i ^= p >> 23; + i ^= (i & w) >> 1; + i *= 1 | p >> 27; + i *= 0x6935fa69; + i ^= (i & w) >> 11; + i *= 0x74dcb303; + i ^= (i & w) >> 2; + i *= 0x9e501cc3; + i ^= (i & w) >> 2; + i *= 0xc860a3df; + i &= w; + i ^= i >> 5; + } while (i >= l); + + return (i + p) % l; + } } ccl_device_inline uint cmj_hash(uint i, uint p) { - i ^= p; - i ^= i >> 17; - i ^= i >> 10; - i *= 0xb36534e5; - i ^= i >> 12; - i ^= i >> 21; - i *= 0x93fc4795; - i ^= 0xdf6e307f; - i ^= i >> 17; - i *= 1 | p >> 18; - - return i; + i ^= p; + i ^= i >> 17; + i ^= i >> 10; + i 
*= 0xb36534e5; + i ^= i >> 12; + i ^= i >> 21; + i *= 0x93fc4795; + i ^= 0xdf6e307f; + i ^= i >> 17; + i *= 1 | p >> 18; + + return i; } ccl_device_inline uint cmj_hash_simple(uint i, uint p) { - i = (i ^ 61) ^ p; - i += i << 3; - i ^= i >> 4; - i *= 0x27d4eb2d; - return i; + i = (i ^ 61) ^ p; + i += i << 3; + i ^= i >> 4; + i *= 0x27d4eb2d; + return i; } ccl_device_inline float cmj_randfloat(uint i, uint p) { - return cmj_hash(i, p) * (1.0f / 4294967808.0f); + return cmj_hash(i, p) * (1.0f / 4294967808.0f); } #ifdef __CMJ__ ccl_device float cmj_sample_1D(int s, int N, int p) { - kernel_assert(s < N); + kernel_assert(s < N); - uint x = cmj_permute(s, N, p * 0x68bc21eb); - float jx = cmj_randfloat(s, p * 0x967a889b); + uint x = cmj_permute(s, N, p * 0x68bc21eb); + float jx = cmj_randfloat(s, p * 0x967a889b); - float invN = 1.0f/N; - return (x + jx)*invN; + float invN = 1.0f / N; + return (x + jx) * invN; } /* TODO(sergey): Do some extra tests and consider moving to util_math.h. */ ccl_device_inline int cmj_isqrt(int value) { -#if defined(__KERNEL_CUDA__) - return float_to_int(__fsqrt_ru(value)); -#elif defined(__KERNEL_GPU__) - return float_to_int(sqrtf(value)); -#else - /* This is a work around for fast-math on CPU which might replace sqrtf() - * with am approximated version. - */ - return float_to_int(sqrtf(value) + 1e-6f); -#endif +# if defined(__KERNEL_CUDA__) + return float_to_int(__fsqrt_ru(value)); +# elif defined(__KERNEL_GPU__) + return float_to_int(sqrtf(value)); +# else + /* This is a work around for fast-math on CPU which might replace sqrtf() + * with am approximated version. 
+ */ + return float_to_int(sqrtf(value) + 1e-6f); +# endif } ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy) { - kernel_assert(s < N); + kernel_assert(s < N); - int m = cmj_isqrt(N); - int n = (N - 1)/m + 1; - float invN = 1.0f/N; - float invm = 1.0f/m; - float invn = 1.0f/n; + int m = cmj_isqrt(N); + int n = (N - 1) / m + 1; + float invN = 1.0f / N; + float invm = 1.0f / m; + float invn = 1.0f / n; - s = cmj_permute(s, N, p * 0x51633e2d); + s = cmj_permute(s, N, p * 0x51633e2d); - int sdivm, smodm; + int sdivm, smodm; - if(cmj_is_pow2(m)) { - sdivm = cmj_fast_div_pow2(s, m); - smodm = cmj_fast_mod_pow2(s, m); - } - else { - /* Doing s*inmv gives precision issues here. */ - sdivm = s / m; - smodm = s - sdivm*m; - } + if (cmj_is_pow2(m)) { + sdivm = cmj_fast_div_pow2(s, m); + smodm = cmj_fast_mod_pow2(s, m); + } + else { + /* Doing s*inmv gives precision issues here. */ + sdivm = s / m; + smodm = s - sdivm * m; + } - uint sx = cmj_permute(smodm, m, p * 0x68bc21eb); - uint sy = cmj_permute(sdivm, n, p * 0x02e5be93); + uint sx = cmj_permute(smodm, m, p * 0x68bc21eb); + uint sy = cmj_permute(sdivm, n, p * 0x02e5be93); - float jx = cmj_randfloat(s, p * 0x967a889b); - float jy = cmj_randfloat(s, p * 0x368cc8b7); + float jx = cmj_randfloat(s, p * 0x967a889b); + float jy = cmj_randfloat(s, p * 0x368cc8b7); - *fx = (sx + (sy + jx)*invn)*invm; - *fy = (s + jy)*invN; + *fx = (sx + (sy + jx) * invn) * invm; + *fy = (s + jy) * invN; } #endif diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 262d7df1364..5e24f8dedaf 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -19,18 +19,18 @@ CCL_NAMESPACE_BEGIN /* Light Sample result */ typedef struct LightSample { - float3 P; /* position on light, or direction for distant light */ - float3 Ng; /* normal on light */ - float3 D; /* direction from shading point to light */ - float t; /* distance to light (FLT_MAX for distant 
light) */ - float u, v; /* parametric coordinate on primitive */ - float pdf; /* light sampling probability density function */ - float eval_fac; /* intensity multiplier */ - int object; /* object id for triangle/curve lights */ - int prim; /* primitive id for triangle/curve lights */ - int shader; /* shader id */ - int lamp; /* lamp id */ - LightType type; /* type of light */ + float3 P; /* position on light, or direction for distant light */ + float3 Ng; /* normal on light */ + float3 D; /* direction from shading point to light */ + float t; /* distance to light (FLT_MAX for distant light) */ + float u, v; /* parametric coordinate on primitive */ + float pdf; /* light sampling probability density function */ + float eval_fac; /* intensity multiplier */ + int object; /* object id for triangle/curve lights */ + int prim; /* primitive id for triangle/curve lights */ + int shader; /* shader id */ + int lamp; /* lamp id */ + LightType type; /* type of light */ } LightSample; /* Area light sampling */ @@ -46,130 +46,136 @@ typedef struct LightSample { */ ccl_device_inline float rect_light_sample(float3 P, float3 *light_p, - float3 axisu, float3 axisv, - float randu, float randv, + float3 axisu, + float3 axisv, + float randu, + float randv, bool sample_coord) { - /* In our name system we're using P for the center, - * which is o in the paper. - */ - - float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f; - float axisu_len, axisv_len; - /* Compute local reference system R. */ - float3 x = normalize_len(axisu, &axisu_len); - float3 y = normalize_len(axisv, &axisv_len); - float3 z = cross(x, y); - /* Compute rectangle coords in local reference system. */ - float3 dir = corner - P; - float z0 = dot(dir, z); - /* Flip 'z' to make it point against Q. */ - if(z0 > 0.0f) { - z *= -1.0f; - z0 *= -1.0f; - } - float x0 = dot(dir, x); - float y0 = dot(dir, y); - float x1 = x0 + axisu_len; - float y1 = y0 + axisv_len; - /* Compute internal angles (gamma_i). 
*/ - float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1); - float4 nz = make_float4(y0, x1, y1, x0) * diff; - nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz); - float g0 = safe_acosf(-nz.x * nz.y); - float g1 = safe_acosf(-nz.y * nz.z); - float g2 = safe_acosf(-nz.z * nz.w); - float g3 = safe_acosf(-nz.w * nz.x); - /* Compute predefined constants. */ - float b0 = nz.x; - float b1 = nz.z; - float b0sq = b0 * b0; - float k = M_2PI_F - g2 - g3; - /* Compute solid angle from internal angles. */ - float S = g0 + g1 - k; - - if(sample_coord) { - /* Compute cu. */ - float au = randu * S + k; - float fu = (cosf(au) * b0 - b1) / sinf(au); - float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f); - cu = clamp(cu, -1.0f, 1.0f); - /* Compute xu. */ - float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f); - xu = clamp(xu, x0, x1); - /* Compute yv. */ - float z0sq = z0 * z0; - float y0sq = y0 * y0; - float y1sq = y1 * y1; - float d = sqrtf(xu * xu + z0sq); - float h0 = y0 / sqrtf(d * d + y0sq); - float h1 = y1 / sqrtf(d * d + y1sq); - float hv = h0 + randv * (h1 - h0), hv2 = hv * hv; - float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1; - - /* Transform (xu, yv, z0) to world coords. */ - *light_p = P + xu * x + yv * y + z0 * z; - } - - /* return pdf */ - if(S != 0.0f) - return 1.0f / S; - else - return 0.0f; + /* In our name system we're using P for the center, + * which is o in the paper. + */ + + float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f; + float axisu_len, axisv_len; + /* Compute local reference system R. */ + float3 x = normalize_len(axisu, &axisu_len); + float3 y = normalize_len(axisv, &axisv_len); + float3 z = cross(x, y); + /* Compute rectangle coords in local reference system. */ + float3 dir = corner - P; + float z0 = dot(dir, z); + /* Flip 'z' to make it point against Q. 
*/ + if (z0 > 0.0f) { + z *= -1.0f; + z0 *= -1.0f; + } + float x0 = dot(dir, x); + float y0 = dot(dir, y); + float x1 = x0 + axisu_len; + float y1 = y0 + axisv_len; + /* Compute internal angles (gamma_i). */ + float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1); + float4 nz = make_float4(y0, x1, y1, x0) * diff; + nz = nz / sqrt(z0 * z0 * diff * diff + nz * nz); + float g0 = safe_acosf(-nz.x * nz.y); + float g1 = safe_acosf(-nz.y * nz.z); + float g2 = safe_acosf(-nz.z * nz.w); + float g3 = safe_acosf(-nz.w * nz.x); + /* Compute predefined constants. */ + float b0 = nz.x; + float b1 = nz.z; + float b0sq = b0 * b0; + float k = M_2PI_F - g2 - g3; + /* Compute solid angle from internal angles. */ + float S = g0 + g1 - k; + + if (sample_coord) { + /* Compute cu. */ + float au = randu * S + k; + float fu = (cosf(au) * b0 - b1) / sinf(au); + float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f); + cu = clamp(cu, -1.0f, 1.0f); + /* Compute xu. */ + float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f); + xu = clamp(xu, x0, x1); + /* Compute yv. */ + float z0sq = z0 * z0; + float y0sq = y0 * y0; + float y1sq = y1 * y1; + float d = sqrtf(xu * xu + z0sq); + float h0 = y0 / sqrtf(d * d + y0sq); + float h1 = y1 / sqrtf(d * d + y1sq); + float hv = h0 + randv * (h1 - h0), hv2 = hv * hv; + float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1; + + /* Transform (xu, yv, z0) to world coords. 
*/ + *light_p = P + xu * x + yv * y + z0 * z; + } + + /* return pdf */ + if (S != 0.0f) + return 1.0f / S; + else + return 0.0f; } ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv) { - to_unit_disk(&randu, &randv); - return ru*randu + rv*randv; + to_unit_disk(&randu, &randv); + return ru * randu + rv * randv; } ccl_device float3 disk_light_sample(float3 v, float randu, float randv) { - float3 ru, rv; + float3 ru, rv; - make_orthonormals(v, &ru, &rv); + make_orthonormals(v, &ru, &rv); - return ellipse_sample(ru, rv, randu, randv); + return ellipse_sample(ru, rv, randu, randv); } ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv) { - return normalize(D + disk_light_sample(D, randu, randv)*radius); + return normalize(D + disk_light_sample(D, randu, randv) * radius); } -ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv) +ccl_device float3 +sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv) { - return disk_light_sample(normalize(P - center), randu, randv)*radius; + return disk_light_sample(normalize(P - center), randu, randv) * radius; } -ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, LightSample *ls) +ccl_device float spot_light_attenuation(float3 dir, + float spot_angle, + float spot_smooth, + LightSample *ls) { - float3 I = ls->Ng; + float3 I = ls->Ng; - float attenuation = dot(dir, I); + float attenuation = dot(dir, I); - if(attenuation <= spot_angle) { - attenuation = 0.0f; - } - else { - float t = attenuation - spot_angle; + if (attenuation <= spot_angle) { + attenuation = 0.0f; + } + else { + float t = attenuation - spot_angle; - if(t < spot_smooth && spot_smooth != 0.0f) - attenuation *= smoothstepf(t/spot_smooth); - } + if (t < spot_smooth && spot_smooth != 0.0f) + attenuation *= smoothstepf(t / spot_smooth); + } - return attenuation; + return attenuation; } 
ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t) { - float cos_pi = dot(Ng, I); + float cos_pi = dot(Ng, I); - if(cos_pi <= 0.0f) - return 0.0f; + if (cos_pi <= 0.0f) + return 0.0f; - return t*t/cos_pi; + return t * t / cos_pi; } /* Background Light */ @@ -180,203 +186,219 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 * devices, but we're so close to the release so better not screw things * up for CPU at least. */ -#ifdef __KERNEL_GPU__ +# ifdef __KERNEL_GPU__ ccl_device_noinline -#else +# else ccl_device -#endif -float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf) +# endif + float3 + background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf) { - /* for the following, the CDF values are actually a pair of floats, with the - * function value as X and the actual CDF as Y. The last entry's function - * value is the CDF total. */ - int res_x = kernel_data.integrator.pdf_background_res_x; - int res_y = kernel_data.integrator.pdf_background_res_y; - int cdf_width = res_x + 1; - - /* this is basically std::lower_bound as used by pbrt */ - int first = 0; - int count = res_y; - - while(count > 0) { - int step = count >> 1; - int middle = first + step; - - if(kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) { - first = middle + 1; - count -= step + 1; - } - else - count = step; - } - - int index_v = max(0, first - 1); - kernel_assert(index_v >= 0 && index_v < res_y); - - float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); - float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1); - float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); - - /* importance-sampled V direction */ - float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv); - float v = (index_v + dv) / res_y; - - /* this is basically std::lower_bound as used by pbrt */ - first = 0; - count = 
res_x; - while(count > 0) { - int step = count >> 1; - int middle = first + step; - - if(kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y < randu) { - first = middle + 1; - count -= step + 1; - } - else - count = step; - } - - int index_u = max(0, first - 1); - kernel_assert(index_u >= 0 && index_u < res_x); - - float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u); - float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u + 1); - float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + res_x); - - /* importance-sampled U direction */ - float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu); - float u = (index_u + du) / res_x; - - /* compute pdf */ - float denom = cdf_last_u.x * cdf_last_v.x; - float sin_theta = sinf(M_PI_F * v); - - if(sin_theta == 0.0f || denom == 0.0f) - *pdf = 0.0f; - else - *pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom); - - /* compute direction */ - return equirectangular_to_direction(u, v); + /* for the following, the CDF values are actually a pair of floats, with the + * function value as X and the actual CDF as Y. The last entry's function + * value is the CDF total. 
*/ + int res_x = kernel_data.integrator.pdf_background_res_x; + int res_y = kernel_data.integrator.pdf_background_res_y; + int cdf_width = res_x + 1; + + /* this is basically std::lower_bound as used by pbrt */ + int first = 0; + int count = res_y; + + while (count > 0) { + int step = count >> 1; + int middle = first + step; + + if (kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) { + first = middle + 1; + count -= step + 1; + } + else + count = step; + } + + int index_v = max(0, first - 1); + kernel_assert(index_v >= 0 && index_v < res_y); + + float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); + float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1); + float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); + + /* importance-sampled V direction */ + float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv); + float v = (index_v + dv) / res_y; + + /* this is basically std::lower_bound as used by pbrt */ + first = 0; + count = res_x; + while (count > 0) { + int step = count >> 1; + int middle = first + step; + + if (kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y < + randu) { + first = middle + 1; + count -= step + 1; + } + else + count = step; + } + + int index_u = max(0, first - 1); + kernel_assert(index_u >= 0 && index_u < res_x); + + float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + index_u); + float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + index_u + 1); + float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + res_x); + + /* importance-sampled U direction */ + float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu); + float u = (index_u + du) / res_x; + + /* compute pdf */ + float denom = cdf_last_u.x * cdf_last_v.x; + float sin_theta = sinf(M_PI_F * v); + + if (sin_theta == 0.0f || denom == 0.0f) + *pdf = 
0.0f; + else + *pdf = (cdf_u.x * cdf_v.x) / (M_2PI_F * M_PI_F * sin_theta * denom); + + /* compute direction */ + return equirectangular_to_direction(u, v); } /* TODO(sergey): Same as above, after the release we should consider using * 'noinline' for all devices. */ -#ifdef __KERNEL_GPU__ +# ifdef __KERNEL_GPU__ ccl_device_noinline -#else +# else ccl_device -#endif -float background_map_pdf(KernelGlobals *kg, float3 direction) +# endif + float + background_map_pdf(KernelGlobals *kg, float3 direction) { - float2 uv = direction_to_equirectangular(direction); - int res_x = kernel_data.integrator.pdf_background_res_x; - int res_y = kernel_data.integrator.pdf_background_res_y; - int cdf_width = res_x + 1; + float2 uv = direction_to_equirectangular(direction); + int res_x = kernel_data.integrator.pdf_background_res_x; + int res_y = kernel_data.integrator.pdf_background_res_y; + int cdf_width = res_x + 1; - float sin_theta = sinf(uv.y * M_PI_F); + float sin_theta = sinf(uv.y * M_PI_F); - if(sin_theta == 0.0f) - return 0.0f; + if (sin_theta == 0.0f) + return 0.0f; - int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1); - int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1); + int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1); + int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1); - /* pdfs in V direction */ - float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + res_x); - float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); + /* pdfs in V direction */ + float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + res_x); + float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y); - float denom = cdf_last_u.x * cdf_last_v.x; + float denom = cdf_last_u.x * cdf_last_v.x; - if(denom == 0.0f) - return 0.0f; + if (denom == 0.0f) + return 0.0f; - /* pdfs in U direction */ - float2 cdf_u = 
kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u); - float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); + /* pdfs in U direction */ + float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, + index_v * cdf_width + index_u); + float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v); - return (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom); + return (cdf_u.x * cdf_v.x) / (M_2PI_F * M_PI_F * sin_theta * denom); } -ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals *kg, - float3 P, - int index, - float3 *lightpos, - float3 *dir) +ccl_device_inline bool background_portal_data_fetch_and_check_side( + KernelGlobals *kg, float3 P, int index, float3 *lightpos, float3 *dir) { - int portal = kernel_data.integrator.portal_offset + index; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + int portal = kernel_data.integrator.portal_offset + index; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); - *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]); - *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); + *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]); + *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); - /* Check whether portal is on the right side. */ - if(dot(*dir, P - *lightpos) > 1e-4f) - return true; + /* Check whether portal is on the right side. 
*/ + if (dot(*dir, P - *lightpos) > 1e-4f) + return true; - return false; + return false; } -ccl_device_inline float background_portal_pdf(KernelGlobals *kg, - float3 P, - float3 direction, - int ignore_portal, - bool *is_possible) +ccl_device_inline float background_portal_pdf( + KernelGlobals *kg, float3 P, float3 direction, int ignore_portal, bool *is_possible) { - float portal_pdf = 0.0f; - - int num_possible = 0; - for(int p = 0; p < kernel_data.integrator.num_portals; p++) { - if(p == ignore_portal) - continue; - - float3 lightpos, dir; - if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) - continue; - - /* There's a portal that could be sampled from this position. */ - if(is_possible) { - *is_possible = true; - } - num_possible++; - - int portal = kernel_data.integrator.portal_offset + p; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); - float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); - float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); - bool is_round = (klight->area.invarea < 0.0f); - - if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL, NULL, NULL, is_round)) - continue; - - if(is_round) { - float t; - float3 D = normalize_len(lightpos - P, &t); - portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t); - } - else { - portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false); - } - } - - if(ignore_portal >= 0) { - /* We have skipped a portal that could be sampled as well. */ - num_possible++; - } - - return (num_possible > 0)? 
portal_pdf / num_possible: 0.0f; + float portal_pdf = 0.0f; + + int num_possible = 0; + for (int p = 0; p < kernel_data.integrator.num_portals; p++) { + if (p == ignore_portal) + continue; + + float3 lightpos, dir; + if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + continue; + + /* There's a portal that could be sampled from this position. */ + if (is_possible) { + *is_possible = true; + } + num_possible++; + + int portal = kernel_data.integrator.portal_offset + p; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + float3 axisu = make_float3( + klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + float3 axisv = make_float3( + klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); + bool is_round = (klight->area.invarea < 0.0f); + + if (!ray_quad_intersect(P, + direction, + 1e-4f, + FLT_MAX, + lightpos, + axisu, + axisv, + dir, + NULL, + NULL, + NULL, + NULL, + is_round)) + continue; + + if (is_round) { + float t; + float3 D = normalize_len(lightpos - P, &t); + portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t); + } + else { + portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false); + } + } + + if (ignore_portal >= 0) { + /* We have skipped a portal that could be sampled as well. */ + num_possible++; + } + + return (num_possible > 0) ? 
portal_pdf / num_possible : 0.0f; } ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P) { - int num_possible_portals = 0; - for(int p = 0; p < kernel_data.integrator.num_portals; p++) { - float3 lightpos, dir; - if(background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) - num_possible_portals++; - } - return num_possible_portals; + int num_possible_portals = 0; + for (int p = 0; p < kernel_data.integrator.num_portals; p++) { + float3 lightpos, dir; + if (background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + num_possible_portals++; + } + return num_possible_portals; } ccl_device float3 background_portal_sample(KernelGlobals *kg, @@ -387,774 +409,754 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg, int *sampled_portal, float *pdf) { - /* Pick a portal, then re-normalize randv. */ - randv *= num_possible; - int portal = (int)randv; - randv -= portal; - - /* TODO(sergey): Some smarter way of finding portal to sample - * is welcome. - */ - for(int p = 0; p < kernel_data.integrator.num_portals; p++) { - /* Search for the sampled portal. */ - float3 lightpos, dir; - if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) - continue; - - if(portal == 0) { - /* p is the portal to be sampled. 
*/ - int portal = kernel_data.integrator.portal_offset + p; - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); - float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); - float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); - bool is_round = (klight->area.invarea < 0.0f); - - float3 D; - if(is_round) { - lightpos += ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv); - float t; - D = normalize_len(lightpos - P, &t); - *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t); - } - else { - *pdf = rect_light_sample(P, &lightpos, - axisu, axisv, - randu, randv, - true); - D = normalize(lightpos - P); - } - - *pdf /= num_possible; - *sampled_portal = p; - return D; - } - - portal--; - } - - return make_float3(0.0f, 0.0f, 0.0f); + /* Pick a portal, then re-normalize randv. */ + randv *= num_possible; + int portal = (int)randv; + randv -= portal; + + /* TODO(sergey): Some smarter way of finding portal to sample + * is welcome. + */ + for (int p = 0; p < kernel_data.integrator.num_portals; p++) { + /* Search for the sampled portal. */ + float3 lightpos, dir; + if (!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir)) + continue; + + if (portal == 0) { + /* p is the portal to be sampled. 
*/ + int portal = kernel_data.integrator.portal_offset + p; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal); + float3 axisu = make_float3( + klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + float3 axisv = make_float3( + klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); + bool is_round = (klight->area.invarea < 0.0f); + + float3 D; + if (is_round) { + lightpos += ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv); + float t; + D = normalize_len(lightpos - P, &t); + *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t); + } + else { + *pdf = rect_light_sample(P, &lightpos, axisu, axisv, randu, randv, true); + D = normalize(lightpos - P); + } + + *pdf /= num_possible; + *sampled_portal = p; + return D; + } + + portal--; + } + + return make_float3(0.0f, 0.0f, 0.0f); } -ccl_device_inline float3 background_light_sample(KernelGlobals *kg, - float3 P, - float randu, float randv, - float *pdf) +ccl_device_inline float3 +background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf) { - /* Probability of sampling portals instead of the map. */ - float portal_sampling_pdf = kernel_data.integrator.portal_pdf; - - /* Check if there are portals in the scene which we can sample. */ - if(portal_sampling_pdf > 0.0f) { - int num_portals = background_num_possible_portals(kg, P); - if(num_portals > 0) { - if(portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) { - if(portal_sampling_pdf < 1.0f) { - randu /= portal_sampling_pdf; - } - int portal; - float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf); - if(num_portals > 1) { - /* Ignore the chosen portal, its pdf is already included. */ - *pdf += background_portal_pdf(kg, P, D, portal, NULL); - } - /* We could also have sampled the map, so combine with MIS. 
*/ - if(portal_sampling_pdf < 1.0f) { - float cdf_pdf = background_map_pdf(kg, D); - *pdf = (portal_sampling_pdf * (*pdf) - + (1.0f - portal_sampling_pdf) * cdf_pdf); - } - return D; - } - else { - /* Sample map, but with nonzero portal_sampling_pdf for MIS. */ - randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf); - } - } - else { - /* We can't sample a portal. - * Check if we can sample the map instead. - */ - if(portal_sampling_pdf == 1.0f) { - /* Use uniform as a fallback if we can't sample the map. */ - *pdf = 1.0f / M_4PI_F; - return sample_uniform_sphere(randu, randv); - } - else { - portal_sampling_pdf = 0.0f; - } - } - } - - float3 D = background_map_sample(kg, randu, randv, pdf); - /* Use MIS if portals could be sampled as well. */ - if(portal_sampling_pdf > 0.0f) { - float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL); - *pdf = (portal_sampling_pdf * portal_pdf - + (1.0f - portal_sampling_pdf) * (*pdf)); - } - return D; + /* Probability of sampling portals instead of the map. */ + float portal_sampling_pdf = kernel_data.integrator.portal_pdf; + + /* Check if there are portals in the scene which we can sample. */ + if (portal_sampling_pdf > 0.0f) { + int num_portals = background_num_possible_portals(kg, P); + if (num_portals > 0) { + if (portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) { + if (portal_sampling_pdf < 1.0f) { + randu /= portal_sampling_pdf; + } + int portal; + float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf); + if (num_portals > 1) { + /* Ignore the chosen portal, its pdf is already included. */ + *pdf += background_portal_pdf(kg, P, D, portal, NULL); + } + /* We could also have sampled the map, so combine with MIS. */ + if (portal_sampling_pdf < 1.0f) { + float cdf_pdf = background_map_pdf(kg, D); + *pdf = (portal_sampling_pdf * (*pdf) + (1.0f - portal_sampling_pdf) * cdf_pdf); + } + return D; + } + else { + /* Sample map, but with nonzero portal_sampling_pdf for MIS. 
*/ + randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf); + } + } + else { + /* We can't sample a portal. + * Check if we can sample the map instead. + */ + if (portal_sampling_pdf == 1.0f) { + /* Use uniform as a fallback if we can't sample the map. */ + *pdf = 1.0f / M_4PI_F; + return sample_uniform_sphere(randu, randv); + } + else { + portal_sampling_pdf = 0.0f; + } + } + } + + float3 D = background_map_sample(kg, randu, randv, pdf); + /* Use MIS if portals could be sampled as well. */ + if (portal_sampling_pdf > 0.0f) { + float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL); + *pdf = (portal_sampling_pdf * portal_pdf + (1.0f - portal_sampling_pdf) * (*pdf)); + } + return D; } ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction) { - /* Probability of sampling portals instead of the map. */ - float portal_sampling_pdf = kernel_data.integrator.portal_pdf; - - float portal_pdf = 0.0f, map_pdf = 0.0f; - if(portal_sampling_pdf > 0.0f) { - /* Evaluate PDF of sampling this direction by portal sampling. */ - bool is_possible = false; - portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf; - if(!is_possible) { - /* Portal sampling is not possible here because all portals point to the wrong side. - * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */ - if(portal_sampling_pdf == 1.0f) { - return kernel_data.integrator.pdf_lights / M_4PI_F; - } - else { - /* Force map sampling. */ - portal_sampling_pdf = 0.0f; - } - } - } - if(portal_sampling_pdf < 1.0f) { - /* Evaluate PDF of sampling this direction by map sampling. */ - map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf); - } - return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights; + /* Probability of sampling portals instead of the map. 
*/ + float portal_sampling_pdf = kernel_data.integrator.portal_pdf; + + float portal_pdf = 0.0f, map_pdf = 0.0f; + if (portal_sampling_pdf > 0.0f) { + /* Evaluate PDF of sampling this direction by portal sampling. */ + bool is_possible = false; + portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf; + if (!is_possible) { + /* Portal sampling is not possible here because all portals point to the wrong side. + * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */ + if (portal_sampling_pdf == 1.0f) { + return kernel_data.integrator.pdf_lights / M_4PI_F; + } + else { + /* Force map sampling. */ + portal_sampling_pdf = 0.0f; + } + } + } + if (portal_sampling_pdf < 1.0f) { + /* Evaluate PDF of sampling this direction by map sampling. */ + map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf); + } + return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights; } #endif /* Regular Light */ -ccl_device_inline bool lamp_light_sample(KernelGlobals *kg, - int lamp, - float randu, float randv, - float3 P, - LightSample *ls) +ccl_device_inline bool lamp_light_sample( + KernelGlobals *kg, int lamp, float randu, float randv, float3 P, LightSample *ls) { - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); - LightType type = (LightType)klight->type; - ls->type = type; - ls->shader = klight->shader_id; - ls->object = PRIM_NONE; - ls->prim = PRIM_NONE; - ls->lamp = lamp; - ls->u = randu; - ls->v = randv; - - if(type == LIGHT_DISTANT) { - /* distant light */ - float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]); - float3 D = lightD; - float radius = klight->distant.radius; - float invarea = klight->distant.invarea; - - if(radius > 0.0f) - D = distant_light_sample(D, radius, randu, randv); - - ls->P = D; - ls->Ng = D; - ls->D = -D; - ls->t = FLT_MAX; - - float costheta = dot(lightD, D); - ls->pdf = invarea/(costheta*costheta*costheta); 
- ls->eval_fac = ls->pdf; - } + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + LightType type = (LightType)klight->type; + ls->type = type; + ls->shader = klight->shader_id; + ls->object = PRIM_NONE; + ls->prim = PRIM_NONE; + ls->lamp = lamp; + ls->u = randu; + ls->v = randv; + + if (type == LIGHT_DISTANT) { + /* distant light */ + float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]); + float3 D = lightD; + float radius = klight->distant.radius; + float invarea = klight->distant.invarea; + + if (radius > 0.0f) + D = distant_light_sample(D, radius, randu, randv); + + ls->P = D; + ls->Ng = D; + ls->D = -D; + ls->t = FLT_MAX; + + float costheta = dot(lightD, D); + ls->pdf = invarea / (costheta * costheta * costheta); + ls->eval_fac = ls->pdf; + } #ifdef __BACKGROUND_MIS__ - else if(type == LIGHT_BACKGROUND) { - /* infinite area light (e.g. light dome or env light) */ - float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf); - - ls->P = D; - ls->Ng = D; - ls->D = -D; - ls->t = FLT_MAX; - ls->eval_fac = 1.0f; - } + else if (type == LIGHT_BACKGROUND) { + /* infinite area light (e.g. 
light dome or env light) */ + float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf); + + ls->P = D; + ls->Ng = D; + ls->D = -D; + ls->t = FLT_MAX; + ls->eval_fac = 1.0f; + } #endif - else { - ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]); - - if(type == LIGHT_POINT || type == LIGHT_SPOT) { - float radius = klight->spot.radius; - - if(radius > 0.0f) - /* sphere light */ - ls->P += sphere_light_sample(P, ls->P, radius, randu, randv); - - ls->D = normalize_len(ls->P - P, &ls->t); - ls->Ng = -ls->D; - - float invarea = klight->spot.invarea; - ls->eval_fac = (0.25f*M_1_PI_F)*invarea; - ls->pdf = invarea; - - if(type == LIGHT_SPOT) { - /* spot light attenuation */ - float3 dir = make_float3(klight->spot.dir[0], - klight->spot.dir[1], - klight->spot.dir[2]); - ls->eval_fac *= spot_light_attenuation(dir, - klight->spot.spot_angle, - klight->spot.spot_smooth, - ls); - if(ls->eval_fac == 0.0f) { - return false; - } - } - float2 uv = map_to_sphere(ls->Ng); - ls->u = uv.x; - ls->v = uv.y; - - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); - } - else { - /* area light */ - float3 axisu = make_float3(klight->area.axisu[0], - klight->area.axisu[1], - klight->area.axisu[2]); - float3 axisv = make_float3(klight->area.axisv[0], - klight->area.axisv[1], - klight->area.axisv[2]); - float3 D = make_float3(klight->area.dir[0], - klight->area.dir[1], - klight->area.dir[2]); - float invarea = fabsf(klight->area.invarea); - bool is_round = (klight->area.invarea < 0.0f); - - if(dot(ls->P - P, D) > 0.0f) { - return false; - } - - float3 inplane; - - if(is_round) { - inplane = ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv); - ls->P += inplane; - ls->pdf = invarea; - } - else { - inplane = ls->P; - ls->pdf = rect_light_sample(P, &ls->P, - axisu, axisv, - randu, randv, - true); - inplane = ls->P - inplane; - } - - ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f; - ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f; - - 
ls->Ng = D; - ls->D = normalize_len(ls->P - P, &ls->t); - - ls->eval_fac = 0.25f*invarea; - if(is_round) { - ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t); - } - } - } - - ls->pdf *= kernel_data.integrator.pdf_lights; - - return (ls->pdf > 0.0f); + else { + ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]); + + if (type == LIGHT_POINT || type == LIGHT_SPOT) { + float radius = klight->spot.radius; + + if (radius > 0.0f) + /* sphere light */ + ls->P += sphere_light_sample(P, ls->P, radius, randu, randv); + + ls->D = normalize_len(ls->P - P, &ls->t); + ls->Ng = -ls->D; + + float invarea = klight->spot.invarea; + ls->eval_fac = (0.25f * M_1_PI_F) * invarea; + ls->pdf = invarea; + + if (type == LIGHT_SPOT) { + /* spot light attenuation */ + float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]); + ls->eval_fac *= spot_light_attenuation( + dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls); + if (ls->eval_fac == 0.0f) { + return false; + } + } + float2 uv = map_to_sphere(ls->Ng); + ls->u = uv.x; + ls->v = uv.y; + + ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); + } + else { + /* area light */ + float3 axisu = make_float3( + klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + float3 axisv = make_float3( + klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); + float3 D = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); + float invarea = fabsf(klight->area.invarea); + bool is_round = (klight->area.invarea < 0.0f); + + if (dot(ls->P - P, D) > 0.0f) { + return false; + } + + float3 inplane; + + if (is_round) { + inplane = ellipse_sample(axisu * 0.5f, axisv * 0.5f, randu, randv); + ls->P += inplane; + ls->pdf = invarea; + } + else { + inplane = ls->P; + ls->pdf = rect_light_sample(P, &ls->P, axisu, axisv, randu, randv, true); + inplane = ls->P - inplane; + } + + ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f; + ls->v = dot(inplane, 
axisv) * (1.0f / dot(axisv, axisv)) + 0.5f; + + ls->Ng = D; + ls->D = normalize_len(ls->P - P, &ls->t); + + ls->eval_fac = 0.25f * invarea; + if (is_round) { + ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t); + } + } + } + + ls->pdf *= kernel_data.integrator.pdf_lights; + + return (ls->pdf > 0.0f); } -ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls) +ccl_device bool lamp_light_eval( + KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls) { - const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); - LightType type = (LightType)klight->type; - ls->type = type; - ls->shader = klight->shader_id; - ls->object = PRIM_NONE; - ls->prim = PRIM_NONE; - ls->lamp = lamp; - /* todo: missing texture coordinates */ - ls->u = 0.0f; - ls->v = 0.0f; - - if(!(ls->shader & SHADER_USE_MIS)) - return false; - - if(type == LIGHT_DISTANT) { - /* distant light */ - float radius = klight->distant.radius; - - if(radius == 0.0f) - return false; - if(t != FLT_MAX) - return false; - - /* a distant light is infinitely far away, but equivalent to a disk - * shaped light exactly 1 unit away from the current shading point. - * - * radius t^2/cos(theta) - * <----------> t = sqrt(1^2 + tan(theta)^2) - * tan(th) area = radius*radius*pi - * <-----> - * \ | (1 + tan(theta)^2)/cos(theta) - * \ | (1 + tan(acos(cos(theta)))^2)/cos(theta) - * t \th| 1 simplifies to - * \-| 1/(cos(theta)^3) - * \| magic! 
- * P - */ - - float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]); - float costheta = dot(-lightD, D); - float cosangle = klight->distant.cosangle; - - if(costheta < cosangle) - return false; - - ls->P = -D; - ls->Ng = -D; - ls->D = D; - ls->t = FLT_MAX; - - /* compute pdf */ - float invarea = klight->distant.invarea; - ls->pdf = invarea/(costheta*costheta*costheta); - ls->eval_fac = ls->pdf; - } - else if(type == LIGHT_POINT || type == LIGHT_SPOT) { - float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]); - - float radius = klight->spot.radius; - - /* sphere light */ - if(radius == 0.0f) - return false; - - if(!ray_aligned_disk_intersect(P, D, t, - lightP, radius, &ls->P, &ls->t)) - { - return false; - } - - ls->Ng = -D; - ls->D = D; - - float invarea = klight->spot.invarea; - ls->eval_fac = (0.25f*M_1_PI_F)*invarea; - ls->pdf = invarea; - - if(type == LIGHT_SPOT) { - /* spot light attenuation */ - float3 dir = make_float3(klight->spot.dir[0], - klight->spot.dir[1], - klight->spot.dir[2]); - ls->eval_fac *= spot_light_attenuation(dir, - klight->spot.spot_angle, - klight->spot.spot_smooth, - ls); - - if(ls->eval_fac == 0.0f) - return false; - } - float2 uv = map_to_sphere(ls->Ng); - ls->u = uv.x; - ls->v = uv.y; - - /* compute pdf */ - if(ls->t != FLT_MAX) - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); - } - else if(type == LIGHT_AREA) { - /* area light */ - float invarea = fabsf(klight->area.invarea); - bool is_round = (klight->area.invarea < 0.0f); - if(invarea == 0.0f) - return false; - - float3 axisu = make_float3(klight->area.axisu[0], - klight->area.axisu[1], - klight->area.axisu[2]); - float3 axisv = make_float3(klight->area.axisv[0], - klight->area.axisv[1], - klight->area.axisv[2]); - float3 Ng = make_float3(klight->area.dir[0], - klight->area.dir[1], - klight->area.dir[2]); - - /* one sided */ - if(dot(D, Ng) >= 0.0f) - return false; - - float3 light_P = make_float3(klight->co[0], klight->co[1], 
klight->co[2]); - - if(!ray_quad_intersect(P, D, 0.0f, t, light_P, - axisu, axisv, Ng, - &ls->P, &ls->t, - &ls->u, &ls->v, - is_round)) - { - return false; - } - - ls->D = D; - ls->Ng = Ng; - if(is_round) { - ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t); - } - else { - ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false); - } - ls->eval_fac = 0.25f*invarea; - } - else { - return false; - } - - ls->pdf *= kernel_data.integrator.pdf_lights; - - return true; + const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp); + LightType type = (LightType)klight->type; + ls->type = type; + ls->shader = klight->shader_id; + ls->object = PRIM_NONE; + ls->prim = PRIM_NONE; + ls->lamp = lamp; + /* todo: missing texture coordinates */ + ls->u = 0.0f; + ls->v = 0.0f; + + if (!(ls->shader & SHADER_USE_MIS)) + return false; + + if (type == LIGHT_DISTANT) { + /* distant light */ + float radius = klight->distant.radius; + + if (radius == 0.0f) + return false; + if (t != FLT_MAX) + return false; + + /* a distant light is infinitely far away, but equivalent to a disk + * shaped light exactly 1 unit away from the current shading point. + * + * radius t^2/cos(theta) + * <----------> t = sqrt(1^2 + tan(theta)^2) + * tan(th) area = radius*radius*pi + * <-----> + * \ | (1 + tan(theta)^2)/cos(theta) + * \ | (1 + tan(acos(cos(theta)))^2)/cos(theta) + * t \th| 1 simplifies to + * \-| 1/(cos(theta)^3) + * \| magic! 
+ * P + */ + + float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]); + float costheta = dot(-lightD, D); + float cosangle = klight->distant.cosangle; + + if (costheta < cosangle) + return false; + + ls->P = -D; + ls->Ng = -D; + ls->D = D; + ls->t = FLT_MAX; + + /* compute pdf */ + float invarea = klight->distant.invarea; + ls->pdf = invarea / (costheta * costheta * costheta); + ls->eval_fac = ls->pdf; + } + else if (type == LIGHT_POINT || type == LIGHT_SPOT) { + float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]); + + float radius = klight->spot.radius; + + /* sphere light */ + if (radius == 0.0f) + return false; + + if (!ray_aligned_disk_intersect(P, D, t, lightP, radius, &ls->P, &ls->t)) { + return false; + } + + ls->Ng = -D; + ls->D = D; + + float invarea = klight->spot.invarea; + ls->eval_fac = (0.25f * M_1_PI_F) * invarea; + ls->pdf = invarea; + + if (type == LIGHT_SPOT) { + /* spot light attenuation */ + float3 dir = make_float3(klight->spot.dir[0], klight->spot.dir[1], klight->spot.dir[2]); + ls->eval_fac *= spot_light_attenuation( + dir, klight->spot.spot_angle, klight->spot.spot_smooth, ls); + + if (ls->eval_fac == 0.0f) + return false; + } + float2 uv = map_to_sphere(ls->Ng); + ls->u = uv.x; + ls->v = uv.y; + + /* compute pdf */ + if (ls->t != FLT_MAX) + ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); + } + else if (type == LIGHT_AREA) { + /* area light */ + float invarea = fabsf(klight->area.invarea); + bool is_round = (klight->area.invarea < 0.0f); + if (invarea == 0.0f) + return false; + + float3 axisu = make_float3( + klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]); + float3 axisv = make_float3( + klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]); + float3 Ng = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]); + + /* one sided */ + if (dot(D, Ng) >= 0.0f) + return false; + + float3 light_P = make_float3(klight->co[0], klight->co[1], 
klight->co[2]); + + if (!ray_quad_intersect( + P, D, 0.0f, t, light_P, axisu, axisv, Ng, &ls->P, &ls->t, &ls->u, &ls->v, is_round)) { + return false; + } + + ls->D = D; + ls->Ng = Ng; + if (is_round) { + ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t); + } + else { + ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false); + } + ls->eval_fac = 0.25f * invarea; + } + else { + return false; + } + + ls->pdf *= kernel_data.integrator.pdf_lights; + + return true; } /* Triangle Light */ /* returns true if the triangle is has motion blur or an instancing transform applied */ -ccl_device_inline bool triangle_world_space_vertices(KernelGlobals *kg, int object, int prim, float time, float3 V[3]) +ccl_device_inline bool triangle_world_space_vertices( + KernelGlobals *kg, int object, int prim, float time, float3 V[3]) { - bool has_motion = false; - const int object_flag = kernel_tex_fetch(__object_flag, object); + bool has_motion = false; + const int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) { - motion_triangle_vertices(kg, object, prim, time, V); - has_motion = true; - } - else { - triangle_vertices(kg, prim, V); - } + if (object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) { + motion_triangle_vertices(kg, object, prim, time, V); + has_motion = true; + } + else { + triangle_vertices(kg, prim, V); + } #ifdef __INSTANCING__ - if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { # ifdef __OBJECT_MOTION__ - float object_time = (time >= 0.0f) ? time : 0.5f; - Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL); + float object_time = (time >= 0.0f) ? 
time : 0.5f; + Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL); # else - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); # endif - V[0] = transform_point(&tfm, V[0]); - V[1] = transform_point(&tfm, V[1]); - V[2] = transform_point(&tfm, V[2]); - has_motion = true; - } + V[0] = transform_point(&tfm, V[0]); + V[1] = transform_point(&tfm, V[1]); + V[2] = transform_point(&tfm, V[2]); + has_motion = true; + } #endif - return has_motion; + return has_motion; } -ccl_device_inline float triangle_light_pdf_area(KernelGlobals *kg, const float3 Ng, const float3 I, float t) +ccl_device_inline float triangle_light_pdf_area(KernelGlobals *kg, + const float3 Ng, + const float3 I, + float t) { - float pdf = kernel_data.integrator.pdf_triangles; - float cos_pi = fabsf(dot(Ng, I)); + float pdf = kernel_data.integrator.pdf_triangles; + float cos_pi = fabsf(dot(Ng, I)); - if(cos_pi == 0.0f) - return 0.0f; + if (cos_pi == 0.0f) + return 0.0f; - return t*t*pdf/cos_pi; + return t * t * pdf / cos_pi; } ccl_device_forceinline float triangle_light_pdf(KernelGlobals *kg, ShaderData *sd, float t) { - /* A naive heuristic to decide between costly solid angle sampling - * and simple area sampling, comparing the distance to the triangle plane - * to the length of the edges of the triangle. 
*/ - - float3 V[3]; - bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V); - - const float3 e0 = V[1] - V[0]; - const float3 e1 = V[2] - V[0]; - const float3 e2 = V[2] - V[1]; - const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2))); - const float3 N = cross(e0, e1); - const float distance_to_plane = fabsf(dot(N, sd->I * t))/dot(N, N); - - if(longest_edge_squared > distance_to_plane*distance_to_plane) { - /* sd contains the point on the light source - * calculate Px, the point that we're shading */ - const float3 Px = sd->P + sd->I * t; - const float3 v0_p = V[0] - Px; - const float3 v1_p = V[1] - Px; - const float3 v2_p = V[2] - Px; - - const float3 u01 = safe_normalize(cross(v0_p, v1_p)); - const float3 u02 = safe_normalize(cross(v0_p, v2_p)); - const float3 u12 = safe_normalize(cross(v1_p, v2_p)); - - const float alpha = fast_acosf(dot(u02, u01)); - const float beta = fast_acosf(-dot(u01, u12)); - const float gamma = fast_acosf(dot(u02, u12)); - const float solid_angle = alpha + beta + gamma - M_PI_F; - - /* pdf_triangles is calculated over triangle area, but we're not sampling over its area */ - if(UNLIKELY(solid_angle == 0.0f)) { - return 0.0f; - } - else { - float area = 1.0f; - if(has_motion) { - /* get the center frame vertices, this is what the PDF was calculated from */ - triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V); - area = triangle_area(V[0], V[1], V[2]); - } - else { - area = 0.5f * len(N); - } - const float pdf = area * kernel_data.integrator.pdf_triangles; - return pdf / solid_angle; - } - } - else { - float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t); - if(has_motion) { - const float area = 0.5f * len(N); - if(UNLIKELY(area == 0.0f)) { - return 0.0f; - } - /* scale the PDF. 
- * area = the area the sample was taken from - * area_pre = the are from which pdf_triangles was calculated from */ - triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V); - const float area_pre = triangle_area(V[0], V[1], V[2]); - pdf = pdf * area_pre / area; - } - return pdf; - } + /* A naive heuristic to decide between costly solid angle sampling + * and simple area sampling, comparing the distance to the triangle plane + * to the length of the edges of the triangle. */ + + float3 V[3]; + bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V); + + const float3 e0 = V[1] - V[0]; + const float3 e1 = V[2] - V[0]; + const float3 e2 = V[2] - V[1]; + const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2))); + const float3 N = cross(e0, e1); + const float distance_to_plane = fabsf(dot(N, sd->I * t)) / dot(N, N); + + if (longest_edge_squared > distance_to_plane * distance_to_plane) { + /* sd contains the point on the light source + * calculate Px, the point that we're shading */ + const float3 Px = sd->P + sd->I * t; + const float3 v0_p = V[0] - Px; + const float3 v1_p = V[1] - Px; + const float3 v2_p = V[2] - Px; + + const float3 u01 = safe_normalize(cross(v0_p, v1_p)); + const float3 u02 = safe_normalize(cross(v0_p, v2_p)); + const float3 u12 = safe_normalize(cross(v1_p, v2_p)); + + const float alpha = fast_acosf(dot(u02, u01)); + const float beta = fast_acosf(-dot(u01, u12)); + const float gamma = fast_acosf(dot(u02, u12)); + const float solid_angle = alpha + beta + gamma - M_PI_F; + + /* pdf_triangles is calculated over triangle area, but we're not sampling over its area */ + if (UNLIKELY(solid_angle == 0.0f)) { + return 0.0f; + } + else { + float area = 1.0f; + if (has_motion) { + /* get the center frame vertices, this is what the PDF was calculated from */ + triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V); + area = triangle_area(V[0], V[1], V[2]); + } + else { + 
area = 0.5f * len(N); + } + const float pdf = area * kernel_data.integrator.pdf_triangles; + return pdf / solid_angle; + } + } + else { + float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t); + if (has_motion) { + const float area = 0.5f * len(N); + if (UNLIKELY(area == 0.0f)) { + return 0.0f; + } + /* scale the PDF. + * area = the area the sample was taken from + * area_pre = the are from which pdf_triangles was calculated from */ + triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V); + const float area_pre = triangle_area(V[0], V[1], V[2]); + pdf = pdf * area_pre / area; + } + return pdf; + } } -ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg, int prim, int object, - float randu, float randv, float time, LightSample *ls, const float3 P) +ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg, + int prim, + int object, + float randu, + float randv, + float time, + LightSample *ls, + const float3 P) { - /* A naive heuristic to decide between costly solid angle sampling - * and simple area sampling, comparing the distance to the triangle plane - * to the length of the edges of the triangle. 
*/ - - float3 V[3]; - bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V); - - const float3 e0 = V[1] - V[0]; - const float3 e1 = V[2] - V[0]; - const float3 e2 = V[2] - V[1]; - const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2))); - const float3 N0 = cross(e0, e1); - float Nl = 0.0f; - ls->Ng = safe_normalize_len(N0, &Nl); - float area = 0.5f * Nl; - - /* flip normal if necessary */ - const int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - ls->Ng = -ls->Ng; - } - ls->eval_fac = 1.0f; - ls->shader = kernel_tex_fetch(__tri_shader, prim); - ls->object = object; - ls->prim = prim; - ls->lamp = LAMP_NONE; - ls->shader |= SHADER_USE_MIS; - ls->type = LIGHT_TRIANGLE; - - float distance_to_plane = fabsf(dot(N0, V[0] - P)/dot(N0, N0)); - - if(longest_edge_squared > distance_to_plane*distance_to_plane) { - /* see James Arvo, "Stratified Sampling of Spherical Triangles" - * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */ - - /* project the triangle to the unit sphere - * and calculate its edges and angles */ - const float3 v0_p = V[0] - P; - const float3 v1_p = V[1] - P; - const float3 v2_p = V[2] - P; - - const float3 u01 = safe_normalize(cross(v0_p, v1_p)); - const float3 u02 = safe_normalize(cross(v0_p, v2_p)); - const float3 u12 = safe_normalize(cross(v1_p, v2_p)); - - const float3 A = safe_normalize(v0_p); - const float3 B = safe_normalize(v1_p); - const float3 C = safe_normalize(v2_p); - - const float cos_alpha = dot(u02, u01); - const float cos_beta = -dot(u01, u12); - const float cos_gamma = dot(u02, u12); - - /* calculate dihedral angles */ - const float alpha = fast_acosf(cos_alpha); - const float beta = fast_acosf(cos_beta); - const float gamma = fast_acosf(cos_gamma); - /* the area of the unit spherical triangle = solid angle */ - const float solid_angle = alpha + beta + gamma - M_PI_F; - - /* precompute a few things - * these 
could be re-used to take several samples - * as they are independent of randu/randv */ - const float cos_c = dot(A, B); - const float sin_alpha = fast_sinf(alpha); - const float product = sin_alpha * cos_c; - - /* Select a random sub-area of the spherical triangle - * and calculate the third vertex C_ of that new triangle */ - const float phi = randu * solid_angle - alpha; - float s, t; - fast_sincosf(phi, &s, &t); - const float u = t - cos_alpha; - const float v = s + product; - - const float3 U = safe_normalize(C - dot(C, A) * A); - - float q = 1.0f; - const float det = ((v * s + u * t) * sin_alpha); - if(det != 0.0f) { - q = ((v * t - u * s) * cos_alpha - v) / det; - } - const float temp = max(1.0f - q*q, 0.0f); - - const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U); - - /* Finally, select a random point along the edge of the new triangle - * That point on the spherical triangle is the sampled ray direction */ - const float z = 1.0f - randv * (1.0f - dot(C_, B)); - ls->D = z * B + safe_sqrtf(1.0f - z*z) * safe_normalize(C_ - dot(C_, B) * B); - - /* calculate intersection with the planar triangle */ - if(!ray_triangle_intersect(P, ls->D, FLT_MAX, + /* A naive heuristic to decide between costly solid angle sampling + * and simple area sampling, comparing the distance to the triangle plane + * to the length of the edges of the triangle. 
*/ + + float3 V[3]; + bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V); + + const float3 e0 = V[1] - V[0]; + const float3 e1 = V[2] - V[0]; + const float3 e2 = V[2] - V[1]; + const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2))); + const float3 N0 = cross(e0, e1); + float Nl = 0.0f; + ls->Ng = safe_normalize_len(N0, &Nl); + float area = 0.5f * Nl; + + /* flip normal if necessary */ + const int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + ls->Ng = -ls->Ng; + } + ls->eval_fac = 1.0f; + ls->shader = kernel_tex_fetch(__tri_shader, prim); + ls->object = object; + ls->prim = prim; + ls->lamp = LAMP_NONE; + ls->shader |= SHADER_USE_MIS; + ls->type = LIGHT_TRIANGLE; + + float distance_to_plane = fabsf(dot(N0, V[0] - P) / dot(N0, N0)); + + if (longest_edge_squared > distance_to_plane * distance_to_plane) { + /* see James Arvo, "Stratified Sampling of Spherical Triangles" + * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */ + + /* project the triangle to the unit sphere + * and calculate its edges and angles */ + const float3 v0_p = V[0] - P; + const float3 v1_p = V[1] - P; + const float3 v2_p = V[2] - P; + + const float3 u01 = safe_normalize(cross(v0_p, v1_p)); + const float3 u02 = safe_normalize(cross(v0_p, v2_p)); + const float3 u12 = safe_normalize(cross(v1_p, v2_p)); + + const float3 A = safe_normalize(v0_p); + const float3 B = safe_normalize(v1_p); + const float3 C = safe_normalize(v2_p); + + const float cos_alpha = dot(u02, u01); + const float cos_beta = -dot(u01, u12); + const float cos_gamma = dot(u02, u12); + + /* calculate dihedral angles */ + const float alpha = fast_acosf(cos_alpha); + const float beta = fast_acosf(cos_beta); + const float gamma = fast_acosf(cos_gamma); + /* the area of the unit spherical triangle = solid angle */ + const float solid_angle = alpha + beta + gamma - M_PI_F; + + /* precompute a few things + * 
these could be re-used to take several samples + * as they are independent of randu/randv */ + const float cos_c = dot(A, B); + const float sin_alpha = fast_sinf(alpha); + const float product = sin_alpha * cos_c; + + /* Select a random sub-area of the spherical triangle + * and calculate the third vertex C_ of that new triangle */ + const float phi = randu * solid_angle - alpha; + float s, t; + fast_sincosf(phi, &s, &t); + const float u = t - cos_alpha; + const float v = s + product; + + const float3 U = safe_normalize(C - dot(C, A) * A); + + float q = 1.0f; + const float det = ((v * s + u * t) * sin_alpha); + if (det != 0.0f) { + q = ((v * t - u * s) * cos_alpha - v) / det; + } + const float temp = max(1.0f - q * q, 0.0f); + + const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U); + + /* Finally, select a random point along the edge of the new triangle + * That point on the spherical triangle is the sampled ray direction */ + const float z = 1.0f - randv * (1.0f - dot(C_, B)); + ls->D = z * B + safe_sqrtf(1.0f - z * z) * safe_normalize(C_ - dot(C_, B) * B); + + /* calculate intersection with the planar triangle */ + if (!ray_triangle_intersect(P, + ls->D, + FLT_MAX, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - (ssef*)V, + (ssef *)V, #else - V[0], V[1], V[2], + V[0], + V[1], + V[2], #endif - &ls->u, &ls->v, &ls->t)) { - ls->pdf = 0.0f; - return; - } - - ls->P = P + ls->D * ls->t; - - /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */ - if(UNLIKELY(solid_angle == 0.0f)) { - ls->pdf = 0.0f; - return; - } - else { - if(has_motion) { - /* get the center frame vertices, this is what the PDF was calculated from */ - triangle_world_space_vertices(kg, object, prim, -1.0f, V); - area = triangle_area(V[0], V[1], V[2]); - } - const float pdf = area * kernel_data.integrator.pdf_triangles; - ls->pdf = pdf / solid_angle; - } - } - else { - /* compute random point in triangle */ - randu = sqrtf(randu); - - const float u = 
1.0f - randu; - const float v = randv*randu; - const float t = 1.0f - u - v; - ls->P = u * V[0] + v * V[1] + t * V[2]; - /* compute incoming direction, distance and pdf */ - ls->D = normalize_len(ls->P - P, &ls->t); - ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t); - if(has_motion && area != 0.0f) { - /* scale the PDF. - * area = the area the sample was taken from - * area_pre = the are from which pdf_triangles was calculated from */ - triangle_world_space_vertices(kg, object, prim, -1.0f, V); - const float area_pre = triangle_area(V[0], V[1], V[2]); - ls->pdf = ls->pdf * area_pre / area; - } - ls->u = u; - ls->v = v; - } + &ls->u, + &ls->v, + &ls->t)) { + ls->pdf = 0.0f; + return; + } + + ls->P = P + ls->D * ls->t; + + /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */ + if (UNLIKELY(solid_angle == 0.0f)) { + ls->pdf = 0.0f; + return; + } + else { + if (has_motion) { + /* get the center frame vertices, this is what the PDF was calculated from */ + triangle_world_space_vertices(kg, object, prim, -1.0f, V); + area = triangle_area(V[0], V[1], V[2]); + } + const float pdf = area * kernel_data.integrator.pdf_triangles; + ls->pdf = pdf / solid_angle; + } + } + else { + /* compute random point in triangle */ + randu = sqrtf(randu); + + const float u = 1.0f - randu; + const float v = randv * randu; + const float t = 1.0f - u - v; + ls->P = u * V[0] + v * V[1] + t * V[2]; + /* compute incoming direction, distance and pdf */ + ls->D = normalize_len(ls->P - P, &ls->t); + ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t); + if (has_motion && area != 0.0f) { + /* scale the PDF. 
+ * area = the area the sample was taken from + * area_pre = the are from which pdf_triangles was calculated from */ + triangle_world_space_vertices(kg, object, prim, -1.0f, V); + const float area_pre = triangle_area(V[0], V[1], V[2]); + ls->pdf = ls->pdf * area_pre / area; + } + ls->u = u; + ls->v = v; + } } /* Light Distribution */ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu) { - /* This is basically std::upper_bound as used by pbrt, to find a point light or - * triangle to emit from, proportional to area. a good improvement would be to - * also sample proportional to power, though it's not so well defined with - * arbitrary shaders. */ - int first = 0; - int len = kernel_data.integrator.num_distribution + 1; - float r = *randu; - - while(len > 0) { - int half_len = len >> 1; - int middle = first + half_len; - - if(r < kernel_tex_fetch(__light_distribution, middle).totarea) { - len = half_len; - } - else { - first = middle + 1; - len = len - half_len - 1; - } - } - - /* Clamping should not be needed but float rounding errors seem to - * make this fail on rare occasions. */ - int index = clamp(first-1, 0, kernel_data.integrator.num_distribution-1); - - /* Rescale to reuse random number. this helps the 2D samples within - * each area light be stratified as well. */ - float distr_min = kernel_tex_fetch(__light_distribution, index).totarea; - float distr_max = kernel_tex_fetch(__light_distribution, index+1).totarea; - *randu = (r - distr_min)/(distr_max - distr_min); - - return index; + /* This is basically std::upper_bound as used by pbrt, to find a point light or + * triangle to emit from, proportional to area. a good improvement would be to + * also sample proportional to power, though it's not so well defined with + * arbitrary shaders. 
*/ + int first = 0; + int len = kernel_data.integrator.num_distribution + 1; + float r = *randu; + + while (len > 0) { + int half_len = len >> 1; + int middle = first + half_len; + + if (r < kernel_tex_fetch(__light_distribution, middle).totarea) { + len = half_len; + } + else { + first = middle + 1; + len = len - half_len - 1; + } + } + + /* Clamping should not be needed but float rounding errors seem to + * make this fail on rare occasions. */ + int index = clamp(first - 1, 0, kernel_data.integrator.num_distribution - 1); + + /* Rescale to reuse random number. this helps the 2D samples within + * each area light be stratified as well. */ + float distr_min = kernel_tex_fetch(__light_distribution, index).totarea; + float distr_max = kernel_tex_fetch(__light_distribution, index + 1).totarea; + *randu = (r - distr_min) / (distr_max - distr_min); + + return index; } /* Generic Light */ ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce) { - return (bounce > kernel_tex_fetch(__lights, index).max_bounces); + return (bounce > kernel_tex_fetch(__lights, index).max_bounces); } -ccl_device_noinline bool light_sample(KernelGlobals *kg, - float randu, - float randv, - float time, - float3 P, - int bounce, - LightSample *ls) +ccl_device_noinline bool light_sample( + KernelGlobals *kg, float randu, float randv, float time, float3 P, int bounce, LightSample *ls) { - /* sample index */ - int index = light_distribution_sample(kg, &randu); - - /* fetch light data */ - const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, index); - int prim = kdistribution->prim; - - if(prim >= 0) { - int object = kdistribution->mesh_light.object_id; - int shader_flag = kdistribution->mesh_light.shader_flag; - - triangle_light_sample(kg, prim, object, randu, randv, time, ls, P); - ls->shader |= shader_flag; - return (ls->pdf > 0.0f); - } - else { - int lamp = -prim-1; - - 
if(UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) { - return false; - } - - return lamp_light_sample(kg, lamp, randu, randv, P, ls); - } + /* sample index */ + int index = light_distribution_sample(kg, &randu); + + /* fetch light data */ + const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, + index); + int prim = kdistribution->prim; + + if (prim >= 0) { + int object = kdistribution->mesh_light.object_id; + int shader_flag = kdistribution->mesh_light.shader_flag; + + triangle_light_sample(kg, prim, object, randu, randv, time, ls, P); + ls->shader |= shader_flag; + return (ls->pdf > 0.0f); + } + else { + int lamp = -prim - 1; + + if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) { + return false; + } + + return lamp_light_sample(kg, lamp, randu, randv, P, ls); + } } ccl_device int light_select_num_samples(KernelGlobals *kg, int index) { - return kernel_tex_fetch(__lights, index).samples; + return kernel_tex_fetch(__lights, index).samples; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_math.h b/intern/cycles/kernel/kernel_math.h index a8a43f3ea4a..96391db7649 100644 --- a/intern/cycles/kernel/kernel_math.h +++ b/intern/cycles/kernel/kernel_math.h @@ -25,4 +25,4 @@ #include "util/util_texture.h" #include "util/util_transform.h" -#endif /* __KERNEL_MATH_H__ */ +#endif /* __KERNEL_MATH_H__ */ diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h index dde93844dd3..a933be970c2 100644 --- a/intern/cycles/kernel/kernel_montecarlo.h +++ b/intern/cycles/kernel/kernel_montecarlo.h @@ -38,248 +38,245 @@ CCL_NAMESPACE_BEGIN /* distribute uniform xy on [0,1] over unit disk [-1,1] */ ccl_device void to_unit_disk(float *x, float *y) { - float phi = M_2PI_F * (*x); - float r = sqrtf(*y); + float phi = M_2PI_F * (*x); + float r = sqrtf(*y); - *x = r * cosf(phi); - *y = r * sinf(phi); + *x = r * cosf(phi); + *y = r * sinf(phi); } /* return an 
orthogonal tangent and bitangent given a normal and tangent that * may not be exactly orthogonal */ ccl_device void make_orthonormals_tangent(const float3 N, const float3 T, float3 *a, float3 *b) { - *b = normalize(cross(N, T)); - *a = cross(*b, N); + *b = normalize(cross(N, T)); + *a = cross(*b, N); } /* sample direction with cosine weighted distributed in hemisphere */ -ccl_device_inline void sample_cos_hemisphere(const float3 N, - float randu, float randv, float3 *omega_in, float *pdf) +ccl_device_inline void sample_cos_hemisphere( + const float3 N, float randu, float randv, float3 *omega_in, float *pdf) { - to_unit_disk(&randu, &randv); - float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f)); - float3 T, B; - make_orthonormals(N, &T, &B); - *omega_in = randu * T + randv * B + costheta * N; - *pdf = costheta *M_1_PI_F; + to_unit_disk(&randu, &randv); + float costheta = sqrtf(max(1.0f - randu * randu - randv * randv, 0.0f)); + float3 T, B; + make_orthonormals(N, &T, &B); + *omega_in = randu * T + randv * B + costheta * N; + *pdf = costheta * M_1_PI_F; } /* sample direction uniformly distributed in hemisphere */ -ccl_device_inline void sample_uniform_hemisphere(const float3 N, - float randu, float randv, - float3 *omega_in, float *pdf) +ccl_device_inline void sample_uniform_hemisphere( + const float3 N, float randu, float randv, float3 *omega_in, float *pdf) { - float z = randu; - float r = sqrtf(max(0.0f, 1.0f - z*z)); - float phi = M_2PI_F * randv; - float x = r * cosf(phi); - float y = r * sinf(phi); - - float3 T, B; - make_orthonormals (N, &T, &B); - *omega_in = x * T + y * B + z * N; - *pdf = 0.5f * M_1_PI_F; + float z = randu; + float r = sqrtf(max(0.0f, 1.0f - z * z)); + float phi = M_2PI_F * randv; + float x = r * cosf(phi); + float y = r * sinf(phi); + + float3 T, B; + make_orthonormals(N, &T, &B); + *omega_in = x * T + y * B + z * N; + *pdf = 0.5f * M_1_PI_F; } /* sample direction uniformly distributed in cone */ -ccl_device_inline void 
sample_uniform_cone(const float3 N, float angle, - float randu, float randv, - float3 *omega_in, float *pdf) +ccl_device_inline void sample_uniform_cone( + const float3 N, float angle, float randu, float randv, float3 *omega_in, float *pdf) { - float z = cosf(angle*randu); - float r = sqrtf(max(0.0f, 1.0f - z*z)); - float phi = M_2PI_F * randv; - float x = r * cosf(phi); - float y = r * sinf(phi); - - float3 T, B; - make_orthonormals (N, &T, &B); - *omega_in = x * T + y * B + z * N; - *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(angle)); + float z = cosf(angle * randu); + float r = sqrtf(max(0.0f, 1.0f - z * z)); + float phi = M_2PI_F * randv; + float x = r * cosf(phi); + float y = r * sinf(phi); + + float3 T, B; + make_orthonormals(N, &T, &B); + *omega_in = x * T + y * B + z * N; + *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(angle)); } /* sample uniform point on the surface of a sphere */ ccl_device float3 sample_uniform_sphere(float u1, float u2) { - float z = 1.0f - 2.0f*u1; - float r = sqrtf(fmaxf(0.0f, 1.0f - z*z)); - float phi = M_2PI_F*u2; - float x = r*cosf(phi); - float y = r*sinf(phi); + float z = 1.0f - 2.0f * u1; + float r = sqrtf(fmaxf(0.0f, 1.0f - z * z)); + float phi = M_2PI_F * u2; + float x = r * cosf(phi); + float y = r * sinf(phi); - return make_float3(x, y, z); + return make_float3(x, y, z); } ccl_device float balance_heuristic(float a, float b) { - return (a)/(a + b); + return (a) / (a + b); } ccl_device float balance_heuristic_3(float a, float b, float c) { - return (a)/(a + b + c); + return (a) / (a + b + c); } ccl_device float power_heuristic(float a, float b) { - return (a*a)/(a*a + b*b); + return (a * a) / (a * a + b * b); } ccl_device float power_heuristic_3(float a, float b, float c) { - return (a*a)/(a*a + b*b + c*c); + return (a * a) / (a * a + b * b + c * c); } ccl_device float max_heuristic(float a, float b) { - return (a > b)? 1.0f: 0.0f; + return (a > b) ? 
1.0f : 0.0f; } /* distribute uniform xy on [0,1] over unit disk [-1,1], with concentric mapping * to better preserve stratification for some RNG sequences */ ccl_device float2 concentric_sample_disk(float u1, float u2) { - float phi, r; - float a = 2.0f*u1 - 1.0f; - float b = 2.0f*u2 - 1.0f; - - if(a == 0.0f && b == 0.0f) { - return make_float2(0.0f, 0.0f); - } - else if(a*a > b*b) { - r = a; - phi = M_PI_4_F * (b/a); - } - else { - r = b; - phi = M_PI_2_F - M_PI_4_F * (a/b); - } - - return make_float2(r*cosf(phi), r*sinf(phi)); + float phi, r; + float a = 2.0f * u1 - 1.0f; + float b = 2.0f * u2 - 1.0f; + + if (a == 0.0f && b == 0.0f) { + return make_float2(0.0f, 0.0f); + } + else if (a * a > b * b) { + r = a; + phi = M_PI_4_F * (b / a); + } + else { + r = b; + phi = M_PI_2_F - M_PI_4_F * (a / b); + } + + return make_float2(r * cosf(phi), r * sinf(phi)); } /* sample point in unit polygon with given number of corners and rotation */ ccl_device float2 regular_polygon_sample(float corners, float rotation, float u, float v) { - /* sample corner number and reuse u */ - float corner = floorf(u*corners); - u = u*corners - corner; + /* sample corner number and reuse u */ + float corner = floorf(u * corners); + u = u * corners - corner; - /* uniform sampled triangle weights */ - u = sqrtf(u); - v = v*u; - u = 1.0f - u; + /* uniform sampled triangle weights */ + u = sqrtf(u); + v = v * u; + u = 1.0f - u; - /* point in triangle */ - float angle = M_PI_F/corners; - float2 p = make_float2((u + v)*cosf(angle), (u - v)*sinf(angle)); + /* point in triangle */ + float angle = M_PI_F / corners; + float2 p = make_float2((u + v) * cosf(angle), (u - v) * sinf(angle)); - /* rotate */ - rotation += corner*2.0f*angle; + /* rotate */ + rotation += corner * 2.0f * angle; - float cr = cosf(rotation); - float sr = sinf(rotation); + float cr = cosf(rotation); + float sr = sinf(rotation); - return make_float2(cr*p.x - sr*p.y, sr*p.x + cr*p.y); + return make_float2(cr * p.x - sr * p.y, sr * p.x 
+ cr * p.y); } ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N) { - float3 R = 2*dot(N, I)*N - I; - - /* Reflection rays may always be at least as shallow as the incoming ray. */ - float threshold = min(0.9f*dot(Ng, I), 0.01f); - if(dot(Ng, R) >= threshold) { - return N; - } - - /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane. - * The X axis is found by normalizing the component of N that's orthogonal to Ng. - * The Y axis isn't actually needed. - */ - float NdotNg = dot(N, Ng); - float3 X = normalize(N - NdotNg*Ng); - - /* Calculate N.z and N.x in the local coordinate system. - * - * The goal of this computation is to find a N' that is rotated towards Ng just enough - * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t. - * - * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t. - * - * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t. - * - * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that - * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z . - * - * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2). - * - * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t. - * - * The only unknown here is N'.z, so we can solve for that. - * - * The equation has four solutions in general: - * - * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2)) - * We can simplify this expression a bit by grouping terms: - * - * a = I.x^2 + I.z^2 - * b = sqrt(I.x^2 * (a - t^2)) - * c = I.z*t + a - * N'.z = +-sqrt(0.5*(+-b + c)/a) - * - * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere. 
- */ - float Ix = dot(I, X), Iz = dot(I, Ng); - float Ix2 = sqr(Ix), Iz2 = sqr(Iz); - float a = Ix2 + Iz2; - - float b = safe_sqrtf(Ix2*(a - sqr(threshold))); - float c = Iz*threshold + a; - - /* Evaluate both solutions. - * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first. - * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */ - float fac = 0.5f/a; - float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c); - bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f)); - bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f)); - - float2 N_new; - if(valid1 && valid2) { - /* If both are possible, do the expensive reflection-based check. */ - float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2)); - float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2)); - - float R1 = 2*(N1.x*Ix + N1.y*Iz)*N1.y - Iz; - float R2 = 2*(N2.x*Ix + N2.y*Iz)*N2.y - Iz; - - valid1 = (R1 >= 1e-5f); - valid2 = (R2 >= 1e-5f); - if(valid1 && valid2) { - /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input - * (if the original reflection wasn't shallow, we would not be in this part of the function). */ - N_new = (R1 < R2)? N1 : N2; - } - else { - /* If only one reflection is valid (= positive), pick that one. */ - N_new = (R1 > R2)? N1 : N2; - } - - } - else if(valid1 || valid2) { - /* Only one solution passes the N'.z criterium, so pick that one. */ - float Nz2 = valid1? N1_z2 : N2_z2; - N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2)); - } - else { - return Ng; - } - - return N_new.x*X + N_new.y*Ng; + float3 R = 2 * dot(N, I) * N - I; + + /* Reflection rays may always be at least as shallow as the incoming ray. 
*/ + float threshold = min(0.9f * dot(Ng, I), 0.01f); + if (dot(Ng, R) >= threshold) { + return N; + } + + /* Form coordinate system with Ng as the Z axis and N inside the X-Z-plane. + * The X axis is found by normalizing the component of N that's orthogonal to Ng. + * The Y axis isn't actually needed. + */ + float NdotNg = dot(N, Ng); + float3 X = normalize(N - NdotNg * Ng); + + /* Calculate N.z and N.x in the local coordinate system. + * + * The goal of this computation is to find a N' that is rotated towards Ng just enough + * to lift R' above the threshold (here called t), therefore dot(R', Ng) = t. + * + * According to the standard reflection equation, this means that we want dot(2*dot(N', I)*N' - I, Ng) = t. + * + * Since the Z axis of our local coordinate system is Ng, dot(x, Ng) is just x.z, so we get 2*dot(N', I)*N'.z - I.z = t. + * + * The rotation is simple to express in the coordinate system we formed - since N lies in the X-Z-plane, we know that + * N' will also lie in the X-Z-plane, so N'.y = 0 and therefore dot(N', I) = N'.x*I.x + N'.z*I.z . + * + * Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2). + * + * With these simplifications, we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t. + * + * The only unknown here is N'.z, so we can solve for that. + * + * The equation has four solutions in general: + * + * N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2)) + * We can simplify this expression a bit by grouping terms: + * + * a = I.x^2 + I.z^2 + * b = sqrt(I.x^2 * (a - t^2)) + * c = I.z*t + a + * N'.z = +-sqrt(0.5*(+-b + c)/a) + * + * Two solutions can immediately be discarded because they're negative so N' would lie in the lower hemisphere. 
+ */ + float Ix = dot(I, X), Iz = dot(I, Ng); + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; + + float b = safe_sqrtf(Ix2 * (a - sqr(threshold))); + float c = Iz * threshold + a; + + /* Evaluate both solutions. + * In many cases one can be immediately discarded (if N'.z would be imaginary or larger than one), so check for that first. + * If no option is viable (might happen in extreme cases like N being in the wrong hemisphere), give up and return Ng. */ + float fac = 0.5f / a; + float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c); + bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f)); + bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f)); + + float2 N_new; + if (valid1 && valid2) { + /* If both are possible, do the expensive reflection-based check. */ + float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2)); + float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2)); + + float R1 = 2 * (N1.x * Ix + N1.y * Iz) * N1.y - Iz; + float R2 = 2 * (N2.x * Ix + N2.y * Iz) * N2.y - Iz; + + valid1 = (R1 >= 1e-5f); + valid2 = (R2 >= 1e-5f); + if (valid1 && valid2) { + /* If both solutions are valid, return the one with the shallower reflection since it will be closer to the input + * (if the original reflection wasn't shallow, we would not be in this part of the function). */ + N_new = (R1 < R2) ? N1 : N2; + } + else { + /* If only one reflection is valid (= positive), pick that one. */ + N_new = (R1 > R2) ? N1 : N2; + } + } + else if (valid1 || valid2) { + /* Only one solution passes the N'.z criterium, so pick that one. */ + float Nz2 = valid1 ? 
N1_z2 : N2_z2; + N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2)); + } + else { + return Ng; + } + + return N_new.x * X + N_new.y * Ng; } CCL_NAMESPACE_END -#endif /* __KERNEL_MONTECARLO_CL__ */ +#endif /* __KERNEL_MONTECARLO_CL__ */ diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 08e9db05c39..462ec037ee7 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -15,7 +15,7 @@ */ #if defined(__SPLIT_KERNEL__) || defined(__KERNEL_CUDA__) -#define __ATOMIC_PASS_WRITE__ +# define __ATOMIC_PASS_WRITE__ #endif #include "kernel/kernel_id_passes.h" @@ -24,56 +24,56 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_write_pass_float(ccl_global float *buffer, float value) { - ccl_global float *buf = buffer; + ccl_global float *buf = buffer; #ifdef __ATOMIC_PASS_WRITE__ - atomic_add_and_fetch_float(buf, value); + atomic_add_and_fetch_float(buf, value); #else - *buf += value; + *buf += value; #endif } ccl_device_inline void kernel_write_pass_float3(ccl_global float *buffer, float3 value) { #ifdef __ATOMIC_PASS_WRITE__ - ccl_global float *buf_x = buffer + 0; - ccl_global float *buf_y = buffer + 1; - ccl_global float *buf_z = buffer + 2; + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; - atomic_add_and_fetch_float(buf_x, value.x); - atomic_add_and_fetch_float(buf_y, value.y); - atomic_add_and_fetch_float(buf_z, value.z); + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); #else - ccl_global float3 *buf = (ccl_global float3*)buffer; - *buf += value; + ccl_global float3 *buf = (ccl_global float3 *)buffer; + *buf += value; #endif } ccl_device_inline void kernel_write_pass_float4(ccl_global float *buffer, float4 value) { #ifdef __ATOMIC_PASS_WRITE__ - ccl_global float *buf_x = buffer + 0; - ccl_global float *buf_y = buffer + 1; - 
ccl_global float *buf_z = buffer + 2; - ccl_global float *buf_w = buffer + 3; - - atomic_add_and_fetch_float(buf_x, value.x); - atomic_add_and_fetch_float(buf_y, value.y); - atomic_add_and_fetch_float(buf_z, value.z); - atomic_add_and_fetch_float(buf_w, value.w); + ccl_global float *buf_x = buffer + 0; + ccl_global float *buf_y = buffer + 1; + ccl_global float *buf_z = buffer + 2; + ccl_global float *buf_w = buffer + 3; + + atomic_add_and_fetch_float(buf_x, value.x); + atomic_add_and_fetch_float(buf_y, value.y); + atomic_add_and_fetch_float(buf_z, value.z); + atomic_add_and_fetch_float(buf_w, value.w); #else - ccl_global float4 *buf = (ccl_global float4*)buffer; - *buf += value; + ccl_global float4 *buf = (ccl_global float4 *)buffer; + *buf += value; #endif } #ifdef __DENOISING_FEATURES__ ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer, float value) { - kernel_write_pass_float(buffer, value); + kernel_write_pass_float(buffer, value); - /* The online one-pass variance update that's used for the megakernel can't easily be implemented - * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. */ - kernel_write_pass_float(buffer+1, value*value); + /* The online one-pass variance update that's used for the megakernel can't easily be implemented + * with atomics, so for the split kernel the E[x^2] - 1/N * (E[x])^2 fallback is used. 
*/ + kernel_write_pass_float(buffer + 1, value * value); } # ifdef __ATOMIC_PASS_WRITE__ @@ -81,36 +81,39 @@ ccl_device_inline void kernel_write_pass_float_variance(ccl_global float *buffer # else ccl_device_inline void kernel_write_pass_float3_unaligned(ccl_global float *buffer, float3 value) { - buffer[0] += value.x; - buffer[1] += value.y; - buffer[2] += value.z; + buffer[0] += value.x; + buffer[1] += value.y; + buffer[2] += value.z; } # endif ccl_device_inline void kernel_write_pass_float3_variance(ccl_global float *buffer, float3 value) { - kernel_write_pass_float3_unaligned(buffer, value); - kernel_write_pass_float3_unaligned(buffer+3, value*value); + kernel_write_pass_float3_unaligned(buffer, value); + kernel_write_pass_float3_unaligned(buffer + 3, value * value); } -ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer, - int sample, float path_total, float path_total_shaded) +ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, + ccl_global float *buffer, + int sample, + float path_total, + float path_total_shaded) { - if(kernel_data.film.pass_denoising_data == 0) - return; + if (kernel_data.film.pass_denoising_data == 0) + return; - buffer += (sample & 1)? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A; + buffer += (sample & 1) ? 
DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A; - path_total = ensure_finite(path_total); - path_total_shaded = ensure_finite(path_total_shaded); + path_total = ensure_finite(path_total); + path_total_shaded = ensure_finite(path_total_shaded); - kernel_write_pass_float(buffer, path_total); - kernel_write_pass_float(buffer+1, path_total_shaded); + kernel_write_pass_float(buffer, path_total); + kernel_write_pass_float(buffer + 1, path_total_shaded); - float value = path_total_shaded / max(path_total, 1e-7f); - kernel_write_pass_float(buffer+2, value*value); + float value = path_total_shaded / max(path_total, 1e-7f); + kernel_write_pass_float(buffer + 2, value * value); } -#endif /* __DENOISING_FEATURES__ */ +#endif /* __DENOISING_FEATURES__ */ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, ShaderData *sd, @@ -118,52 +121,52 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, PathRadiance *L) { #ifdef __DENOISING_FEATURES__ - if(state->denoising_feature_weight == 0.0f) { - return; - } - - L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length); - - /* Skip implicitly transparent surfaces. */ - if(sd->flag & SD_HAS_ONLY_VOLUME) { - return; - } - - float3 normal = make_float3(0.0f, 0.0f, 0.0f); - float3 albedo = make_float3(0.0f, 0.0f, 0.0f); - float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; - - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) - continue; - - /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ - normal += sc->N * sc->sample_weight; - sum_weight += sc->sample_weight; - if(bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { - albedo += sc->weight; - sum_nonspecular_weight += sc->sample_weight; - } - } - - /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. 
*/ - if((sum_weight == 0.0f) || (sum_nonspecular_weight*4.0f > sum_weight)) { - if(sum_weight != 0.0f) { - normal /= sum_weight; - } - L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal); - L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo); - - state->denoising_feature_weight = 0.0f; - } + if (state->denoising_feature_weight == 0.0f) { + return; + } + + L->denoising_depth += ensure_finite(state->denoising_feature_weight * sd->ray_length); + + /* Skip implicitly transparent surfaces. */ + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return; + } + + float3 normal = make_float3(0.0f, 0.0f, 0.0f); + float3 albedo = make_float3(0.0f, 0.0f, 0.0f); + float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) + continue; + + /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */ + normal += sc->N * sc->sample_weight; + sum_weight += sc->sample_weight; + if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) { + albedo += sc->weight; + sum_nonspecular_weight += sc->sample_weight; + } + } + + /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. 
*/ + if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) { + if (sum_weight != 0.0f) { + normal /= sum_weight; + } + L->denoising_normal += ensure_finite3(state->denoising_feature_weight * normal); + L->denoising_albedo += ensure_finite3(state->denoising_feature_weight * albedo); + + state->denoising_feature_weight = 0.0f; + } #else - (void) kg; - (void) sd; - (void) state; - (void) L; -#endif /* __DENOISING_FEATURES__ */ + (void)kg; + (void)sd; + (void)state; + (void)L; +#endif /* __DENOISING_FEATURES__ */ } #ifdef __KERNEL_DEBUG__ @@ -171,203 +174,221 @@ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L) { - int flag = kernel_data.film.pass_flag; - if(flag & PASSMASK(BVH_TRAVERSED_NODES)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes, - L->debug_data.num_bvh_traversed_nodes); - } - if(flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances, - L->debug_data.num_bvh_traversed_instances); - } - if(flag & PASSMASK(BVH_INTERSECTIONS)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections, - L->debug_data.num_bvh_intersections); - } - if(flag & PASSMASK(RAY_BOUNCES)) { - kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces, - L->debug_data.num_ray_bounces); - } + int flag = kernel_data.film.pass_flag; + if (flag & PASSMASK(BVH_TRAVERSED_NODES)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_nodes, + L->debug_data.num_bvh_traversed_nodes); + } + if (flag & PASSMASK(BVH_TRAVERSED_INSTANCES)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_traversed_instances, + L->debug_data.num_bvh_traversed_instances); + } + if (flag & PASSMASK(BVH_INTERSECTIONS)) { + kernel_write_pass_float(buffer + kernel_data.film.pass_bvh_intersections, + L->debug_data.num_bvh_intersections); + } + if (flag & PASSMASK(RAY_BOUNCES)) { + 
kernel_write_pass_float(buffer + kernel_data.film.pass_ray_bounces, + L->debug_data.num_ray_bounces); + } } -#endif /* __KERNEL_DEBUG__ */ +#endif /* __KERNEL_DEBUG__ */ #ifdef __KERNEL_CPU__ -#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name) -ccl_device_inline size_t kernel_write_id_pass_cpu(float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map) +# define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) \ + kernel_write_id_pass_cpu(buffer, depth * 2, id, matte_weight, kg->coverage_##name) +ccl_device_inline size_t kernel_write_id_pass_cpu( + float *buffer, size_t depth, float id, float matte_weight, CoverageMap *map) { - if(map) { - (*map)[id] += matte_weight; - return 0; - } -#else /* __KERNEL_CPU__ */ -#define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight) -ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, size_t depth, float id, float matte_weight) + if (map) { + (*map)[id] += matte_weight; + return 0; + } +#else /* __KERNEL_CPU__ */ +# define WRITE_ID_SLOT(buffer, depth, id, matte_weight, name) \ + kernel_write_id_slots_gpu(buffer, depth * 2, id, matte_weight) +ccl_device_inline size_t kernel_write_id_slots_gpu(ccl_global float *buffer, + size_t depth, + float id, + float matte_weight) { -#endif /* __KERNEL_CPU__ */ - kernel_write_id_slots(buffer, depth, id, matte_weight); - return depth * 2; +#endif /* __KERNEL_CPU__ */ + kernel_write_id_slots(buffer, depth, id, matte_weight); + return depth * 2; } -ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L, - ShaderData *sd, ccl_addr_space PathState *state, float3 throughput) +ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, + ccl_global float *buffer, + PathRadiance *L, + ShaderData *sd, + ccl_addr_space PathState *state, + 
float3 throughput) { #ifdef __PASSES__ - int path_flag = state->flag; - - if(!(path_flag & PATH_RAY_CAMERA)) - return; - - int flag = kernel_data.film.pass_flag; - int light_flag = kernel_data.film.light_pass_flag; - - if(!((flag | light_flag) & PASS_ANY)) - return; - - if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { - if(!(sd->flag & SD_TRANSPARENT) || - kernel_data.film.pass_alpha_threshold == 0.0f || - average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) - { - if(state->sample == 0) { - if(flag & PASSMASK(DEPTH)) { - float depth = camera_distance(kg, sd->P); - kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); - } - if(flag & PASSMASK(OBJECT_ID)) { - float id = object_pass_id(kg, sd->object); - kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); - } - if(flag & PASSMASK(MATERIAL_ID)) { - float id = shader_pass_id(kg, sd); - kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); - } - } - - if(flag & PASSMASK(NORMAL)) { - float3 normal = shader_bsdf_average_normal(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); - } - if(flag & PASSMASK(UV)) { - float3 uv = primitive_uv(kg, sd); - kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); - } - if(flag & PASSMASK(MOTION)) { - float4 speed = primitive_motion_vector(kg, sd); - kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); - kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); - } - - state->flag |= PATH_RAY_SINGLE_PASS_DONE; - } - } - - if(kernel_data.film.cryptomatte_passes) { - const float matte_weight = average(throughput) * (1.0f - average(shader_bsdf_transparency(kg, sd))); - if(matte_weight > 0.0f) { - ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; - if(kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - float id = object_cryptomatte_id(kg, sd->object); - cryptomatte_buffer += 
WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object); - } - if(kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - float id = shader_cryptomatte_id(kg, sd->shader); - cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, material); - } - if(kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - float id = object_cryptomatte_asset_id(kg, sd->object); - cryptomatte_buffer += WRITE_ID_SLOT(cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset); - } - } - } - - - if(light_flag & PASSMASK_COMPONENT(DIFFUSE)) - L->color_diffuse += shader_bsdf_diffuse(kg, sd)*throughput; - if(light_flag & PASSMASK_COMPONENT(GLOSSY)) - L->color_glossy += shader_bsdf_glossy(kg, sd)*throughput; - if(light_flag & PASSMASK_COMPONENT(TRANSMISSION)) - L->color_transmission += shader_bsdf_transmission(kg, sd)*throughput; - if(light_flag & PASSMASK_COMPONENT(SUBSURFACE)) - L->color_subsurface += shader_bsdf_subsurface(kg, sd)*throughput; - - if(light_flag & PASSMASK(MIST)) { - /* bring depth into 0..1 range */ - float mist_start = kernel_data.film.mist_start; - float mist_inv_depth = kernel_data.film.mist_inv_depth; - - float depth = camera_distance(kg, sd->P); - float mist = saturate((depth - mist_start)*mist_inv_depth); - - /* falloff */ - float mist_falloff = kernel_data.film.mist_falloff; - - if(mist_falloff == 1.0f) - ; - else if(mist_falloff == 2.0f) - mist = mist*mist; - else if(mist_falloff == 0.5f) - mist = sqrtf(mist); - else - mist = powf(mist, mist_falloff); - - /* modulate by transparency */ - float3 alpha = shader_bsdf_alpha(kg, sd); - L->mist += (1.0f - mist)*average(throughput*alpha); - } + int path_flag = state->flag; + + if (!(path_flag & PATH_RAY_CAMERA)) + return; + + int flag = kernel_data.film.pass_flag; + int light_flag = kernel_data.film.light_pass_flag; + + if (!((flag | light_flag) & PASS_ANY)) + return; + + if (!(path_flag & 
PATH_RAY_SINGLE_PASS_DONE)) { + if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || + average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { + if (state->sample == 0) { + if (flag & PASSMASK(DEPTH)) { + float depth = camera_distance(kg, sd->P); + kernel_write_pass_float(buffer + kernel_data.film.pass_depth, depth); + } + if (flag & PASSMASK(OBJECT_ID)) { + float id = object_pass_id(kg, sd->object); + kernel_write_pass_float(buffer + kernel_data.film.pass_object_id, id); + } + if (flag & PASSMASK(MATERIAL_ID)) { + float id = shader_pass_id(kg, sd); + kernel_write_pass_float(buffer + kernel_data.film.pass_material_id, id); + } + } + + if (flag & PASSMASK(NORMAL)) { + float3 normal = shader_bsdf_average_normal(kg, sd); + kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, normal); + } + if (flag & PASSMASK(UV)) { + float3 uv = primitive_uv(kg, sd); + kernel_write_pass_float3(buffer + kernel_data.film.pass_uv, uv); + } + if (flag & PASSMASK(MOTION)) { + float4 speed = primitive_motion_vector(kg, sd); + kernel_write_pass_float4(buffer + kernel_data.film.pass_motion, speed); + kernel_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f); + } + + state->flag |= PATH_RAY_SINGLE_PASS_DONE; + } + } + + if (kernel_data.film.cryptomatte_passes) { + const float matte_weight = average(throughput) * + (1.0f - average(shader_bsdf_transparency(kg, sd))); + if (matte_weight > 0.0f) { + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { + float id = object_cryptomatte_id(kg, sd->object); + cryptomatte_buffer += WRITE_ID_SLOT( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, object); + } + if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { + float id = shader_cryptomatte_id(kg, sd->shader); + cryptomatte_buffer += WRITE_ID_SLOT( + cryptomatte_buffer, 
kernel_data.film.cryptomatte_depth, id, matte_weight, material); + } + if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { + float id = object_cryptomatte_asset_id(kg, sd->object); + cryptomatte_buffer += WRITE_ID_SLOT( + cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight, asset); + } + } + } + + if (light_flag & PASSMASK_COMPONENT(DIFFUSE)) + L->color_diffuse += shader_bsdf_diffuse(kg, sd) * throughput; + if (light_flag & PASSMASK_COMPONENT(GLOSSY)) + L->color_glossy += shader_bsdf_glossy(kg, sd) * throughput; + if (light_flag & PASSMASK_COMPONENT(TRANSMISSION)) + L->color_transmission += shader_bsdf_transmission(kg, sd) * throughput; + if (light_flag & PASSMASK_COMPONENT(SUBSURFACE)) + L->color_subsurface += shader_bsdf_subsurface(kg, sd) * throughput; + + if (light_flag & PASSMASK(MIST)) { + /* bring depth into 0..1 range */ + float mist_start = kernel_data.film.mist_start; + float mist_inv_depth = kernel_data.film.mist_inv_depth; + + float depth = camera_distance(kg, sd->P); + float mist = saturate((depth - mist_start) * mist_inv_depth); + + /* falloff */ + float mist_falloff = kernel_data.film.mist_falloff; + + if (mist_falloff == 1.0f) + ; + else if (mist_falloff == 2.0f) + mist = mist * mist; + else if (mist_falloff == 0.5f) + mist = sqrtf(mist); + else + mist = powf(mist, mist_falloff); + + /* modulate by transparency */ + float3 alpha = shader_bsdf_alpha(kg, sd); + L->mist += (1.0f - mist) * average(throughput * alpha); + } #endif } -ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L) +ccl_device_inline void kernel_write_light_passes(KernelGlobals *kg, + ccl_global float *buffer, + PathRadiance *L) { #ifdef __PASSES__ - int light_flag = kernel_data.film.light_pass_flag; - - if(!kernel_data.film.use_light_pass) - return; - - if(light_flag & PASSMASK(DIFFUSE_INDIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse); - 
if(light_flag & PASSMASK(GLOSSY_INDIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy); - if(light_flag & PASSMASK(TRANSMISSION_INDIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, L->indirect_transmission); - if(light_flag & PASSMASK(SUBSURFACE_INDIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, L->indirect_subsurface); - if(light_flag & PASSMASK(VOLUME_INDIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter); - if(light_flag & PASSMASK(DIFFUSE_DIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse); - if(light_flag & PASSMASK(GLOSSY_DIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy); - if(light_flag & PASSMASK(TRANSMISSION_DIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, L->direct_transmission); - if(light_flag & PASSMASK(SUBSURFACE_DIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, L->direct_subsurface); - if(light_flag & PASSMASK(VOLUME_DIRECT)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter); - - if(light_flag & PASSMASK(EMISSION)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission); - if(light_flag & PASSMASK(BACKGROUND)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background); - if(light_flag & PASSMASK(AO)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao); - - if(light_flag & PASSMASK(DIFFUSE_COLOR)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse); - if(light_flag & PASSMASK(GLOSSY_COLOR)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy); - if(light_flag & PASSMASK(TRANSMISSION_COLOR)) - 
kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, L->color_transmission); - if(light_flag & PASSMASK(SUBSURFACE_COLOR)) - kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface); - if(light_flag & PASSMASK(SHADOW)) { - float4 shadow = L->shadow; - shadow.w = kernel_data.film.pass_shadow_scale; - kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow); - } - if(light_flag & PASSMASK(MIST)) - kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist); + int light_flag = kernel_data.film.light_pass_flag; + + if (!kernel_data.film.use_light_pass) + return; + + if (light_flag & PASSMASK(DIFFUSE_INDIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_indirect, L->indirect_diffuse); + if (light_flag & PASSMASK(GLOSSY_INDIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_indirect, L->indirect_glossy); + if (light_flag & PASSMASK(TRANSMISSION_INDIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_indirect, + L->indirect_transmission); + if (light_flag & PASSMASK(SUBSURFACE_INDIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_indirect, + L->indirect_subsurface); + if (light_flag & PASSMASK(VOLUME_INDIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_indirect, L->indirect_scatter); + if (light_flag & PASSMASK(DIFFUSE_DIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_direct, L->direct_diffuse); + if (light_flag & PASSMASK(GLOSSY_DIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_direct, L->direct_glossy); + if (light_flag & PASSMASK(TRANSMISSION_DIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_direct, + L->direct_transmission); + if (light_flag & PASSMASK(SUBSURFACE_DIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_direct, + L->direct_subsurface); + if 
(light_flag & PASSMASK(VOLUME_DIRECT)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_volume_direct, L->direct_scatter); + + if (light_flag & PASSMASK(EMISSION)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_emission, L->emission); + if (light_flag & PASSMASK(BACKGROUND)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_background, L->background); + if (light_flag & PASSMASK(AO)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, L->ao); + + if (light_flag & PASSMASK(DIFFUSE_COLOR)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_diffuse_color, L->color_diffuse); + if (light_flag & PASSMASK(GLOSSY_COLOR)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_glossy_color, L->color_glossy); + if (light_flag & PASSMASK(TRANSMISSION_COLOR)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_transmission_color, + L->color_transmission); + if (light_flag & PASSMASK(SUBSURFACE_COLOR)) + kernel_write_pass_float3(buffer + kernel_data.film.pass_subsurface_color, L->color_subsurface); + if (light_flag & PASSMASK(SHADOW)) { + float4 shadow = L->shadow; + shadow.w = kernel_data.film.pass_shadow_scale; + kernel_write_pass_float4(buffer + kernel_data.film.pass_shadow, shadow); + } + if (light_flag & PASSMASK(MIST)) + kernel_write_pass_float(buffer + kernel_data.film.pass_mist, 1.0f - L->mist); #endif } @@ -376,60 +397,54 @@ ccl_device_inline void kernel_write_result(KernelGlobals *kg, int sample, PathRadiance *L) { - PROFILING_INIT(kg, PROFILING_WRITE_RESULT); - PROFILING_OBJECT(PRIM_NONE); + PROFILING_INIT(kg, PROFILING_WRITE_RESULT); + PROFILING_OBJECT(PRIM_NONE); - float alpha; - float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha); + float alpha; + float3 L_sum = path_radiance_clamp_and_sum(kg, L, &alpha); - kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha)); + kernel_write_pass_float4(buffer, make_float4(L_sum.x, L_sum.y, L_sum.z, alpha)); - kernel_write_light_passes(kg, 
buffer, L); + kernel_write_light_passes(kg, buffer, L); #ifdef __DENOISING_FEATURES__ - if(kernel_data.film.pass_denoising_data) { + if (kernel_data.film.pass_denoising_data) { # ifdef __SHADOW_TRICKS__ - kernel_write_denoising_shadow(kg, - buffer + kernel_data.film.pass_denoising_data, - sample, - average(L->path_total), - average(L->path_total_shaded)); + kernel_write_denoising_shadow(kg, + buffer + kernel_data.film.pass_denoising_data, + sample, + average(L->path_total), + average(L->path_total_shaded)); # else - kernel_write_denoising_shadow(kg, - buffer + kernel_data.film.pass_denoising_data, - sample, - 0.0f, 0.0f); + kernel_write_denoising_shadow( + kg, buffer + kernel_data.film.pass_denoising_data, sample, 0.0f, 0.0f); # endif - if(kernel_data.film.pass_denoising_clean) { - float3 noisy, clean; - path_radiance_split_denoising(kg, L, &noisy, &clean); - kernel_write_pass_float3_variance( - buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, - noisy); - kernel_write_pass_float3_unaligned( - buffer + kernel_data.film.pass_denoising_clean, - clean); - } - else { - kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, - ensure_finite3(L_sum)); - } - - kernel_write_pass_float3_variance( - buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, - L->denoising_normal); - kernel_write_pass_float3_variance( - buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, - L->denoising_albedo); - kernel_write_pass_float_variance( - buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, - L->denoising_depth); - } -#endif /* __DENOISING_FEATURES__ */ - + if (kernel_data.film.pass_denoising_clean) { + float3 noisy, clean; + path_radiance_split_denoising(kg, L, &noisy, &clean); + kernel_write_pass_float3_variance( + buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, noisy); + kernel_write_pass_float3_unaligned(buffer + 
kernel_data.film.pass_denoising_clean, clean); + } + else { + kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + + DENOISING_PASS_COLOR, + ensure_finite3(L_sum)); + } + + kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + + DENOISING_PASS_NORMAL, + L->denoising_normal); + kernel_write_pass_float3_variance(buffer + kernel_data.film.pass_denoising_data + + DENOISING_PASS_ALBEDO, + L->denoising_albedo); + kernel_write_pass_float_variance( + buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH, L->denoising_depth); + } +#endif /* __DENOISING_FEATURES__ */ #ifdef __KERNEL_DEBUG__ - kernel_write_debug_passes(kg, buffer, L); + kernel_write_debug_passes(kg, buffer, L); #endif } diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index a1fc6028293..2be1b745632 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -50,309 +50,298 @@ CCL_NAMESPACE_BEGIN -ccl_device_forceinline bool kernel_path_scene_intersect( - KernelGlobals *kg, - ccl_addr_space PathState *state, - Ray *ray, - Intersection *isect, - PathRadiance *L) +ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg, + ccl_addr_space PathState *state, + Ray *ray, + Intersection *isect, + PathRadiance *L) { - PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT); + PROFILING_INIT(kg, PROFILING_SCENE_INTERSECT); - uint visibility = path_state_ray_visibility(kg, state); + uint visibility = path_state_ray_visibility(kg, state); - if(path_state_ao_bounce(kg, state)) { - visibility = PATH_RAY_SHADOW; - ray->t = kernel_data.background.ao_distance; - } + if (path_state_ao_bounce(kg, state)) { + visibility = PATH_RAY_SHADOW; + ray->t = kernel_data.background.ao_distance; + } #ifdef __HAIR__ - float difl = 0.0f, extmax = 0.0f; - uint lcg_state = 0; + float difl = 0.0f, extmax = 0.0f; + uint lcg_state = 0; - if(kernel_data.bvh.have_curves) { - 
if((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) { - float3 pixdiff = ray->dD.dx + ray->dD.dy; - /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/ - difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f; - } + if (kernel_data.bvh.have_curves) { + if ((kernel_data.cam.resolution == 1) && (state->flag & PATH_RAY_CAMERA)) { + float3 pixdiff = ray->dD.dx + ray->dD.dy; + /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/ + difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f; + } - extmax = kernel_data.curve.maximum_width; - lcg_state = lcg_state_init_addrspace(state, 0x51633e2d); - } + extmax = kernel_data.curve.maximum_width; + lcg_state = lcg_state_init_addrspace(state, 0x51633e2d); + } - bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax); + bool hit = scene_intersect(kg, *ray, visibility, isect, &lcg_state, difl, extmax); #else - bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f); -#endif /* __HAIR__ */ + bool hit = scene_intersect(kg, *ray, visibility, isect, NULL, 0.0f, 0.0f); +#endif /* __HAIR__ */ #ifdef __KERNEL_DEBUG__ - if(state->flag & PATH_RAY_CAMERA) { - L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes; - L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances; - L->debug_data.num_bvh_intersections += isect->num_intersections; - } - L->debug_data.num_ray_bounces++; -#endif /* __KERNEL_DEBUG__ */ - - return hit; + if (state->flag & PATH_RAY_CAMERA) { + L->debug_data.num_bvh_traversed_nodes += isect->num_traversed_nodes; + L->debug_data.num_bvh_traversed_instances += isect->num_traversed_instances; + L->debug_data.num_bvh_intersections += isect->num_intersections; + } + L->debug_data.num_ray_bounces++; +#endif /* __KERNEL_DEBUG__ */ + + return hit; } -ccl_device_forceinline void kernel_path_lamp_emission( - KernelGlobals *kg, - ccl_addr_space PathState *state, - Ray *ray, - float3 throughput, - ccl_addr_space Intersection 
*isect, - ShaderData *emission_sd, - PathRadiance *L) +ccl_device_forceinline void kernel_path_lamp_emission(KernelGlobals *kg, + ccl_addr_space PathState *state, + Ray *ray, + float3 throughput, + ccl_addr_space Intersection *isect, + ShaderData *emission_sd, + PathRadiance *L) { - PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION); + PROFILING_INIT(kg, PROFILING_INDIRECT_EMISSION); #ifdef __LAMP_MIS__ - if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) { - /* ray starting from previous non-transparent bounce */ - Ray light_ray; - - light_ray.P = ray->P - state->ray_t*ray->D; - state->ray_t += isect->t; - light_ray.D = ray->D; - light_ray.t = state->ray_t; - light_ray.time = ray->time; - light_ray.dD = ray->dD; - light_ray.dP = ray->dP; - - /* intersect with lamp */ - float3 emission; - - if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) - path_radiance_accum_emission(L, state, throughput, emission); - } -#endif /* __LAMP_MIS__ */ + if (kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) { + /* ray starting from previous non-transparent bounce */ + Ray light_ray; + + light_ray.P = ray->P - state->ray_t * ray->D; + state->ray_t += isect->t; + light_ray.D = ray->D; + light_ray.t = state->ray_t; + light_ray.time = ray->time; + light_ray.dD = ray->dD; + light_ray.dP = ray->dP; + + /* intersect with lamp */ + float3 emission; + + if (indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) + path_radiance_accum_emission(L, state, throughput, emission); + } +#endif /* __LAMP_MIS__ */ } -ccl_device_forceinline void kernel_path_background( - KernelGlobals *kg, - ccl_addr_space PathState *state, - ccl_addr_space Ray *ray, - float3 throughput, - ShaderData *sd, - PathRadiance *L) +ccl_device_forceinline void kernel_path_background(KernelGlobals *kg, + ccl_addr_space PathState *state, + ccl_addr_space Ray *ray, + float3 throughput, + ShaderData *sd, + PathRadiance *L) { - /* eval background 
shader if nothing hit */ - if(kernel_data.background.transparent && (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { - L->transparent += average(throughput); + /* eval background shader if nothing hit */ + if (kernel_data.background.transparent && (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { + L->transparent += average(throughput); #ifdef __PASSES__ - if(!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND))) -#endif /* __PASSES__ */ - return; - } + if (!(kernel_data.film.light_pass_flag & PASSMASK(BACKGROUND))) +#endif /* __PASSES__ */ + return; + } - /* When using the ao bounces approximation, adjust background - * shader intensity with ao factor. */ - if(path_state_ao_bounce(kg, state)) { - throughput *= kernel_data.background.ao_bounces_factor; - } + /* When using the ao bounces approximation, adjust background + * shader intensity with ao factor. */ + if (path_state_ao_bounce(kg, state)) { + throughput *= kernel_data.background.ao_bounces_factor; + } #ifdef __BACKGROUND__ - /* sample background shader */ - float3 L_background = indirect_background(kg, sd, state, ray); - path_radiance_accum_background(L, state, throughput, L_background); -#endif /* __BACKGROUND__ */ + /* sample background shader */ + float3 L_background = indirect_background(kg, sd, state, ray); + path_radiance_accum_background(L, state, throughput, L_background); +#endif /* __BACKGROUND__ */ } #ifndef __SPLIT_KERNEL__ -#ifdef __VOLUME__ -ccl_device_forceinline VolumeIntegrateResult kernel_path_volume( - KernelGlobals *kg, - ShaderData *sd, - PathState *state, - Ray *ray, - float3 *throughput, - ccl_addr_space Intersection *isect, - bool hit, - ShaderData *emission_sd, - PathRadiance *L) +# ifdef __VOLUME__ +ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + Ray *ray, + float3 *throughput, + ccl_addr_space Intersection *isect, + bool hit, + ShaderData *emission_sd, + PathRadiance *L) { - PROFILING_INIT(kg, 
PROFILING_VOLUME); - - /* Sanitize volume stack. */ - if(!hit) { - kernel_volume_clean_stack(kg, state->volume_stack); - } - - if(state->volume_stack[0].shader == SHADER_NONE) { - return VOLUME_PATH_ATTENUATED; - } - - /* volume attenuation, emission, scatter */ - Ray volume_ray = *ray; - volume_ray.t = (hit)? isect->t: FLT_MAX; - - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); - -# ifdef __VOLUME_DECOUPLED__ - int sampling_method = volume_stack_sampling_method(kg, state->volume_stack); - bool direct = (state->flag & PATH_RAY_CAMERA) != 0; - bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method); - - if(decoupled) { - /* cache steps along volume for repeated sampling */ - VolumeSegment volume_segment; - - shader_setup_from_volume(kg, sd, &volume_ray); - kernel_volume_decoupled_record(kg, state, - &volume_ray, sd, &volume_segment, heterogeneous); - - volume_segment.sampling_method = sampling_method; - - /* emission */ - if(volume_segment.closure_flag & SD_EMISSION) - path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission); - - /* scattering */ - VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED; - - if(volume_segment.closure_flag & SD_SCATTER) { - int all = kernel_data.integrator.sample_all_lights_indirect; - - /* direct light sampling */ - kernel_branched_path_volume_connect_light(kg, sd, - emission_sd, *throughput, state, L, all, - &volume_ray, &volume_segment); - - /* indirect sample. 
if we use distance sampling and take just - * one sample for direct and indirect light, we could share - * this computation, but makes code a bit complex */ - float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); - - result = kernel_volume_decoupled_scatter(kg, - state, &volume_ray, sd, throughput, - rphase, rscatter, &volume_segment, NULL, true); - } - - /* free cached steps */ - kernel_volume_decoupled_free(kg, &volume_segment); - - if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) - return VOLUME_PATH_SCATTERED; - else - return VOLUME_PATH_MISSED; - } - else { - *throughput *= volume_segment.accum_transmittance; - } - } - else -# endif /* __VOLUME_DECOUPLED__ */ - { - /* integrate along volume segment with distance sampling */ - VolumeIntegrateResult result = kernel_volume_integrate( - kg, state, sd, &volume_ray, L, throughput, heterogeneous); - -# ifdef __VOLUME_SCATTER__ - if(result == VOLUME_PATH_SCATTERED) { - /* direct lighting */ - kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); - - /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) - return VOLUME_PATH_SCATTERED; - else - return VOLUME_PATH_MISSED; - } -# endif /* __VOLUME_SCATTER__ */ - } - - return VOLUME_PATH_ATTENUATED; + PROFILING_INIT(kg, PROFILING_VOLUME); + + /* Sanitize volume stack. */ + if (!hit) { + kernel_volume_clean_stack(kg, state->volume_stack); + } + + if (state->volume_stack[0].shader == SHADER_NONE) { + return VOLUME_PATH_ATTENUATED; + } + + /* volume attenuation, emission, scatter */ + Ray volume_ray = *ray; + volume_ray.t = (hit) ? 
isect->t : FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + +# ifdef __VOLUME_DECOUPLED__ + int sampling_method = volume_stack_sampling_method(kg, state->volume_stack); + bool direct = (state->flag & PATH_RAY_CAMERA) != 0; + bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method); + + if (decoupled) { + /* cache steps along volume for repeated sampling */ + VolumeSegment volume_segment; + + shader_setup_from_volume(kg, sd, &volume_ray); + kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous); + + volume_segment.sampling_method = sampling_method; + + /* emission */ + if (volume_segment.closure_flag & SD_EMISSION) + path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission); + + /* scattering */ + VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED; + + if (volume_segment.closure_flag & SD_SCATTER) { + int all = kernel_data.integrator.sample_all_lights_indirect; + + /* direct light sampling */ + kernel_branched_path_volume_connect_light( + kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment); + + /* indirect sample. 
if we use distance sampling and take just + * one sample for direct and indirect light, we could share + * this computation, but makes code a bit complex */ + float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); + float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); + + result = kernel_volume_decoupled_scatter( + kg, state, &volume_ray, sd, throughput, rphase, rscatter, &volume_segment, NULL, true); + } + + /* free cached steps */ + kernel_volume_decoupled_free(kg, &volume_segment); + + if (result == VOLUME_PATH_SCATTERED) { + if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) + return VOLUME_PATH_SCATTERED; + else + return VOLUME_PATH_MISSED; + } + else { + *throughput *= volume_segment.accum_transmittance; + } + } + else +# endif /* __VOLUME_DECOUPLED__ */ + { + /* integrate along volume segment with distance sampling */ + VolumeIntegrateResult result = kernel_volume_integrate( + kg, state, sd, &volume_ray, L, throughput, heterogeneous); + +# ifdef __VOLUME_SCATTER__ + if (result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); + + /* indirect light bounce */ + if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) + return VOLUME_PATH_SCATTERED; + else + return VOLUME_PATH_MISSED; + } +# endif /* __VOLUME_SCATTER__ */ + } + + return VOLUME_PATH_ATTENUATED; } -#endif /* __VOLUME__ */ - -#endif /* __SPLIT_KERNEL__ */ - -ccl_device_forceinline bool kernel_path_shader_apply( - KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space PathState *state, - ccl_addr_space Ray *ray, - float3 throughput, - ShaderData *emission_sd, - PathRadiance *L, - ccl_global float *buffer) +# endif /* __VOLUME__ */ + +#endif /* __SPLIT_KERNEL__ */ + +ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg, + ShaderData *sd, + ccl_addr_space PathState *state, + ccl_addr_space Ray *ray, + float3 throughput, + ShaderData 
*emission_sd, + PathRadiance *L, + ccl_global float *buffer) { - PROFILING_INIT(kg, PROFILING_SHADER_APPLY); + PROFILING_INIT(kg, PROFILING_SHADER_APPLY); #ifdef __SHADOW_TRICKS__ - if((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) { - if(state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) { - state->flag |= (PATH_RAY_SHADOW_CATCHER | - PATH_RAY_STORE_SHADOW_INFO); - - float3 bg = make_float3(0.0f, 0.0f, 0.0f); - if(!kernel_data.background.transparent) { - bg = indirect_background(kg, emission_sd, state, ray); - } - path_radiance_accum_shadowcatcher(L, throughput, bg); - } - } - else if(state->flag & PATH_RAY_SHADOW_CATCHER) { - /* Only update transparency after shadow catcher bounce. */ - L->shadow_transparency *= - average(shader_bsdf_transparency(kg, sd)); - } -#endif /* __SHADOW_TRICKS__ */ - - /* holdout */ + if ((sd->object_flag & SD_OBJECT_SHADOW_CATCHER)) { + if (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND) { + state->flag |= (PATH_RAY_SHADOW_CATCHER | PATH_RAY_STORE_SHADOW_INFO); + + float3 bg = make_float3(0.0f, 0.0f, 0.0f); + if (!kernel_data.background.transparent) { + bg = indirect_background(kg, emission_sd, state, ray); + } + path_radiance_accum_shadowcatcher(L, throughput, bg); + } + } + else if (state->flag & PATH_RAY_SHADOW_CATCHER) { + /* Only update transparency after shadow catcher bounce. 
*/ + L->shadow_transparency *= average(shader_bsdf_transparency(kg, sd)); + } +#endif /* __SHADOW_TRICKS__ */ + + /* holdout */ #ifdef __HOLDOUT__ - if(((sd->flag & SD_HOLDOUT) || - (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && - (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) - { - if(kernel_data.background.transparent) { - float3 holdout_weight; - if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { - holdout_weight = make_float3(1.0f, 1.0f, 1.0f); - } - else { - holdout_weight = shader_holdout_eval(kg, sd); - } - /* any throughput is ok, should all be identical here */ - L->transparent += average(holdout_weight*throughput); - } - - if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { - return false; - } - } -#endif /* __HOLDOUT__ */ - - /* holdout mask objects do not write data passes */ - kernel_write_data_passes(kg, buffer, L, sd, state, throughput); - - /* blurring of bsdf after bounces, for rays that have a small likelihood - * of following this particular path (diffuse, rough glossy) */ - if(kernel_data.integrator.filter_glossy != FLT_MAX) { - float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf; - - if(blur_pdf < 1.0f) { - float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f; - shader_bsdf_blur(kg, sd, blur_roughness); - } - } + if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) && + (state->flag & PATH_RAY_TRANSPARENT_BACKGROUND)) { + if (kernel_data.background.transparent) { + float3 holdout_weight; + if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { + holdout_weight = make_float3(1.0f, 1.0f, 1.0f); + } + else { + holdout_weight = shader_holdout_eval(kg, sd); + } + /* any throughput is ok, should all be identical here */ + L->transparent += average(holdout_weight * throughput); + } + + if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) { + return false; + } + } +#endif /* __HOLDOUT__ */ + + /* holdout mask objects do not write data passes */ + kernel_write_data_passes(kg, buffer, L, sd, state, throughput); + + /* blurring of 
bsdf after bounces, for rays that have a small likelihood + * of following this particular path (diffuse, rough glossy) */ + if (kernel_data.integrator.filter_glossy != FLT_MAX) { + float blur_pdf = kernel_data.integrator.filter_glossy * state->min_ray_pdf; + + if (blur_pdf < 1.0f) { + float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f; + shader_bsdf_blur(kg, sd, blur_roughness); + } + } #ifdef __EMISSION__ - /* emission */ - if(sd->flag & SD_EMISSION) { - float3 emission = indirect_primitive_emission(kg, sd, sd->ray_length, state->flag, state->ray_pdf); - path_radiance_accum_emission(L, state, throughput, emission); - } -#endif /* __EMISSION__ */ - - return true; + /* emission */ + if (sd->flag & SD_EMISSION) { + float3 emission = indirect_primitive_emission( + kg, sd, sd->ray_length, state->flag, state->ray_pdf); + path_radiance_accum_emission(L, state, throughput, emission); + } +#endif /* __EMISSION__ */ + + return true; } ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, @@ -363,44 +352,44 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, float3 throughput, float3 ao_alpha) { - PROFILING_INIT(kg, PROFILING_AO); - - /* todo: solve correlation */ - float bsdf_u, bsdf_v; - - path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - - float ao_factor = kernel_data.background.ao_factor; - float3 ao_N; - float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); - float3 ao_D; - float ao_pdf; - - sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - - if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { - Ray light_ray; - float3 ao_shadow; - - light_ray.P = ray_offset(sd->P, sd->Ng); - light_ray.D = ao_D; - light_ray.t = kernel_data.background.ao_distance; - light_ray.time = sd->time; - light_ray.dP = sd->dP; - light_ray.dD = differential3_zero(); - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { - path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow); - } - else { - 
path_radiance_accum_total_ao(L, state, throughput, ao_bsdf); - } - } + PROFILING_INIT(kg, PROFILING_AO); + + /* todo: solve correlation */ + float bsdf_u, bsdf_v; + + path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + + float ao_factor = kernel_data.background.ao_factor; + float3 ao_N; + float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); + float3 ao_D; + float ao_pdf; + + sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + + if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { + Ray light_ray; + float3 ao_shadow; + + light_ray.P = ray_offset(sd->P, sd->Ng); + light_ray.D = ao_D; + light_ray.t = kernel_data.background.ao_distance; + light_ray.time = sd->time; + light_ray.dP = sd->dP; + light_ray.dD = differential3_zero(); + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { + path_radiance_accum_ao(L, state, throughput, ao_alpha, ao_bsdf, ao_shadow); + } + else { + path_radiance_accum_total_ao(L, state, throughput, ao_bsdf); + } + } } #ifndef __SPLIT_KERNEL__ -#if defined(__BRANCHED_PATH__) || defined(__BAKING__) +# if defined(__BRANCHED_PATH__) || defined(__BAKING__) ccl_device void kernel_path_indirect(KernelGlobals *kg, ShaderData *sd, @@ -410,369 +399,300 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, PathState *state, PathRadiance *L) { -#ifdef __SUBSURFACE__ - SubsurfaceIndirectRays ss_indirect; - kernel_path_subsurface_init_indirect(&ss_indirect); - - for(;;) { -#endif /* __SUBSURFACE__ */ - - /* path iteration */ - for(;;) { - /* Find intersection with objects in scene. */ - Intersection isect; - bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L); - - /* Find intersection with lamps and compute emission for MIS. */ - kernel_path_lamp_emission(kg, state, ray, throughput, &isect, sd, L); - -#ifdef __VOLUME__ - /* Volume integration. 
*/ - VolumeIntegrateResult result = kernel_path_volume(kg, - sd, - state, - ray, - &throughput, - &isect, - hit, - emission_sd, - L); - - if(result == VOLUME_PATH_SCATTERED) { - continue; - } - else if(result == VOLUME_PATH_MISSED) { - break; - } -#endif /* __VOLUME__*/ - - /* Shade background. */ - if(!hit) { - kernel_path_background(kg, state, ray, throughput, sd, L); - break; - } - else if(path_state_ao_bounce(kg, state)) { - break; - } - - /* Setup shader data. */ - shader_setup_from_ray(kg, sd, &isect, ray); - - /* Skip most work for volume bounding surface. */ -#ifdef __VOLUME__ - if(!(sd->flag & SD_HAS_ONLY_VOLUME)) { -#endif - - /* Evaluate shader. */ - shader_eval_surface(kg, sd, state, state->flag); - shader_prepare_closures(sd, state); - - /* Apply shadow catcher, holdout, emission. */ - if(!kernel_path_shader_apply(kg, - sd, - state, - ray, - throughput, - emission_sd, - L, - NULL)) - { - break; - } - - /* path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. 
gives too many unneeded - * shader evaluations, only need emission if we are going to terminate */ - float probability = path_state_continuation_probability(kg, state, throughput); - - if(probability == 0.0f) { - break; - } - else if(probability != 1.0f) { - float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); - - if(terminate >= probability) - break; - - throughput /= probability; - } - - kernel_update_denoising_features(kg, sd, state, L); - -#ifdef __AO__ - /* ambient occlusion */ - if(kernel_data.integrator.use_ambient_occlusion) { - kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f)); - } -#endif /* __AO__ */ - - -#ifdef __SUBSURFACE__ - /* bssrdf scatter to a different location on the same object, replacing - * the closures with a diffuse BSDF */ - if(sd->flag & SD_BSSRDF) { - if(kernel_path_subsurface_scatter(kg, - sd, - emission_sd, - L, - state, - ray, - &throughput, - &ss_indirect)) - { - break; - } - } -#endif /* __SUBSURFACE__ */ - -#if defined(__EMISSION__) - if(kernel_data.integrator.use_direct_light) { - int all = (kernel_data.integrator.sample_all_lights_indirect) || - (state->flag & PATH_RAY_SHADOW_CATCHER); - kernel_branched_path_surface_connect_light(kg, - sd, - emission_sd, - state, - throughput, - 1.0f, - L, - all); - } -#endif /* defined(__EMISSION__) */ - -#ifdef __VOLUME__ - } -#endif - - if(!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray)) - break; - } - -#ifdef __SUBSURFACE__ - /* Trace indirect subsurface rays by restarting the loop. this uses less - * stack memory than invoking kernel_path_indirect. 
- */ - if(ss_indirect.num_rays) { - kernel_path_subsurface_setup_indirect(kg, - &ss_indirect, - state, - ray, - L, - &throughput); - } - else { - break; - } - } -#endif /* __SUBSURFACE__ */ +# ifdef __SUBSURFACE__ + SubsurfaceIndirectRays ss_indirect; + kernel_path_subsurface_init_indirect(&ss_indirect); + + for (;;) { +# endif /* __SUBSURFACE__ */ + + /* path iteration */ + for (;;) { + /* Find intersection with objects in scene. */ + Intersection isect; + bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L); + + /* Find intersection with lamps and compute emission for MIS. */ + kernel_path_lamp_emission(kg, state, ray, throughput, &isect, sd, L); + +# ifdef __VOLUME__ + /* Volume integration. */ + VolumeIntegrateResult result = kernel_path_volume( + kg, sd, state, ray, &throughput, &isect, hit, emission_sd, L); + + if (result == VOLUME_PATH_SCATTERED) { + continue; + } + else if (result == VOLUME_PATH_MISSED) { + break; + } +# endif /* __VOLUME__*/ + + /* Shade background. */ + if (!hit) { + kernel_path_background(kg, state, ray, throughput, sd, L); + break; + } + else if (path_state_ao_bounce(kg, state)) { + break; + } + + /* Setup shader data. */ + shader_setup_from_ray(kg, sd, &isect, ray); + + /* Skip most work for volume bounding surface. */ +# ifdef __VOLUME__ + if (!(sd->flag & SD_HAS_ONLY_VOLUME)) { +# endif + + /* Evaluate shader. */ + shader_eval_surface(kg, sd, state, state->flag); + shader_prepare_closures(sd, state); + + /* Apply shadow catcher, holdout, emission. */ + if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, NULL)) { + break; + } + + /* path termination. this is a strange place to put the termination, it's + * mainly due to the mixed in MIS that we use. 
gives too many unneeded + * shader evaluations, only need emission if we are going to terminate */ + float probability = path_state_continuation_probability(kg, state, throughput); + + if (probability == 0.0f) { + break; + } + else if (probability != 1.0f) { + float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); + + if (terminate >= probability) + break; + + throughput /= probability; + } + + kernel_update_denoising_features(kg, sd, state, L); + +# ifdef __AO__ + /* ambient occlusion */ + if (kernel_data.integrator.use_ambient_occlusion) { + kernel_path_ao(kg, sd, emission_sd, L, state, throughput, make_float3(0.0f, 0.0f, 0.0f)); + } +# endif /* __AO__ */ + +# ifdef __SUBSURFACE__ + /* bssrdf scatter to a different location on the same object, replacing + * the closures with a diffuse BSDF */ + if (sd->flag & SD_BSSRDF) { + if (kernel_path_subsurface_scatter( + kg, sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) { + break; + } + } +# endif /* __SUBSURFACE__ */ + +# if defined(__EMISSION__) + if (kernel_data.integrator.use_direct_light) { + int all = (kernel_data.integrator.sample_all_lights_indirect) || + (state->flag & PATH_RAY_SHADOW_CATCHER); + kernel_branched_path_surface_connect_light( + kg, sd, emission_sd, state, throughput, 1.0f, L, all); + } +# endif /* defined(__EMISSION__) */ + +# ifdef __VOLUME__ + } +# endif + + if (!kernel_path_surface_bounce(kg, sd, &throughput, state, &L->state, ray)) + break; + } + +# ifdef __SUBSURFACE__ + /* Trace indirect subsurface rays by restarting the loop. this uses less + * stack memory than invoking kernel_path_indirect. 
+ */ + if (ss_indirect.num_rays) { + kernel_path_subsurface_setup_indirect(kg, &ss_indirect, state, ray, L, &throughput); + } + else { + break; + } + } +# endif /* __SUBSURFACE__ */ } -#endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */ +# endif /* defined(__BRANCHED_PATH__) || defined(__BAKING__) */ -ccl_device_forceinline void kernel_path_integrate( - KernelGlobals *kg, - PathState *state, - float3 throughput, - Ray *ray, - PathRadiance *L, - ccl_global float *buffer, - ShaderData *emission_sd) +ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg, + PathState *state, + float3 throughput, + Ray *ray, + PathRadiance *L, + ccl_global float *buffer, + ShaderData *emission_sd) { - PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE); - - /* Shader data memory used for both volumes and surfaces, saves stack space. */ - ShaderData sd; - -#ifdef __SUBSURFACE__ - SubsurfaceIndirectRays ss_indirect; - kernel_path_subsurface_init_indirect(&ss_indirect); - - for(;;) { -#endif /* __SUBSURFACE__ */ - - /* path iteration */ - for(;;) { - /* Find intersection with objects in scene. */ - Intersection isect; - bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L); - - /* Find intersection with lamps and compute emission for MIS. */ - kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L); - -#ifdef __VOLUME__ - /* Volume integration. */ - VolumeIntegrateResult result = kernel_path_volume(kg, - &sd, - state, - ray, - &throughput, - &isect, - hit, - emission_sd, - L); - - if(result == VOLUME_PATH_SCATTERED) { - continue; - } - else if(result == VOLUME_PATH_MISSED) { - break; - } -#endif /* __VOLUME__*/ - - /* Shade background. */ - if(!hit) { - kernel_path_background(kg, state, ray, throughput, &sd, L); - break; - } - else if(path_state_ao_bounce(kg, state)) { - break; - } - - /* Setup shader data. */ - shader_setup_from_ray(kg, &sd, &isect, ray); - - /* Skip most work for volume bounding surface. 
*/ -#ifdef __VOLUME__ - if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { -#endif - - /* Evaluate shader. */ - shader_eval_surface(kg, &sd, state, state->flag); - shader_prepare_closures(&sd, state); - - /* Apply shadow catcher, holdout, emission. */ - if(!kernel_path_shader_apply(kg, - &sd, - state, - ray, - throughput, - emission_sd, - L, - buffer)) - { - break; - } - - /* path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. gives too many unneeded - * shader evaluations, only need emission if we are going to terminate */ - float probability = path_state_continuation_probability(kg, state, throughput); - - if(probability == 0.0f) { - break; - } - else if(probability != 1.0f) { - float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); - if(terminate >= probability) - break; - - throughput /= probability; - } - - kernel_update_denoising_features(kg, &sd, state, L); - -#ifdef __AO__ - /* ambient occlusion */ - if(kernel_data.integrator.use_ambient_occlusion) { - kernel_path_ao(kg, &sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, &sd)); - } -#endif /* __AO__ */ - -#ifdef __SUBSURFACE__ - /* bssrdf scatter to a different location on the same object, replacing - * the closures with a diffuse BSDF */ - if(sd.flag & SD_BSSRDF) { - if(kernel_path_subsurface_scatter(kg, - &sd, - emission_sd, - L, - state, - ray, - &throughput, - &ss_indirect)) - { - break; - } - } -#endif /* __SUBSURFACE__ */ - - /* direct lighting */ - kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L); - -#ifdef __VOLUME__ - } -#endif - - /* compute direct lighting and next bounce */ - if(!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray)) - break; - } - -#ifdef __SUBSURFACE__ - /* Trace indirect subsurface rays by restarting the loop. this uses less - * stack memory than invoking kernel_path_indirect. 
- */ - if(ss_indirect.num_rays) { - kernel_path_subsurface_setup_indirect(kg, - &ss_indirect, - state, - ray, - L, - &throughput); - } - else { - break; - } - } -#endif /* __SUBSURFACE__ */ + PROFILING_INIT(kg, PROFILING_PATH_INTEGRATE); + + /* Shader data memory used for both volumes and surfaces, saves stack space. */ + ShaderData sd; + +# ifdef __SUBSURFACE__ + SubsurfaceIndirectRays ss_indirect; + kernel_path_subsurface_init_indirect(&ss_indirect); + + for (;;) { +# endif /* __SUBSURFACE__ */ + + /* path iteration */ + for (;;) { + /* Find intersection with objects in scene. */ + Intersection isect; + bool hit = kernel_path_scene_intersect(kg, state, ray, &isect, L); + + /* Find intersection with lamps and compute emission for MIS. */ + kernel_path_lamp_emission(kg, state, ray, throughput, &isect, &sd, L); + +# ifdef __VOLUME__ + /* Volume integration. */ + VolumeIntegrateResult result = kernel_path_volume( + kg, &sd, state, ray, &throughput, &isect, hit, emission_sd, L); + + if (result == VOLUME_PATH_SCATTERED) { + continue; + } + else if (result == VOLUME_PATH_MISSED) { + break; + } +# endif /* __VOLUME__*/ + + /* Shade background. */ + if (!hit) { + kernel_path_background(kg, state, ray, throughput, &sd, L); + break; + } + else if (path_state_ao_bounce(kg, state)) { + break; + } + + /* Setup shader data. */ + shader_setup_from_ray(kg, &sd, &isect, ray); + + /* Skip most work for volume bounding surface. */ +# ifdef __VOLUME__ + if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { +# endif + + /* Evaluate shader. */ + shader_eval_surface(kg, &sd, state, state->flag); + shader_prepare_closures(&sd, state); + + /* Apply shadow catcher, holdout, emission. */ + if (!kernel_path_shader_apply(kg, &sd, state, ray, throughput, emission_sd, L, buffer)) { + break; + } + + /* path termination. this is a strange place to put the termination, it's + * mainly due to the mixed in MIS that we use. 
gives too many unneeded + * shader evaluations, only need emission if we are going to terminate */ + float probability = path_state_continuation_probability(kg, state, throughput); + + if (probability == 0.0f) { + break; + } + else if (probability != 1.0f) { + float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); + if (terminate >= probability) + break; + + throughput /= probability; + } + + kernel_update_denoising_features(kg, &sd, state, L); + +# ifdef __AO__ + /* ambient occlusion */ + if (kernel_data.integrator.use_ambient_occlusion) { + kernel_path_ao(kg, &sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, &sd)); + } +# endif /* __AO__ */ + +# ifdef __SUBSURFACE__ + /* bssrdf scatter to a different location on the same object, replacing + * the closures with a diffuse BSDF */ + if (sd.flag & SD_BSSRDF) { + if (kernel_path_subsurface_scatter( + kg, &sd, emission_sd, L, state, ray, &throughput, &ss_indirect)) { + break; + } + } +# endif /* __SUBSURFACE__ */ + + /* direct lighting */ + kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L); + +# ifdef __VOLUME__ + } +# endif + + /* compute direct lighting and next bounce */ + if (!kernel_path_surface_bounce(kg, &sd, &throughput, state, &L->state, ray)) + break; + } + +# ifdef __SUBSURFACE__ + /* Trace indirect subsurface rays by restarting the loop. this uses less + * stack memory than invoking kernel_path_indirect. 
+ */ + if (ss_indirect.num_rays) { + kernel_path_subsurface_setup_indirect(kg, &ss_indirect, state, ray, L, &throughput); + } + else { + break; + } + } +# endif /* __SUBSURFACE__ */ } -ccl_device void kernel_path_trace(KernelGlobals *kg, - ccl_global float *buffer, - int sample, int x, int y, int offset, int stride) +ccl_device void kernel_path_trace( + KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride) { - PROFILING_INIT(kg, PROFILING_RAY_SETUP); + PROFILING_INIT(kg, PROFILING_RAY_SETUP); - /* buffer offset */ - int index = offset + x + y*stride; - int pass_stride = kernel_data.film.pass_stride; + /* buffer offset */ + int index = offset + x + y * stride; + int pass_stride = kernel_data.film.pass_stride; - buffer += index*pass_stride; + buffer += index * pass_stride; - /* Initialize random numbers and sample ray. */ - uint rng_hash; - Ray ray; + /* Initialize random numbers and sample ray. */ + uint rng_hash; + Ray ray; - kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); + kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); - if(ray.t == 0.0f) { - return; - } + if (ray.t == 0.0f) { + return; + } - /* Initialize state. */ - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + /* Initialize state. */ + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - PathRadiance L; - path_radiance_init(&L, kernel_data.film.use_light_pass); + PathRadiance L; + path_radiance_init(&L, kernel_data.film.use_light_pass); - ShaderDataTinyStorage emission_sd_storage; - ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - PathState state; - path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); + PathState state; + path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); - /* Integrate. 
*/ - kernel_path_integrate(kg, - &state, - throughput, - &ray, - &L, - buffer, - emission_sd); + /* Integrate. */ + kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd); - kernel_write_result(kg, buffer, sample, &L); + kernel_write_result(kg, buffer, sample, &L); } -#endif /* __SPLIT_KERNEL__ */ +#endif /* __SPLIT_KERNEL__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 21da4d9308b..e8ce61024b3 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -25,297 +25,262 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, ccl_addr_space PathState *state, float3 throughput) { - int num_samples = kernel_data.integrator.ao_samples; - float num_samples_inv = 1.0f/num_samples; - float ao_factor = kernel_data.background.ao_factor; - float3 ao_N; - float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); - float3 ao_alpha = shader_bsdf_alpha(kg, sd); - - for(int j = 0; j < num_samples; j++) { - float bsdf_u, bsdf_v; - path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - - float3 ao_D; - float ao_pdf; - - sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - - if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { - Ray light_ray; - float3 ao_shadow; - - light_ray.P = ray_offset(sd->P, sd->Ng); - light_ray.D = ao_D; - light_ray.t = kernel_data.background.ao_distance; - light_ray.time = sd->time; - light_ray.dP = sd->dP; - light_ray.dD = differential3_zero(); - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { - path_radiance_accum_ao(L, state, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow); - } - else { - path_radiance_accum_total_ao(L, state, throughput*num_samples_inv, ao_bsdf); - } - } - } + int num_samples = kernel_data.integrator.ao_samples; + float num_samples_inv = 1.0f / num_samples; + float ao_factor = 
kernel_data.background.ao_factor; + float3 ao_N; + float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); + float3 ao_alpha = shader_bsdf_alpha(kg, sd); + + for (int j = 0; j < num_samples; j++) { + float bsdf_u, bsdf_v; + path_branched_rng_2D( + kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + + float3 ao_D; + float ao_pdf; + + sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + + if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { + Ray light_ray; + float3 ao_shadow; + + light_ray.P = ray_offset(sd->P, sd->Ng); + light_ray.D = ao_D; + light_ray.t = kernel_data.background.ao_distance; + light_ray.time = sd->time; + light_ray.dP = sd->dP; + light_ray.dD = differential3_zero(); + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) { + path_radiance_accum_ao( + L, state, throughput * num_samples_inv, ao_alpha, ao_bsdf, ao_shadow); + } + else { + path_radiance_accum_total_ao(L, state, throughput * num_samples_inv, ao_bsdf); + } + } + } } -#ifndef __SPLIT_KERNEL__ - -#ifdef __VOLUME__ -ccl_device_forceinline void kernel_branched_path_volume( - KernelGlobals *kg, - ShaderData *sd, - PathState *state, - Ray *ray, - float3 *throughput, - ccl_addr_space Intersection *isect, - bool hit, - ShaderData *indirect_sd, - ShaderData *emission_sd, - PathRadiance *L) +# ifndef __SPLIT_KERNEL__ + +# ifdef __VOLUME__ +ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg, + ShaderData *sd, + PathState *state, + Ray *ray, + float3 *throughput, + ccl_addr_space Intersection *isect, + bool hit, + ShaderData *indirect_sd, + ShaderData *emission_sd, + PathRadiance *L) { - /* Sanitize volume stack. */ - if(!hit) { - kernel_volume_clean_stack(kg, state->volume_stack); - } - - if(state->volume_stack[0].shader == SHADER_NONE) { - return; - } - - /* volume attenuation, emission, scatter */ - Ray volume_ray = *ray; - volume_ray.t = (hit)? 
isect->t: FLT_MAX; - - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); - -# ifdef __VOLUME_DECOUPLED__ - /* decoupled ray marching only supported on CPU */ - if(kernel_data.integrator.volume_decoupled) { - /* cache steps along volume for repeated sampling */ - VolumeSegment volume_segment; - - shader_setup_from_volume(kg, sd, &volume_ray); - kernel_volume_decoupled_record(kg, state, - &volume_ray, sd, &volume_segment, heterogeneous); - - /* direct light sampling */ - if(volume_segment.closure_flag & SD_SCATTER) { - volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack); - - int all = kernel_data.integrator.sample_all_lights_direct; - - kernel_branched_path_volume_connect_light(kg, sd, - emission_sd, *throughput, state, L, all, - &volume_ray, &volume_segment); - - /* indirect light sampling */ - int num_samples = kernel_data.integrator.volume_samples; - float num_samples_inv = 1.0f/num_samples; - - for(int j = 0; j < num_samples; j++) { - PathState ps = *state; - Ray pray = *ray; - float3 tp = *throughput; - - /* branch RNG state */ - path_state_branch(&ps, j, num_samples); - - /* scatter sample. 
if we use distance sampling and take just one - * sample for direct and indirect light, we could share this - * computation, but makes code a bit complex */ - float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL); - float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false); - - if(result == VOLUME_PATH_SCATTERED && - kernel_path_volume_bounce(kg, - sd, - &tp, - &ps, - &L->state, - &pray)) - { - kernel_path_indirect(kg, - indirect_sd, - emission_sd, - &pray, - tp*num_samples_inv, - &ps, - L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); - } - } - } - - /* emission and transmittance */ - if(volume_segment.closure_flag & SD_EMISSION) - path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission); - *throughput *= volume_segment.accum_transmittance; - - /* free cached steps */ - kernel_volume_decoupled_free(kg, &volume_segment); - } - else -# endif /* __VOLUME_DECOUPLED__ */ - { - /* GPU: no decoupled ray marching, scatter probalistically */ - int num_samples = kernel_data.integrator.volume_samples; - float num_samples_inv = 1.0f/num_samples; - - /* todo: we should cache the shader evaluations from stepping - * through the volume, for now we redo them multiple times */ - - for(int j = 0; j < num_samples; j++) { - PathState ps = *state; - Ray pray = *ray; - float3 tp = (*throughput) * num_samples_inv; - - /* branch RNG state */ - path_state_branch(&ps, j, num_samples); - - VolumeIntegrateResult result = kernel_volume_integrate( - kg, &ps, sd, &volume_ray, L, &tp, heterogeneous); - -# ifdef __VOLUME_SCATTER__ - if(result == VOLUME_PATH_SCATTERED) { - /* todo: support equiangular, MIS and all light sampling. 
- * alternatively get decoupled ray marching working on the GPU */ - kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L); - - if(kernel_path_volume_bounce(kg, - sd, - &tp, - &ps, - &L->state, - &pray)) - { - kernel_path_indirect(kg, - indirect_sd, - emission_sd, - &pray, - tp, - &ps, - L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); - } - } -# endif /* __VOLUME_SCATTER__ */ - } - - /* todo: avoid this calculation using decoupled ray marching */ - kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput); - } + /* Sanitize volume stack. */ + if (!hit) { + kernel_volume_clean_stack(kg, state->volume_stack); + } + + if (state->volume_stack[0].shader == SHADER_NONE) { + return; + } + + /* volume attenuation, emission, scatter */ + Ray volume_ray = *ray; + volume_ray.t = (hit) ? isect->t : FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + +# ifdef __VOLUME_DECOUPLED__ + /* decoupled ray marching only supported on CPU */ + if (kernel_data.integrator.volume_decoupled) { + /* cache steps along volume for repeated sampling */ + VolumeSegment volume_segment; + + shader_setup_from_volume(kg, sd, &volume_ray); + kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous); + + /* direct light sampling */ + if (volume_segment.closure_flag & SD_SCATTER) { + volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack); + + int all = kernel_data.integrator.sample_all_lights_direct; + + kernel_branched_path_volume_connect_light( + kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment); + + /* indirect light sampling */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f / num_samples; + + for (int j = 0; j < num_samples; j++) { + PathState ps = *state; + Ray pray = *ray; + 
float3 tp = *throughput; + + /* branch RNG state */ + path_state_branch(&ps, j, num_samples); + + /* scatter sample. if we use distance sampling and take just one + * sample for direct and indirect light, we could share this + * computation, but makes code a bit complex */ + float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL); + float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter( + kg, &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false); + + if (result == VOLUME_PATH_SCATTERED && + kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) { + kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp * num_samples_inv, &ps, L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + } + } + } + + /* emission and transmittance */ + if (volume_segment.closure_flag & SD_EMISSION) + path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission); + *throughput *= volume_segment.accum_transmittance; + + /* free cached steps */ + kernel_volume_decoupled_free(kg, &volume_segment); + } + else +# endif /* __VOLUME_DECOUPLED__ */ + { + /* GPU: no decoupled ray marching, scatter probalistically */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f / num_samples; + + /* todo: we should cache the shader evaluations from stepping + * through the volume, for now we redo them multiple times */ + + for (int j = 0; j < num_samples; j++) { + PathState ps = *state; + Ray pray = *ray; + float3 tp = (*throughput) * num_samples_inv; + + /* branch RNG state */ + path_state_branch(&ps, j, num_samples); + + VolumeIntegrateResult result = kernel_volume_integrate( + kg, &ps, sd, &volume_ray, L, &tp, heterogeneous); + +# ifdef __VOLUME_SCATTER__ + if (result == VOLUME_PATH_SCATTERED) { + /* todo: support 
equiangular, MIS and all light sampling. + * alternatively get decoupled ray marching working on the GPU */ + kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L); + + if (kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) { + kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp, &ps, L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + } + } +# endif /* __VOLUME_SCATTER__ */ + } + + /* todo: avoid this calculation using decoupled ray marching */ + kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput); + } } -#endif /* __VOLUME__ */ +# endif /* __VOLUME__ */ /* bounce off surface and integrate indirect light */ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, - ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd, - float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L) + ShaderData *sd, + ShaderData *indirect_sd, + ShaderData *emission_sd, + float3 throughput, + float num_samples_adjust, + PathState *state, + PathRadiance *L) { - float sum_sample_weight = 0.0f; -#ifdef __DENOISING_FEATURES__ - if(state->denoising_feature_weight > 0.0f) { - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; - - /* transparency is not handled here, but in outer loop */ - if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { - continue; - } - - sum_sample_weight += sc->sample_weight; - } - } - else { - sum_sample_weight = 1.0f; - } -#endif /* __DENOISING_FEATURES__ */ - - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; - - /* transparency is not handled here, but in outer loop */ - if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { - continue; - } - - int num_samples; - - if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) - num_samples = 
kernel_data.integrator.diffuse_samples; - else if(CLOSURE_IS_BSDF_BSSRDF(sc->type)) - num_samples = 1; - else if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) - num_samples = kernel_data.integrator.glossy_samples; - else - num_samples = kernel_data.integrator.transmission_samples; - - num_samples = ceil_to_int(num_samples_adjust*num_samples); - - float num_samples_inv = num_samples_adjust/num_samples; - - for(int j = 0; j < num_samples; j++) { - PathState ps = *state; - float3 tp = throughput; - Ray bsdf_ray; -#ifdef __SHADOW_TRICKS__ - float shadow_transparency = L->shadow_transparency; -#endif - - ps.rng_hash = cmj_hash(state->rng_hash, i); - - if(!kernel_branched_path_surface_bounce(kg, - sd, - sc, - j, - num_samples, - &tp, - &ps, - &L->state, - &bsdf_ray, - sum_sample_weight)) - { - continue; - } - - ps.rng_hash = state->rng_hash; - - kernel_path_indirect(kg, - indirect_sd, - emission_sd, - &bsdf_ray, - tp*num_samples_inv, - &ps, - L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); - -#ifdef __SHADOW_TRICKS__ - L->shadow_transparency = shadow_transparency; -#endif - } - } + float sum_sample_weight = 0.0f; +# ifdef __DENOISING_FEATURES__ + if (state->denoising_feature_weight > 0.0f) { + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + + /* transparency is not handled here, but in outer loop */ + if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { + continue; + } + + sum_sample_weight += sc->sample_weight; + } + } + else { + sum_sample_weight = 1.0f; + } +# endif /* __DENOISING_FEATURES__ */ + + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + + /* transparency is not handled here, but in outer loop */ + if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { + continue; + } + + int num_samples; + + if 
(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) + num_samples = kernel_data.integrator.diffuse_samples; + else if (CLOSURE_IS_BSDF_BSSRDF(sc->type)) + num_samples = 1; + else if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) + num_samples = kernel_data.integrator.glossy_samples; + else + num_samples = kernel_data.integrator.transmission_samples; + + num_samples = ceil_to_int(num_samples_adjust * num_samples); + + float num_samples_inv = num_samples_adjust / num_samples; + + for (int j = 0; j < num_samples; j++) { + PathState ps = *state; + float3 tp = throughput; + Ray bsdf_ray; +# ifdef __SHADOW_TRICKS__ + float shadow_transparency = L->shadow_transparency; +# endif + + ps.rng_hash = cmj_hash(state->rng_hash, i); + + if (!kernel_branched_path_surface_bounce( + kg, sd, sc, j, num_samples, &tp, &ps, &L->state, &bsdf_ray, sum_sample_weight)) { + continue; + } + + ps.rng_hash = state->rng_hash; + + kernel_path_indirect(kg, indirect_sd, emission_sd, &bsdf_ray, tp * num_samples_inv, &ps, L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + +# ifdef __SHADOW_TRICKS__ + L->shadow_transparency = shadow_transparency; +# endif + } + } } -#ifdef __SUBSURFACE__ +# ifdef __SUBSURFACE__ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd, ShaderData *indirect_sd, @@ -325,111 +290,81 @@ ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg, Ray *ray, float3 throughput) { - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(!CLOSURE_IS_BSSRDF(sc->type)) - continue; - - /* set up random number generator */ - uint lcg_state = lcg_state_init(state, 0x68bc21eb); - int num_samples = kernel_data.integrator.subsurface_samples * 3; - float num_samples_inv = 1.0f/num_samples; - uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i); - - /* do subsurface scatter step with copy of shader data, this will - * 
replace the BSSRDF with a diffuse BSDF closure */ - for(int j = 0; j < num_samples; j++) { - PathState hit_state = *state; - path_state_branch(&hit_state, j, num_samples); - hit_state.rng_hash = bssrdf_rng_hash; - - LocalIntersection ss_isect; - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - int num_hits = subsurface_scatter_multi_intersect(kg, - &ss_isect, - sd, - &hit_state, - sc, - &lcg_state, - bssrdf_u, bssrdf_v, - true); - - hit_state.rng_offset += PRNG_BOUNCE_NUM; - -#ifdef __VOLUME__ - Ray volume_ray = *ray; - bool need_update_volume_stack = - kernel_data.integrator.use_volumes && - sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; -#endif /* __VOLUME__ */ - - /* compute lighting with the BSDF closure */ - for(int hit = 0; hit < num_hits; hit++) { - ShaderData bssrdf_sd = *sd; - Bssrdf *bssrdf = (Bssrdf *)sc; - ClosureType bssrdf_type = sc->type; - float bssrdf_roughness = bssrdf->roughness; - subsurface_scatter_multi_setup(kg, - &ss_isect, - hit, - &bssrdf_sd, - &hit_state, - bssrdf_type, - bssrdf_roughness); - -#ifdef __VOLUME__ - if(need_update_volume_stack) { - /* Setup ray from previous surface point to the new one. 
*/ - float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng); - volume_ray.D = normalize_len(P - volume_ray.P, - &volume_ray.t); - - for(int k = 0; k < VOLUME_STACK_SIZE; k++) { - hit_state.volume_stack[k] = state->volume_stack[k]; - } - - kernel_volume_stack_update_for_subsurface( - kg, - emission_sd, - &volume_ray, - hit_state.volume_stack); - } -#endif /* __VOLUME__ */ - -#ifdef __EMISSION__ - /* direct light */ - if(kernel_data.integrator.use_direct_light) { - int all = (kernel_data.integrator.sample_all_lights_direct) || - (hit_state.flag & PATH_RAY_SHADOW_CATCHER); - kernel_branched_path_surface_connect_light( - kg, - &bssrdf_sd, - emission_sd, - &hit_state, - throughput, - num_samples_inv, - L, - all); - } -#endif /* __EMISSION__ */ - - /* indirect light */ - kernel_branched_path_surface_indirect_light( - kg, - &bssrdf_sd, - indirect_sd, - emission_sd, - throughput, - num_samples_inv, - &hit_state, - L); - } - } - } + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSSRDF(sc->type)) + continue; + + /* set up random number generator */ + uint lcg_state = lcg_state_init(state, 0x68bc21eb); + int num_samples = kernel_data.integrator.subsurface_samples * 3; + float num_samples_inv = 1.0f / num_samples; + uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i); + + /* do subsurface scatter step with copy of shader data, this will + * replace the BSSRDF with a diffuse BSDF closure */ + for (int j = 0; j < num_samples; j++) { + PathState hit_state = *state; + path_state_branch(&hit_state, j, num_samples); + hit_state.rng_hash = bssrdf_rng_hash; + + LocalIntersection ss_isect; + float bssrdf_u, bssrdf_v; + path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + int num_hits = subsurface_scatter_multi_intersect( + kg, &ss_isect, sd, &hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true); + + hit_state.rng_offset += PRNG_BOUNCE_NUM; + +# ifdef __VOLUME__ + Ray volume_ray = *ray; + bool 
need_update_volume_stack = kernel_data.integrator.use_volumes && + sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; +# endif /* __VOLUME__ */ + + /* compute lighting with the BSDF closure */ + for (int hit = 0; hit < num_hits; hit++) { + ShaderData bssrdf_sd = *sd; + Bssrdf *bssrdf = (Bssrdf *)sc; + ClosureType bssrdf_type = sc->type; + float bssrdf_roughness = bssrdf->roughness; + subsurface_scatter_multi_setup( + kg, &ss_isect, hit, &bssrdf_sd, &hit_state, bssrdf_type, bssrdf_roughness); + +# ifdef __VOLUME__ + if (need_update_volume_stack) { + /* Setup ray from previous surface point to the new one. */ + float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng); + volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t); + + for (int k = 0; k < VOLUME_STACK_SIZE; k++) { + hit_state.volume_stack[k] = state->volume_stack[k]; + } + + kernel_volume_stack_update_for_subsurface( + kg, emission_sd, &volume_ray, hit_state.volume_stack); + } +# endif /* __VOLUME__ */ + +# ifdef __EMISSION__ + /* direct light */ + if (kernel_data.integrator.use_direct_light) { + int all = (kernel_data.integrator.sample_all_lights_direct) || + (hit_state.flag & PATH_RAY_SHADOW_CATCHER); + kernel_branched_path_surface_connect_light( + kg, &bssrdf_sd, emission_sd, &hit_state, throughput, num_samples_inv, L, all); + } +# endif /* __EMISSION__ */ + + /* indirect light */ + kernel_branched_path_surface_indirect_light( + kg, &bssrdf_sd, indirect_sd, emission_sd, throughput, num_samples_inv, &hit_state, L); + } + } + } } -#endif /* __SUBSURFACE__ */ +# endif /* __SUBSURFACE__ */ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, uint rng_hash, @@ -438,188 +373,171 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, ccl_global float *buffer, PathRadiance *L) { - /* initialize */ - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - - path_radiance_init(L, kernel_data.film.use_light_pass); - - /* shader data memory used for both volumes and surfaces, saves stack space */ 
- ShaderData sd; - /* shader data used by emission, shadows, volume stacks, indirect path */ - ShaderDataTinyStorage emission_sd_storage; - ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); - ShaderData indirect_sd; - - PathState state; - path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); - - /* Main Loop - * Here we only handle transparency intersections from the camera ray. - * Indirect bounces are handled in kernel_branched_path_surface_indirect_light(). - */ - for(;;) { - /* Find intersection with objects in scene. */ - Intersection isect; - bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L); - -#ifdef __VOLUME__ - /* Volume integration. */ - kernel_branched_path_volume(kg, - &sd, - &state, - &ray, - &throughput, - &isect, - hit, - &indirect_sd, - emission_sd, - L); -#endif /* __VOLUME__ */ - - /* Shade background. */ - if(!hit) { - kernel_path_background(kg, &state, &ray, throughput, &sd, L); - break; - } - - /* Setup and evaluate shader. */ - shader_setup_from_ray(kg, &sd, &isect, &ray); - - /* Skip most work for volume bounding surface. */ -#ifdef __VOLUME__ - if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { -#endif - - shader_eval_surface(kg, &sd, &state, state.flag); - shader_merge_closures(&sd); - - /* Apply shadow catcher, holdout, emission. */ - if(!kernel_path_shader_apply(kg, - &sd, - &state, - &ray, - throughput, - emission_sd, - L, - buffer)) - { - break; - } - - /* transparency termination */ - if(state.flag & PATH_RAY_TRANSPARENT) { - /* path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. 
gives too many unneeded - * shader evaluations, only need emission if we are going to terminate */ - float probability = path_state_continuation_probability(kg, &state, throughput); - - if(probability == 0.0f) { - break; - } - else if(probability != 1.0f) { - float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE); - - if(terminate >= probability) - break; - - throughput /= probability; - } - } - - kernel_update_denoising_features(kg, &sd, &state, L); - -#ifdef __AO__ - /* ambient occlusion */ - if(kernel_data.integrator.use_ambient_occlusion) { - kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput); - } -#endif /* __AO__ */ - -#ifdef __SUBSURFACE__ - /* bssrdf scatter to a different location on the same object */ - if(sd.flag & SD_BSSRDF) { - kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd, - L, &state, &ray, throughput); - } -#endif /* __SUBSURFACE__ */ - - PathState hit_state = state; - -#ifdef __EMISSION__ - /* direct light */ - if(kernel_data.integrator.use_direct_light) { - int all = (kernel_data.integrator.sample_all_lights_direct) || - (state.flag & PATH_RAY_SHADOW_CATCHER); - kernel_branched_path_surface_connect_light(kg, - &sd, emission_sd, &hit_state, throughput, 1.0f, L, all); - } -#endif /* __EMISSION__ */ - - /* indirect light */ - kernel_branched_path_surface_indirect_light(kg, - &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L); - - /* continue in case of transparency */ - throughput *= shader_bsdf_transparency(kg, &sd); - - if(is_zero(throughput)) - break; - - /* Update Path State */ - path_state_next(kg, &state, LABEL_TRANSPARENT); - -#ifdef __VOLUME__ - } - else { - if(!path_state_volume_next(kg, &state)) { - break; - } - } -#endif - - ray.P = ray_offset(sd.P, -sd.Ng); - ray.t -= sd.ray_length; /* clipping works through transparent */ - -#ifdef __RAY_DIFFERENTIALS__ - ray.dP = sd.dP; - ray.dD.dx = -sd.dI.dx; - ray.dD.dy = -sd.dI.dy; -#endif /* __RAY_DIFFERENTIALS__ */ - -#ifdef 
__VOLUME__ - /* enter/exit volume */ - kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack); -#endif /* __VOLUME__ */ - } + /* initialize */ + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + + path_radiance_init(L, kernel_data.film.use_light_pass); + + /* shader data memory used for both volumes and surfaces, saves stack space */ + ShaderData sd; + /* shader data used by emission, shadows, volume stacks, indirect path */ + ShaderDataTinyStorage emission_sd_storage; + ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage); + ShaderData indirect_sd; + + PathState state; + path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); + + /* Main Loop + * Here we only handle transparency intersections from the camera ray. + * Indirect bounces are handled in kernel_branched_path_surface_indirect_light(). + */ + for (;;) { + /* Find intersection with objects in scene. */ + Intersection isect; + bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L); + +# ifdef __VOLUME__ + /* Volume integration. */ + kernel_branched_path_volume( + kg, &sd, &state, &ray, &throughput, &isect, hit, &indirect_sd, emission_sd, L); +# endif /* __VOLUME__ */ + + /* Shade background. */ + if (!hit) { + kernel_path_background(kg, &state, &ray, throughput, &sd, L); + break; + } + + /* Setup and evaluate shader. */ + shader_setup_from_ray(kg, &sd, &isect, &ray); + + /* Skip most work for volume bounding surface. */ +# ifdef __VOLUME__ + if (!(sd.flag & SD_HAS_ONLY_VOLUME)) { +# endif + + shader_eval_surface(kg, &sd, &state, state.flag); + shader_merge_closures(&sd); + + /* Apply shadow catcher, holdout, emission. */ + if (!kernel_path_shader_apply(kg, &sd, &state, &ray, throughput, emission_sd, L, buffer)) { + break; + } + + /* transparency termination */ + if (state.flag & PATH_RAY_TRANSPARENT) { + /* path termination. this is a strange place to put the termination, it's + * mainly due to the mixed in MIS that we use. 
gives too many unneeded + * shader evaluations, only need emission if we are going to terminate */ + float probability = path_state_continuation_probability(kg, &state, throughput); + + if (probability == 0.0f) { + break; + } + else if (probability != 1.0f) { + float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE); + + if (terminate >= probability) + break; + + throughput /= probability; + } + } + + kernel_update_denoising_features(kg, &sd, &state, L); + +# ifdef __AO__ + /* ambient occlusion */ + if (kernel_data.integrator.use_ambient_occlusion) { + kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput); + } +# endif /* __AO__ */ + +# ifdef __SUBSURFACE__ + /* bssrdf scatter to a different location on the same object */ + if (sd.flag & SD_BSSRDF) { + kernel_branched_path_subsurface_scatter( + kg, &sd, &indirect_sd, emission_sd, L, &state, &ray, throughput); + } +# endif /* __SUBSURFACE__ */ + + PathState hit_state = state; + +# ifdef __EMISSION__ + /* direct light */ + if (kernel_data.integrator.use_direct_light) { + int all = (kernel_data.integrator.sample_all_lights_direct) || + (state.flag & PATH_RAY_SHADOW_CATCHER); + kernel_branched_path_surface_connect_light( + kg, &sd, emission_sd, &hit_state, throughput, 1.0f, L, all); + } +# endif /* __EMISSION__ */ + + /* indirect light */ + kernel_branched_path_surface_indirect_light( + kg, &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L); + + /* continue in case of transparency */ + throughput *= shader_bsdf_transparency(kg, &sd); + + if (is_zero(throughput)) + break; + + /* Update Path State */ + path_state_next(kg, &state, LABEL_TRANSPARENT); + +# ifdef __VOLUME__ + } + else { + if (!path_state_volume_next(kg, &state)) { + break; + } + } +# endif + + ray.P = ray_offset(sd.P, -sd.Ng); + ray.t -= sd.ray_length; /* clipping works through transparent */ + +# ifdef __RAY_DIFFERENTIALS__ + ray.dP = sd.dP; + ray.dD.dx = -sd.dI.dx; + ray.dD.dy = -sd.dI.dy; +# endif /* 
__RAY_DIFFERENTIALS__ */ + +# ifdef __VOLUME__ + /* enter/exit volume */ + kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack); +# endif /* __VOLUME__ */ + } } -ccl_device void kernel_branched_path_trace(KernelGlobals *kg, - ccl_global float *buffer, - int sample, int x, int y, int offset, int stride) +ccl_device void kernel_branched_path_trace( + KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride) { - /* buffer offset */ - int index = offset + x + y*stride; - int pass_stride = kernel_data.film.pass_stride; + /* buffer offset */ + int index = offset + x + y * stride; + int pass_stride = kernel_data.film.pass_stride; - buffer += index*pass_stride; + buffer += index * pass_stride; - /* initialize random numbers and ray */ - uint rng_hash; - Ray ray; + /* initialize random numbers and ray */ + uint rng_hash; + Ray ray; - kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); + kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); - /* integrate */ - PathRadiance L; + /* integrate */ + PathRadiance L; - if(ray.t != 0.0f) { - kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L); - kernel_write_result(kg, buffer, sample, &L); - } + if (ray.t != 0.0f) { + kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L); + kernel_write_result(kg, buffer, sample, &L); + } } -#endif /* __SPLIT_KERNEL__ */ +# endif /* __SPLIT_KERNEL__ */ -#endif /* __BRANCHED_PATH__ */ +#endif /* __BRANCHED_PATH__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_common.h b/intern/cycles/kernel/kernel_path_common.h index d83fd474cde..815767595a9 100644 --- a/intern/cycles/kernel/kernel_path_common.h +++ b/intern/cycles/kernel/kernel_path_common.h @@ -18,34 +18,31 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline void kernel_path_trace_setup(KernelGlobals *kg, - int sample, - int x, int y, - uint *rng_hash, - ccl_addr_space Ray *ray) +ccl_device_inline void kernel_path_trace_setup( + KernelGlobals *kg, 
int sample, int x, int y, uint *rng_hash, ccl_addr_space Ray *ray) { - float filter_u; - float filter_v; + float filter_u; + float filter_v; - int num_samples = kernel_data.integrator.aa_samples; + int num_samples = kernel_data.integrator.aa_samples; - path_rng_init(kg, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v); + path_rng_init(kg, sample, num_samples, rng_hash, x, y, &filter_u, &filter_v); - /* sample camera ray */ + /* sample camera ray */ - float lens_u = 0.0f, lens_v = 0.0f; + float lens_u = 0.0f, lens_v = 0.0f; - if(kernel_data.cam.aperturesize > 0.0f) - path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v); + if (kernel_data.cam.aperturesize > 0.0f) + path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v); - float time = 0.0f; + float time = 0.0f; #ifdef __CAMERA_MOTION__ - if(kernel_data.cam.shuttertime != -1.0f) - time = path_rng_1D(kg, *rng_hash, sample, num_samples, PRNG_TIME); + if (kernel_data.cam.shuttertime != -1.0f) + time = path_rng_1D(kg, *rng_hash, sample, num_samples, PRNG_TIME); #endif - camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); + camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index e85050df4bb..cdca0b1f9bf 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -23,249 +23,252 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, int sample, ccl_addr_space Ray *ray) { - state->flag = PATH_RAY_CAMERA|PATH_RAY_MIS_SKIP|PATH_RAY_TRANSPARENT_BACKGROUND; + state->flag = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP | PATH_RAY_TRANSPARENT_BACKGROUND; - state->rng_hash = rng_hash; - state->rng_offset = PRNG_BASE_NUM; - state->sample = sample; - state->num_samples = kernel_data.integrator.aa_samples; - state->branch_factor = 1.0f; + state->rng_hash = rng_hash; + 
state->rng_offset = PRNG_BASE_NUM; + state->sample = sample; + state->num_samples = kernel_data.integrator.aa_samples; + state->branch_factor = 1.0f; - state->bounce = 0; - state->diffuse_bounce = 0; - state->glossy_bounce = 0; - state->transmission_bounce = 0; - state->transparent_bounce = 0; + state->bounce = 0; + state->diffuse_bounce = 0; + state->glossy_bounce = 0; + state->transmission_bounce = 0; + state->transparent_bounce = 0; #ifdef __DENOISING_FEATURES__ - if(kernel_data.film.pass_denoising_data) { - state->flag |= PATH_RAY_STORE_SHADOW_INFO; - state->denoising_feature_weight = 1.0f; - } - else { - state->denoising_feature_weight = 0.0f; - } -#endif /* __DENOISING_FEATURES__ */ - - state->min_ray_pdf = FLT_MAX; - state->ray_pdf = 0.0f; + if (kernel_data.film.pass_denoising_data) { + state->flag |= PATH_RAY_STORE_SHADOW_INFO; + state->denoising_feature_weight = 1.0f; + } + else { + state->denoising_feature_weight = 0.0f; + } +#endif /* __DENOISING_FEATURES__ */ + + state->min_ray_pdf = FLT_MAX; + state->ray_pdf = 0.0f; #ifdef __LAMP_MIS__ - state->ray_t = 0.0f; + state->ray_t = 0.0f; #endif #ifdef __VOLUME__ - state->volume_bounce = 0; - state->volume_bounds_bounce = 0; - - if(kernel_data.integrator.use_volumes) { - /* Initialize volume stack with volume we are inside of. */ - kernel_volume_stack_init(kg, stack_sd, state, ray, state->volume_stack); - } - else { - state->volume_stack[0].shader = SHADER_NONE; - } + state->volume_bounce = 0; + state->volume_bounds_bounce = 0; + + if (kernel_data.integrator.use_volumes) { + /* Initialize volume stack with volume we are inside of. 
*/ + kernel_volume_stack_init(kg, stack_sd, state, ray, state->volume_stack); + } + else { + state->volume_stack[0].shader = SHADER_NONE; + } #endif } -ccl_device_inline void path_state_next(KernelGlobals *kg, ccl_addr_space PathState *state, int label) +ccl_device_inline void path_state_next(KernelGlobals *kg, + ccl_addr_space PathState *state, + int label) { - /* ray through transparent keeps same flags from previous ray and is - * not counted as a regular bounce, transparent has separate max */ - if(label & LABEL_TRANSPARENT) { - state->flag |= PATH_RAY_TRANSPARENT; - state->transparent_bounce++; - if(state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { - state->flag |= PATH_RAY_TERMINATE_IMMEDIATE; - } + /* ray through transparent keeps same flags from previous ray and is + * not counted as a regular bounce, transparent has separate max */ + if (label & LABEL_TRANSPARENT) { + state->flag |= PATH_RAY_TRANSPARENT; + state->transparent_bounce++; + if (state->transparent_bounce >= kernel_data.integrator.transparent_max_bounce) { + state->flag |= PATH_RAY_TERMINATE_IMMEDIATE; + } - if(!kernel_data.integrator.transparent_shadows) - state->flag |= PATH_RAY_MIS_SKIP; + if (!kernel_data.integrator.transparent_shadows) + state->flag |= PATH_RAY_MIS_SKIP; - /* random number generator next bounce */ - state->rng_offset += PRNG_BOUNCE_NUM; + /* random number generator next bounce */ + state->rng_offset += PRNG_BOUNCE_NUM; - return; - } + return; + } - state->bounce++; - if(state->bounce >= kernel_data.integrator.max_bounce) { - state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } + state->bounce++; + if (state->bounce >= kernel_data.integrator.max_bounce) { + state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } - state->flag &= ~(PATH_RAY_ALL_VISIBILITY|PATH_RAY_MIS_SKIP); + state->flag &= ~(PATH_RAY_ALL_VISIBILITY | PATH_RAY_MIS_SKIP); #ifdef __VOLUME__ - if(label & LABEL_VOLUME_SCATTER) { - /* volume scatter */ - state->flag |= 
PATH_RAY_VOLUME_SCATTER; - state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; - - state->volume_bounce++; - if(state->volume_bounce >= kernel_data.integrator.max_volume_bounce) { - state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - } - else + if (label & LABEL_VOLUME_SCATTER) { + /* volume scatter */ + state->flag |= PATH_RAY_VOLUME_SCATTER; + state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + + state->volume_bounce++; + if (state->volume_bounce >= kernel_data.integrator.max_volume_bounce) { + state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + else #endif - { - /* surface reflection/transmission */ - if(label & LABEL_REFLECT) { - state->flag |= PATH_RAY_REFLECT; - state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; - - if(label & LABEL_DIFFUSE) { - state->diffuse_bounce++; - if(state->diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) { - state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - } - else { - state->glossy_bounce++; - if(state->glossy_bounce >= kernel_data.integrator.max_glossy_bounce) { - state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - } - } - else { - kernel_assert(label & LABEL_TRANSMIT); - - state->flag |= PATH_RAY_TRANSMIT; - - if(!(label & LABEL_TRANSMIT_TRANSPARENT)) { - state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; - } - - state->transmission_bounce++; - if(state->transmission_bounce >= kernel_data.integrator.max_transmission_bounce) { - state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; - } - } - - /* diffuse/glossy/singular */ - if(label & LABEL_DIFFUSE) { - state->flag |= PATH_RAY_DIFFUSE|PATH_RAY_DIFFUSE_ANCESTOR; - } - else if(label & LABEL_GLOSSY) { - state->flag |= PATH_RAY_GLOSSY; - } - else { - kernel_assert(label & LABEL_SINGULAR); - state->flag |= PATH_RAY_GLOSSY|PATH_RAY_SINGULAR|PATH_RAY_MIS_SKIP; - } - } - - /* random number generator next bounce */ - state->rng_offset += PRNG_BOUNCE_NUM; + { + /* surface reflection/transmission */ + if (label & LABEL_REFLECT) { + state->flag |= 
PATH_RAY_REFLECT; + state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + + if (label & LABEL_DIFFUSE) { + state->diffuse_bounce++; + if (state->diffuse_bounce >= kernel_data.integrator.max_diffuse_bounce) { + state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + else { + state->glossy_bounce++; + if (state->glossy_bounce >= kernel_data.integrator.max_glossy_bounce) { + state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + } + else { + kernel_assert(label & LABEL_TRANSMIT); + + state->flag |= PATH_RAY_TRANSMIT; + + if (!(label & LABEL_TRANSMIT_TRANSPARENT)) { + state->flag &= ~PATH_RAY_TRANSPARENT_BACKGROUND; + } + + state->transmission_bounce++; + if (state->transmission_bounce >= kernel_data.integrator.max_transmission_bounce) { + state->flag |= PATH_RAY_TERMINATE_AFTER_TRANSPARENT; + } + } + + /* diffuse/glossy/singular */ + if (label & LABEL_DIFFUSE) { + state->flag |= PATH_RAY_DIFFUSE | PATH_RAY_DIFFUSE_ANCESTOR; + } + else if (label & LABEL_GLOSSY) { + state->flag |= PATH_RAY_GLOSSY; + } + else { + kernel_assert(label & LABEL_SINGULAR); + state->flag |= PATH_RAY_GLOSSY | PATH_RAY_SINGULAR | PATH_RAY_MIS_SKIP; + } + } + + /* random number generator next bounce */ + state->rng_offset += PRNG_BOUNCE_NUM; #ifdef __DENOISING_FEATURES__ - if((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) { - state->flag &= ~PATH_RAY_STORE_SHADOW_INFO; - } + if ((state->denoising_feature_weight == 0.0f) && !(state->flag & PATH_RAY_SHADOW_CATCHER)) { + state->flag &= ~PATH_RAY_STORE_SHADOW_INFO; + } #endif } #ifdef __VOLUME__ ccl_device_inline bool path_state_volume_next(KernelGlobals *kg, ccl_addr_space PathState *state) { - /* For volume bounding meshes we pass through without counting transparent - * bounces, only sanity check in case self intersection gets us stuck. */ - state->volume_bounds_bounce++; - if(state->volume_bounds_bounce > VOLUME_BOUNDS_MAX) { - return false; - } - - /* Random number generator next bounce. 
*/ - if(state->volume_bounds_bounce > 1) { - state->rng_offset += PRNG_BOUNCE_NUM; - } - - return true; + /* For volume bounding meshes we pass through without counting transparent + * bounces, only sanity check in case self intersection gets us stuck. */ + state->volume_bounds_bounce++; + if (state->volume_bounds_bounce > VOLUME_BOUNDS_MAX) { + return false; + } + + /* Random number generator next bounce. */ + if (state->volume_bounds_bounce > 1) { + state->rng_offset += PRNG_BOUNCE_NUM; + } + + return true; } #endif -ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, ccl_addr_space PathState *state) +ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, + ccl_addr_space PathState *state) { - uint flag = state->flag & PATH_RAY_ALL_VISIBILITY; + uint flag = state->flag & PATH_RAY_ALL_VISIBILITY; - /* for visibility, diffuse/glossy are for reflection only */ - if(flag & PATH_RAY_TRANSMIT) - flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY); - /* todo: this is not supported as its own ray visibility yet */ - if(state->flag & PATH_RAY_VOLUME_SCATTER) - flag |= PATH_RAY_DIFFUSE; + /* for visibility, diffuse/glossy are for reflection only */ + if (flag & PATH_RAY_TRANSMIT) + flag &= ~(PATH_RAY_DIFFUSE | PATH_RAY_GLOSSY); + /* todo: this is not supported as its own ray visibility yet */ + if (state->flag & PATH_RAY_VOLUME_SCATTER) + flag |= PATH_RAY_DIFFUSE; - return flag; + return flag; } ccl_device_inline float path_state_continuation_probability(KernelGlobals *kg, ccl_addr_space PathState *state, const float3 throughput) { - if(state->flag & PATH_RAY_TERMINATE_IMMEDIATE) { - /* Ray is to be terminated immediately. */ - return 0.0f; - } - else if(state->flag & PATH_RAY_TRANSPARENT) { - /* Do at least one bounce without RR. */ - if(state->transparent_bounce <= 1) { - return 1.0f; - } + if (state->flag & PATH_RAY_TERMINATE_IMMEDIATE) { + /* Ray is to be terminated immediately. 
*/ + return 0.0f; + } + else if (state->flag & PATH_RAY_TRANSPARENT) { + /* Do at least one bounce without RR. */ + if (state->transparent_bounce <= 1) { + return 1.0f; + } #ifdef __SHADOW_TRICKS__ - /* Exception for shadow catcher not working correctly with RR. */ - else if((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->transparent_bounce <= 8)) { - return 1.0f; - } + /* Exception for shadow catcher not working correctly with RR. */ + else if ((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->transparent_bounce <= 8)) { + return 1.0f; + } #endif - } - else { - /* Do at least one bounce without RR. */ - if(state->bounce <= 1) { - return 1.0f; - } + } + else { + /* Do at least one bounce without RR. */ + if (state->bounce <= 1) { + return 1.0f; + } #ifdef __SHADOW_TRICKS__ - /* Exception for shadow catcher not working correctly with RR. */ - else if((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->bounce <= 3)) { - return 1.0f; - } + /* Exception for shadow catcher not working correctly with RR. */ + else if ((state->flag & PATH_RAY_SHADOW_CATCHER) && (state->bounce <= 3)) { + return 1.0f; + } #endif - } + } - /* Probabilistic termination: use sqrt() to roughly match typical view - * transform and do path termination a bit later on average. */ - return min(sqrtf(max3(fabs(throughput)) * state->branch_factor), 1.0f); + /* Probabilistic termination: use sqrt() to roughly match typical view + * transform and do path termination a bit later on average. 
*/ + return min(sqrtf(max3(fabs(throughput)) * state->branch_factor), 1.0f); } /* TODO(DingTo): Find more meaningful name for this */ ccl_device_inline void path_state_modify_bounce(ccl_addr_space PathState *state, bool increase) { - /* Modify bounce temporarily for shader eval */ - if(increase) - state->bounce += 1; - else - state->bounce -= 1; + /* Modify bounce temporarily for shader eval */ + if (increase) + state->bounce += 1; + else + state->bounce -= 1; } ccl_device_inline bool path_state_ao_bounce(KernelGlobals *kg, ccl_addr_space PathState *state) { - if(state->bounce <= kernel_data.integrator.ao_bounces) { - return false; - } + if (state->bounce <= kernel_data.integrator.ao_bounces) { + return false; + } - int bounce = state->bounce - state->transmission_bounce - (state->glossy_bounce > 0); - return (bounce > kernel_data.integrator.ao_bounces); + int bounce = state->bounce - state->transmission_bounce - (state->glossy_bounce > 0); + return (bounce > kernel_data.integrator.ao_bounces); } ccl_device_inline void path_state_branch(ccl_addr_space PathState *state, int branch, int num_branches) { - if(num_branches > 1) { - /* Path is splitting into a branch, adjust so that each branch - * still gets a unique sample from the same sequence. */ - state->sample = state->sample*num_branches + branch; - state->num_samples = state->num_samples*num_branches; - state->branch_factor *= num_branches; - } + if (num_branches > 1) { + /* Path is splitting into a branch, adjust so that each branch + * still gets a unique sample from the same sequence. 
*/ + state->sample = state->sample * num_branches + branch; + state->num_samples = state->num_samples * num_branches; + state->branch_factor *= num_branches; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_subsurface.h b/intern/cycles/kernel/kernel_path_subsurface.h index b5a92c74ed5..97d3f292ca3 100644 --- a/intern/cycles/kernel/kernel_path_subsurface.h +++ b/intern/cycles/kernel/kernel_path_subsurface.h @@ -22,141 +22,118 @@ ccl_device # else ccl_device_inline # endif -bool kernel_path_subsurface_scatter( - KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - PathRadiance *L, - ccl_addr_space PathState *state, - ccl_addr_space Ray *ray, - ccl_addr_space float3 *throughput, - ccl_addr_space SubsurfaceIndirectRays *ss_indirect) + bool + kernel_path_subsurface_scatter(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + PathRadiance *L, + ccl_addr_space PathState *state, + ccl_addr_space Ray *ray, + ccl_addr_space float3 *throughput, + ccl_addr_space SubsurfaceIndirectRays *ss_indirect) { - PROFILING_INIT(kg, PROFILING_SUBSURFACE); - - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - - const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u); - - /* do bssrdf scatter step if we picked a bssrdf closure */ - if(sc) { - /* We should never have two consecutive BSSRDF bounces, - * the second one should be converted to a diffuse BSDF to - * avoid this. 
- */ - kernel_assert(!(state->flag & PATH_RAY_DIFFUSE_ANCESTOR)); - - uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb); - - LocalIntersection ss_isect; - int num_hits = subsurface_scatter_multi_intersect(kg, - &ss_isect, - sd, - state, - sc, - &lcg_state, - bssrdf_u, bssrdf_v, - false); + PROFILING_INIT(kg, PROFILING_SUBSURFACE); + + float bssrdf_u, bssrdf_v; + path_state_rng_2D(kg, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + + const ShaderClosure *sc = shader_bssrdf_pick(sd, throughput, &bssrdf_u); + + /* do bssrdf scatter step if we picked a bssrdf closure */ + if (sc) { + /* We should never have two consecutive BSSRDF bounces, + * the second one should be converted to a diffuse BSDF to + * avoid this. + */ + kernel_assert(!(state->flag & PATH_RAY_DIFFUSE_ANCESTOR)); + + uint lcg_state = lcg_state_init_addrspace(state, 0x68bc21eb); + + LocalIntersection ss_isect; + int num_hits = subsurface_scatter_multi_intersect( + kg, &ss_isect, sd, state, sc, &lcg_state, bssrdf_u, bssrdf_v, false); # ifdef __VOLUME__ - bool need_update_volume_stack = - kernel_data.integrator.use_volumes && - sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; -# endif /* __VOLUME__ */ - - /* Closure memory will be overwritten, so read required variables now. */ - Bssrdf *bssrdf = (Bssrdf *)sc; - ClosureType bssrdf_type = sc->type; - float bssrdf_roughness = bssrdf->roughness; - - /* compute lighting with the BSDF closure */ - for(int hit = 0; hit < num_hits; hit++) { - /* NOTE: We reuse the existing ShaderData, we assume the path - * integration loop stops when this function returns true. 
- */ - subsurface_scatter_multi_setup(kg, - &ss_isect, - hit, - sd, - state, - bssrdf_type, - bssrdf_roughness); - - kernel_path_surface_connect_light(kg, sd, emission_sd, *throughput, state, L); - - ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays]; - ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays]; - ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays]; - PathRadianceState *hit_L_state = &ss_indirect->L_state[ss_indirect->num_rays]; - - *hit_state = *state; - *hit_ray = *ray; - *hit_tp = *throughput; - *hit_L_state = L->state; - - hit_state->rng_offset += PRNG_BOUNCE_NUM; - - if(kernel_path_surface_bounce(kg, - sd, - hit_tp, - hit_state, - hit_L_state, - hit_ray)) - { + bool need_update_volume_stack = kernel_data.integrator.use_volumes && + sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; +# endif /* __VOLUME__ */ + + /* Closure memory will be overwritten, so read required variables now. */ + Bssrdf *bssrdf = (Bssrdf *)sc; + ClosureType bssrdf_type = sc->type; + float bssrdf_roughness = bssrdf->roughness; + + /* compute lighting with the BSDF closure */ + for (int hit = 0; hit < num_hits; hit++) { + /* NOTE: We reuse the existing ShaderData, we assume the path + * integration loop stops when this function returns true. 
+ */ + subsurface_scatter_multi_setup(kg, &ss_isect, hit, sd, state, bssrdf_type, bssrdf_roughness); + + kernel_path_surface_connect_light(kg, sd, emission_sd, *throughput, state, L); + + ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays]; + ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays]; + ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays]; + PathRadianceState *hit_L_state = &ss_indirect->L_state[ss_indirect->num_rays]; + + *hit_state = *state; + *hit_ray = *ray; + *hit_tp = *throughput; + *hit_L_state = L->state; + + hit_state->rng_offset += PRNG_BOUNCE_NUM; + + if (kernel_path_surface_bounce(kg, sd, hit_tp, hit_state, hit_L_state, hit_ray)) { # ifdef __LAMP_MIS__ - hit_state->ray_t = 0.0f; -# endif /* __LAMP_MIS__ */ + hit_state->ray_t = 0.0f; +# endif /* __LAMP_MIS__ */ # ifdef __VOLUME__ - if(need_update_volume_stack) { - Ray volume_ray = *ray; - /* Setup ray from previous surface point to the new one. */ - volume_ray.D = normalize_len(hit_ray->P - volume_ray.P, - &volume_ray.t); - - kernel_volume_stack_update_for_subsurface( - kg, - emission_sd, - &volume_ray, - hit_state->volume_stack); - } -# endif /* __VOLUME__ */ - ss_indirect->num_rays++; - } - } - return true; - } - return false; + if (need_update_volume_stack) { + Ray volume_ray = *ray; + /* Setup ray from previous surface point to the new one. 
*/ + volume_ray.D = normalize_len(hit_ray->P - volume_ray.P, &volume_ray.t); + + kernel_volume_stack_update_for_subsurface( + kg, emission_sd, &volume_ray, hit_state->volume_stack); + } +# endif /* __VOLUME__ */ + ss_indirect->num_rays++; + } + } + return true; + } + return false; } ccl_device_inline void kernel_path_subsurface_init_indirect( - ccl_addr_space SubsurfaceIndirectRays *ss_indirect) + ccl_addr_space SubsurfaceIndirectRays *ss_indirect) { - ss_indirect->num_rays = 0; + ss_indirect->num_rays = 0; } ccl_device void kernel_path_subsurface_setup_indirect( - KernelGlobals *kg, - ccl_addr_space SubsurfaceIndirectRays *ss_indirect, - ccl_addr_space PathState *state, - ccl_addr_space Ray *ray, - PathRadiance *L, - ccl_addr_space float3 *throughput) + KernelGlobals *kg, + ccl_addr_space SubsurfaceIndirectRays *ss_indirect, + ccl_addr_space PathState *state, + ccl_addr_space Ray *ray, + PathRadiance *L, + ccl_addr_space float3 *throughput) { - /* Setup state, ray and throughput for indirect SSS rays. */ - ss_indirect->num_rays--; + /* Setup state, ray and throughput for indirect SSS rays. 
*/ + ss_indirect->num_rays--; - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); - *state = ss_indirect->state[ss_indirect->num_rays]; - *ray = ss_indirect->rays[ss_indirect->num_rays]; - L->state = ss_indirect->L_state[ss_indirect->num_rays]; - *throughput = ss_indirect->throughputs[ss_indirect->num_rays]; + *state = ss_indirect->state[ss_indirect->num_rays]; + *ray = ss_indirect->rays[ss_indirect->num_rays]; + L->state = ss_indirect->L_state[ss_indirect->num_rays]; + *throughput = ss_indirect->throughputs[ss_indirect->num_rays]; - state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM; + state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM; } -#endif /* __SUBSURFACE__ */ +#endif /* __SUBSURFACE__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 0d18a1e8c77..6251313c5f8 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -16,255 +16,280 @@ CCL_NAMESPACE_BEGIN -#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || defined(__BAKING__) +#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) || defined(__SHADOW_TRICKS__) || \ + defined(__BAKING__) /* branched path tracing: connect path directly to position on one or more lights and add it to L */ ccl_device_noinline void kernel_branched_path_surface_connect_light( - KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - ccl_addr_space PathState *state, - float3 throughput, - float num_samples_adjust, - PathRadiance *L, - int sample_all_lights) + KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + ccl_addr_space PathState *state, + float3 throughput, + float num_samples_adjust, + PathRadiance *L, + int sample_all_lights) { -#ifdef __EMISSION__ - /* sample illumination from lights to find path contribution */ - if(!(sd->flag & 
SD_BSDF_HAS_EVAL)) - return; - - Ray light_ray; - BsdfEval L_light; - bool is_lamp; - -# ifdef __OBJECT_MOTION__ - light_ray.time = sd->time; +# ifdef __EMISSION__ + /* sample illumination from lights to find path contribution */ + if (!(sd->flag & SD_BSDF_HAS_EVAL)) + return; + + Ray light_ray; + BsdfEval L_light; + bool is_lamp; + +# ifdef __OBJECT_MOTION__ + light_ray.time = sd->time; +# endif + + if (sample_all_lights) { + /* lamp sampling */ + for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) { + if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) + continue; + + int num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i)); + float num_samples_inv = num_samples_adjust / + (num_samples * kernel_data.integrator.num_all_lights); + uint lamp_rng_hash = cmj_hash(state->rng_hash, i); + + for (int j = 0; j < num_samples; j++) { + float light_u, light_v; + path_branched_rng_2D( + kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); + float terminate = path_branched_rng_light_termination( + kg, lamp_rng_hash, state, j, num_samples); + + LightSample ls; + if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { + /* The sampling probability returned by lamp_light_sample assumes that all lights were sampled. + * However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. 
*/ + if (kernel_data.integrator.pdf_triangles != 0.0f) + ls.pdf *= 2.0f; + + if (direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + /* trace shadow ray */ + float3 shadow; + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, + state, + throughput * num_samples_inv, + &L_light, + shadow, + num_samples_inv, + is_lamp); + } + else { + path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light); + } + } + } + } + } + + /* mesh light sampling */ + if (kernel_data.integrator.pdf_triangles != 0.0f) { + int num_samples = ceil_to_int(num_samples_adjust * + kernel_data.integrator.mesh_light_samples); + float num_samples_inv = num_samples_adjust / num_samples; + + for (int j = 0; j < num_samples; j++) { + float light_u, light_v; + path_branched_rng_2D( + kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); + float terminate = path_branched_rng_light_termination( + kg, state->rng_hash, state, j, num_samples); + + /* only sample triangle lights */ + if (kernel_data.integrator.num_all_lights) + light_u = 0.5f * light_u; + + LightSample ls; + if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + /* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. 
*/ + if (kernel_data.integrator.num_all_lights) + ls.pdf *= 2.0f; + + if (direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + /* trace shadow ray */ + float3 shadow; + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, + state, + throughput * num_samples_inv, + &L_light, + shadow, + num_samples_inv, + is_lamp); + } + else { + path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light); + } + } + } + } + } + } + else { + /* sample one light at random */ + float light_u, light_v; + path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); + float terminate = path_state_rng_light_termination(kg, state); + + LightSample ls; + if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + /* sample random light */ + if (direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + /* trace shadow ray */ + float3 shadow; + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, + state, + throughput * num_samples_adjust, + &L_light, + shadow, + num_samples_adjust, + is_lamp); + } + else { + path_radiance_accum_total_light(L, state, throughput * num_samples_adjust, &L_light); + } + } + } + } # endif - - if(sample_all_lights) { - /* lamp sampling */ - for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { - if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) - continue; - - int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); - float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); - uint lamp_rng_hash = cmj_hash(state->rng_hash, i); - - for(int j = 0; j < num_samples; j++) { - float light_u, light_v; - path_branched_rng_2D(kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); - float 
terminate = path_branched_rng_light_termination(kg, lamp_rng_hash, state, j, num_samples); - - LightSample ls; - if(lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { - /* The sampling probability returned by lamp_light_sample assumes that all lights were sampled. - * However, this code only samples lamps, so if the scene also had mesh lights, the real probability is twice as high. */ - if(kernel_data.integrator.pdf_triangles != 0.0f) - ls.pdf *= 2.0f; - - if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput*num_samples_inv, &L_light); - } - } - } - } - } - - /* mesh light sampling */ - if(kernel_data.integrator.pdf_triangles != 0.0f) { - int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples); - float num_samples_inv = num_samples_adjust/num_samples; - - for(int j = 0; j < num_samples; j++) { - float light_u, light_v; - path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); - float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples); - - /* only sample triangle lights */ - if(kernel_data.integrator.num_all_lights) - light_u = 0.5f*light_u; - - LightSample ls; - if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - /* Same as above, probability needs to be corrected since the sampling was forced to select a mesh light. 
*/ - if(kernel_data.integrator.num_all_lights) - ls.pdf *= 2.0f; - - if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput*num_samples_inv, &L_light); - } - } - } - } - } - } - else { - /* sample one light at random */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - float terminate = path_state_rng_light_termination(kg, state); - - LightSample ls; - if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - /* sample random light */ - if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, throughput*num_samples_adjust, &L_light, shadow, num_samples_adjust, is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput*num_samples_adjust, &L_light); - } - } - } - } -#endif } /* branched path tracing: bounce off or through surface to with new direction stored in ray */ -ccl_device bool kernel_branched_path_surface_bounce( - KernelGlobals *kg, - ShaderData *sd, - const ShaderClosure *sc, - int sample, - int num_samples, - ccl_addr_space float3 *throughput, - ccl_addr_space PathState *state, - PathRadianceState *L_state, - ccl_addr_space Ray *ray, - float sum_sample_weight) +ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, + ShaderData *sd, + const ShaderClosure *sc, + int sample, + int num_samples, + ccl_addr_space float3 *throughput, + ccl_addr_space PathState *state, + PathRadianceState *L_state, + 
ccl_addr_space Ray *ray, + float sum_sample_weight) { - /* sample BSDF */ - float bsdf_pdf; - BsdfEval bsdf_eval; - float3 bsdf_omega_in; - differential3 bsdf_domega_in; - float bsdf_u, bsdf_v; - path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - int label; - - label = shader_bsdf_sample_closure(kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, - &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); - - if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) - return false; - - /* modify throughput */ - path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label); - -#ifdef __DENOISING_FEATURES__ - state->denoising_feature_weight *= sc->sample_weight / (sum_sample_weight * num_samples); -#endif + /* sample BSDF */ + float bsdf_pdf; + BsdfEval bsdf_eval; + float3 bsdf_omega_in; + differential3 bsdf_domega_in; + float bsdf_u, bsdf_v; + path_branched_rng_2D( + kg, state->rng_hash, state, sample, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + int label; + + label = shader_bsdf_sample_closure( + kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); + + if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) + return false; + + /* modify throughput */ + path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label); + +# ifdef __DENOISING_FEATURES__ + state->denoising_feature_weight *= sc->sample_weight / (sum_sample_weight * num_samples); +# endif - /* modify path state */ - path_state_next(kg, state, label); + /* modify path state */ + path_state_next(kg, state, label); - /* setup ray */ - ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng); - ray->D = normalize(bsdf_omega_in); - ray->t = FLT_MAX; -#ifdef __RAY_DIFFERENTIALS__ - ray->dP = sd->dP; - ray->dD = bsdf_domega_in; -#endif -#ifdef __OBJECT_MOTION__ - ray->time = sd->time; -#endif + /* setup ray */ + ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? 
-sd->Ng : sd->Ng); + ray->D = normalize(bsdf_omega_in); + ray->t = FLT_MAX; +# ifdef __RAY_DIFFERENTIALS__ + ray->dP = sd->dP; + ray->dD = bsdf_domega_in; +# endif +# ifdef __OBJECT_MOTION__ + ray->time = sd->time; +# endif -#ifdef __VOLUME__ - /* enter/exit volume */ - if(label & LABEL_TRANSMIT) - kernel_volume_stack_enter_exit(kg, sd, state->volume_stack); -#endif +# ifdef __VOLUME__ + /* enter/exit volume */ + if (label & LABEL_TRANSMIT) + kernel_volume_stack_enter_exit(kg, sd, state->volume_stack); +# endif - /* branch RNG state */ - path_state_branch(state, sample, num_samples); + /* branch RNG state */ + path_state_branch(state, sample, num_samples); - /* set MIS state */ - state->min_ray_pdf = fminf(bsdf_pdf, FLT_MAX); - state->ray_pdf = bsdf_pdf; -#ifdef __LAMP_MIS__ - state->ray_t = 0.0f; -#endif + /* set MIS state */ + state->min_ray_pdf = fminf(bsdf_pdf, FLT_MAX); + state->ray_pdf = bsdf_pdf; +# ifdef __LAMP_MIS__ + state->ray_t = 0.0f; +# endif - return true; + return true; } #endif /* path tracing: connect path directly to position on a light and add it to L */ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, - ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state, - PathRadiance *L) + ShaderData *sd, + ShaderData *emission_sd, + float3 throughput, + ccl_addr_space PathState *state, + PathRadiance *L) { - PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT); + PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT); #ifdef __EMISSION__ - if(!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) - return; - -#ifdef __SHADOW_TRICKS__ - if(state->flag & PATH_RAY_SHADOW_CATCHER) { - kernel_branched_path_surface_connect_light(kg, - sd, - emission_sd, - state, - throughput, - 1.0f, - L, - 1); - return; - } -#endif + if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) + return; + +# ifdef __SHADOW_TRICKS__ + if (state->flag & PATH_RAY_SHADOW_CATCHER) { + 
kernel_branched_path_surface_connect_light(kg, sd, emission_sd, state, throughput, 1.0f, L, 1); + return; + } +# endif - /* sample illumination from lights to find path contribution */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); + /* sample illumination from lights to find path contribution */ + float light_u, light_v; + path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - Ray light_ray; - BsdfEval L_light; - bool is_lamp; + Ray light_ray; + BsdfEval L_light; + bool is_lamp; -#ifdef __OBJECT_MOTION__ - light_ray.time = sd->time; -#endif +# ifdef __OBJECT_MOTION__ + light_ray.time = sd->time; +# endif - LightSample ls; - if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - float terminate = path_state_rng_light_termination(kg, state); - if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput, &L_light); - } - } - } + LightSample ls; + if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + float terminate = path_state_rng_light_termination(kg, state); + if (direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + /* trace shadow ray */ + float3 shadow; + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); + } + else { + path_radiance_accum_total_light(L, state, throughput, &L_light); + } + } + } #endif } @@ -276,87 +301,87 @@ ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg, PathRadianceState *L_state, ccl_addr_space Ray *ray) { - PROFILING_INIT(kg, 
PROFILING_SURFACE_BOUNCE); - - /* no BSDF? we can stop here */ - if(sd->flag & SD_BSDF) { - /* sample BSDF */ - float bsdf_pdf; - BsdfEval bsdf_eval; - float3 bsdf_omega_in; - differential3 bsdf_domega_in; - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - int label; - - label = shader_bsdf_sample(kg, sd, bsdf_u, bsdf_v, &bsdf_eval, - &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); - - if(bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) - return false; - - /* modify throughput */ - path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label); - - /* set labels */ - if(!(label & LABEL_TRANSPARENT)) { - state->ray_pdf = bsdf_pdf; + PROFILING_INIT(kg, PROFILING_SURFACE_BOUNCE); + + /* no BSDF? we can stop here */ + if (sd->flag & SD_BSDF) { + /* sample BSDF */ + float bsdf_pdf; + BsdfEval bsdf_eval; + float3 bsdf_omega_in; + differential3 bsdf_domega_in; + float bsdf_u, bsdf_v; + path_state_rng_2D(kg, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + int label; + + label = shader_bsdf_sample( + kg, sd, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf); + + if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) + return false; + + /* modify throughput */ + path_radiance_bsdf_bounce(kg, L_state, throughput, &bsdf_eval, bsdf_pdf, state->bounce, label); + + /* set labels */ + if (!(label & LABEL_TRANSPARENT)) { + state->ray_pdf = bsdf_pdf; #ifdef __LAMP_MIS__ - state->ray_t = 0.0f; + state->ray_t = 0.0f; #endif - state->min_ray_pdf = fminf(bsdf_pdf, state->min_ray_pdf); - } + state->min_ray_pdf = fminf(bsdf_pdf, state->min_ray_pdf); + } - /* update path state */ - path_state_next(kg, state, label); + /* update path state */ + path_state_next(kg, state, label); - /* setup ray */ - ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT)? -sd->Ng: sd->Ng); - ray->D = normalize(bsdf_omega_in); + /* setup ray */ + ray->P = ray_offset(sd->P, (label & LABEL_TRANSMIT) ? 
-sd->Ng : sd->Ng); + ray->D = normalize(bsdf_omega_in); - if(state->bounce == 0) - ray->t -= sd->ray_length; /* clipping works through transparent */ - else - ray->t = FLT_MAX; + if (state->bounce == 0) + ray->t -= sd->ray_length; /* clipping works through transparent */ + else + ray->t = FLT_MAX; #ifdef __RAY_DIFFERENTIALS__ - ray->dP = sd->dP; - ray->dD = bsdf_domega_in; + ray->dP = sd->dP; + ray->dD = bsdf_domega_in; #endif #ifdef __VOLUME__ - /* enter/exit volume */ - if(label & LABEL_TRANSMIT) - kernel_volume_stack_enter_exit(kg, sd, state->volume_stack); + /* enter/exit volume */ + if (label & LABEL_TRANSMIT) + kernel_volume_stack_enter_exit(kg, sd, state->volume_stack); #endif - return true; - } + return true; + } #ifdef __VOLUME__ - else if(sd->flag & SD_HAS_ONLY_VOLUME) { - if(!path_state_volume_next(kg, state)) { - return false; - } - - if(state->bounce == 0) - ray->t -= sd->ray_length; /* clipping works through transparent */ - else - ray->t = FLT_MAX; - - /* setup ray position, direction stays unchanged */ - ray->P = ray_offset(sd->P, -sd->Ng); -#ifdef __RAY_DIFFERENTIALS__ - ray->dP = sd->dP; -#endif + else if (sd->flag & SD_HAS_ONLY_VOLUME) { + if (!path_state_volume_next(kg, state)) { + return false; + } + + if (state->bounce == 0) + ray->t -= sd->ray_length; /* clipping works through transparent */ + else + ray->t = FLT_MAX; + + /* setup ray position, direction stays unchanged */ + ray->P = ray_offset(sd->P, -sd->Ng); +# ifdef __RAY_DIFFERENTIALS__ + ray->dP = sd->dP; +# endif - /* enter/exit volume */ - kernel_volume_stack_enter_exit(kg, sd, state->volume_stack); - return true; - } + /* enter/exit volume */ + kernel_volume_stack_enter_exit(kg, sd, state->volume_stack); + return true; + } #endif - else { - /* no bsdf or volume? */ - return false; - } + else { + /* no bsdf or volume? 
*/ + return false; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index d2506fc1e7e..fea4dfc159d 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -18,269 +18,307 @@ CCL_NAMESPACE_BEGIN #ifdef __VOLUME_SCATTER__ -ccl_device_inline void kernel_path_volume_connect_light( - KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - float3 throughput, - ccl_addr_space PathState *state, - PathRadiance *L) +ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + float3 throughput, + ccl_addr_space PathState *state, + PathRadiance *L) { -#ifdef __EMISSION__ - if(!kernel_data.integrator.use_direct_light) - return; - - /* sample illumination from lights to find path contribution */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - - Ray light_ray; - BsdfEval L_light; - LightSample ls; - bool is_lamp; - - /* connect to light from given point where shader has been evaluated */ - light_ray.time = sd->time; - - if(light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) - { - float terminate = path_state_rng_light_termination(kg, state); - if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); - } - } - } -#endif /* __EMISSION__ */ +# ifdef __EMISSION__ + if (!kernel_data.integrator.use_direct_light) + return; + + /* sample illumination from lights to find path contribution */ + float light_u, light_v; + path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); + + Ray light_ray; + BsdfEval L_light; + LightSample ls; + bool is_lamp; + + /* connect to 
light from given point where shader has been evaluated */ + light_ray.time = sd->time; + + if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + float terminate = path_state_rng_light_termination(kg, state); + if (direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + /* trace shadow ray */ + float3 shadow; + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); + } + } + } +# endif /* __EMISSION__ */ } -#ifdef __KERNEL_GPU__ +# ifdef __KERNEL_GPU__ ccl_device_noinline -#else +# else ccl_device -#endif -bool kernel_path_volume_bounce( - KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space float3 *throughput, - ccl_addr_space PathState *state, - PathRadianceState *L_state, - ccl_addr_space Ray *ray) +# endif + bool + kernel_path_volume_bounce(KernelGlobals *kg, + ShaderData *sd, + ccl_addr_space float3 *throughput, + ccl_addr_space PathState *state, + PathRadianceState *L_state, + ccl_addr_space Ray *ray) { - /* sample phase function */ - float phase_pdf; - BsdfEval phase_eval; - float3 phase_omega_in; - differential3 phase_domega_in; - float phase_u, phase_v; - path_state_rng_2D(kg, state, PRNG_BSDF_U, &phase_u, &phase_v); - int label; - - label = shader_volume_phase_sample(kg, sd, phase_u, phase_v, &phase_eval, - &phase_omega_in, &phase_domega_in, &phase_pdf); - - if(phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) - return false; - - /* modify throughput */ - path_radiance_bsdf_bounce(kg, L_state, throughput, &phase_eval, phase_pdf, state->bounce, label); - - /* set labels */ - state->ray_pdf = phase_pdf; -#ifdef __LAMP_MIS__ - state->ray_t = 0.0f; -#endif - state->min_ray_pdf = fminf(phase_pdf, state->min_ray_pdf); - - /* update path state */ - path_state_next(kg, state, label); - - /* Russian roulette termination of volume ray scattering. 
*/ - float probability = path_state_continuation_probability(kg, state, *throughput); - - if(probability == 0.0f) { - return false; - } - else if(probability != 1.0f) { - /* Use dimension from the previous bounce, has not been used yet. */ - float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE - PRNG_BOUNCE_NUM); - - if(terminate >= probability) { - return false; - } - - *throughput /= probability; - } - - /* setup ray */ - ray->P = sd->P; - ray->D = phase_omega_in; - ray->t = FLT_MAX; - -#ifdef __RAY_DIFFERENTIALS__ - ray->dP = sd->dP; - ray->dD = phase_domega_in; -#endif - - return true; + /* sample phase function */ + float phase_pdf; + BsdfEval phase_eval; + float3 phase_omega_in; + differential3 phase_domega_in; + float phase_u, phase_v; + path_state_rng_2D(kg, state, PRNG_BSDF_U, &phase_u, &phase_v); + int label; + + label = shader_volume_phase_sample( + kg, sd, phase_u, phase_v, &phase_eval, &phase_omega_in, &phase_domega_in, &phase_pdf); + + if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) + return false; + + /* modify throughput */ + path_radiance_bsdf_bounce(kg, L_state, throughput, &phase_eval, phase_pdf, state->bounce, label); + + /* set labels */ + state->ray_pdf = phase_pdf; +# ifdef __LAMP_MIS__ + state->ray_t = 0.0f; +# endif + state->min_ray_pdf = fminf(phase_pdf, state->min_ray_pdf); + + /* update path state */ + path_state_next(kg, state, label); + + /* Russian roulette termination of volume ray scattering. */ + float probability = path_state_continuation_probability(kg, state, *throughput); + + if (probability == 0.0f) { + return false; + } + else if (probability != 1.0f) { + /* Use dimension from the previous bounce, has not been used yet. 
*/ + float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE - PRNG_BOUNCE_NUM); + + if (terminate >= probability) { + return false; + } + + *throughput /= probability; + } + + /* setup ray */ + ray->P = sd->P; + ray->D = phase_omega_in; + ray->t = FLT_MAX; + +# ifdef __RAY_DIFFERENTIALS__ + ray->dP = sd->dP; + ray->dD = phase_domega_in; +# endif + + return true; } -#ifndef __SPLIT_KERNEL__ -ccl_device void kernel_branched_path_volume_connect_light( - KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - float3 throughput, - ccl_addr_space PathState *state, - PathRadiance *L, - bool sample_all_lights, - Ray *ray, - const VolumeSegment *segment) +# ifndef __SPLIT_KERNEL__ +ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + float3 throughput, + ccl_addr_space PathState *state, + PathRadiance *L, + bool sample_all_lights, + Ray *ray, + const VolumeSegment *segment) { -#ifdef __EMISSION__ - if(!kernel_data.integrator.use_direct_light) - return; - - Ray light_ray; - BsdfEval L_light; - bool is_lamp; - - light_ray.time = sd->time; - - if(sample_all_lights) { - /* lamp sampling */ - for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { - if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) - continue; - - int num_samples = light_select_num_samples(kg, i); - float num_samples_inv = 1.0f/(num_samples*kernel_data.integrator.num_all_lights); - uint lamp_rng_hash = cmj_hash(state->rng_hash, i); - - for(int j = 0; j < num_samples; j++) { - /* sample random position on given light */ - float light_u, light_v; - path_branched_rng_2D(kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); - - LightSample ls; - lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls); - - float3 tp = throughput; - - /* sample position on volume segment */ - float rphase = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL); - 
float rscatter = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); - - /* todo: split up light_sample so we don't have to call it again with new position */ - if(result == VOLUME_PATH_SCATTERED && - lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { - if(kernel_data.integrator.pdf_triangles != 0.0f) - ls.pdf *= 2.0f; - - float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples); - if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, tp*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); - } - } - } - } - } - - /* mesh light sampling */ - if(kernel_data.integrator.pdf_triangles != 0.0f) { - int num_samples = kernel_data.integrator.mesh_light_samples; - float num_samples_inv = 1.0f/num_samples; - - for(int j = 0; j < num_samples; j++) { - /* sample random position on random triangle */ - float light_u, light_v; - path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); - - /* only sample triangle lights */ - if(kernel_data.integrator.num_all_lights) - light_u = 0.5f*light_u; - - LightSample ls; - light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls); - - float3 tp = throughput; - - /* sample position on volume segment */ - float rphase = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL); - float rscatter = path_branched_rng_1D(kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, 
segment, (ls.t != FLT_MAX)? &ls.P: NULL, false); - - /* todo: split up light_sample so we don't have to call it again with new position */ - if(result == VOLUME_PATH_SCATTERED && - light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - if(kernel_data.integrator.num_all_lights) - ls.pdf *= 2.0f; - - float terminate = path_branched_rng_light_termination(kg, state->rng_hash, state, j, num_samples); - if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, tp*num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); - } - } - } - } - } - } - else { - /* sample random position on random light */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - - LightSample ls; - light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls); - - float3 tp = throughput; - - /* sample position on volume segment */ - float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, ray, sd, &tp, rphase, rscatter, segment, (ls.t != FLT_MAX)? 
&ls.P: NULL, false); - - /* todo: split up light_sample so we don't have to call it again with new position */ - if(result == VOLUME_PATH_SCATTERED && - light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - /* sample random light */ - float terminate = path_state_rng_light_termination(kg, state); - if(direct_emission(kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp); - } - } - } - } -#endif /* __EMISSION__ */ +# ifdef __EMISSION__ + if (!kernel_data.integrator.use_direct_light) + return; + + Ray light_ray; + BsdfEval L_light; + bool is_lamp; + + light_ray.time = sd->time; + + if (sample_all_lights) { + /* lamp sampling */ + for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) { + if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) + continue; + + int num_samples = light_select_num_samples(kg, i); + float num_samples_inv = 1.0f / (num_samples * kernel_data.integrator.num_all_lights); + uint lamp_rng_hash = cmj_hash(state->rng_hash, i); + + for (int j = 0; j < num_samples; j++) { + /* sample random position on given light */ + float light_u, light_v; + path_branched_rng_2D( + kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); + + LightSample ls; + lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls); + + float3 tp = throughput; + + /* sample position on volume segment */ + float rphase = path_branched_rng_1D( + kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL); + float rscatter = path_branched_rng_1D( + kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, + ray, + sd, + &tp, + rphase, + rscatter, + segment, + (ls.t != FLT_MAX) ? 
&ls.P : + NULL, + false); + + /* todo: split up light_sample so we don't have to call it again with new position */ + if (result == VOLUME_PATH_SCATTERED && + lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { + if (kernel_data.integrator.pdf_triangles != 0.0f) + ls.pdf *= 2.0f; + + float terminate = path_branched_rng_light_termination( + kg, state->rng_hash, state, j, num_samples); + if (direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + /* trace shadow ray */ + float3 shadow; + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light( + L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); + } + } + } + } + } + + /* mesh light sampling */ + if (kernel_data.integrator.pdf_triangles != 0.0f) { + int num_samples = kernel_data.integrator.mesh_light_samples; + float num_samples_inv = 1.0f / num_samples; + + for (int j = 0; j < num_samples; j++) { + /* sample random position on random triangle */ + float light_u, light_v; + path_branched_rng_2D( + kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); + + /* only sample triangle lights */ + if (kernel_data.integrator.num_all_lights) + light_u = 0.5f * light_u; + + LightSample ls; + light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls); + + float3 tp = throughput; + + /* sample position on volume segment */ + float rphase = path_branched_rng_1D( + kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL); + float rscatter = path_branched_rng_1D( + kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, + ray, + sd, + &tp, + rphase, + rscatter, + segment, + (ls.t != FLT_MAX) ? 
&ls.P : + NULL, + false); + + /* todo: split up light_sample so we don't have to call it again with new position */ + if (result == VOLUME_PATH_SCATTERED && + light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + if (kernel_data.integrator.num_all_lights) + ls.pdf *= 2.0f; + + float terminate = path_branched_rng_light_termination( + kg, state->rng_hash, state, j, num_samples); + if (direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + /* trace shadow ray */ + float3 shadow; + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light( + L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); + } + } + } + } + } + } + else { + /* sample random position on random light */ + float light_u, light_v; + path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); + + LightSample ls; + light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls); + + float3 tp = throughput; + + /* sample position on volume segment */ + float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); + float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + state, + ray, + sd, + &tp, + rphase, + rscatter, + segment, + (ls.t != FLT_MAX) ? 
&ls.P : + NULL, + false); + + /* todo: split up light_sample so we don't have to call it again with new position */ + if (result == VOLUME_PATH_SCATTERED && + light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + /* sample random light */ + float terminate = path_state_rng_light_termination(kg, state); + if (direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { + /* trace shadow ray */ + float3 shadow; + + if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp); + } + } + } + } +# endif /* __EMISSION__ */ } -#endif /* __SPLIT_KERNEL__ */ +# endif /* __SPLIT_KERNEL__ */ -#endif /* __VOLUME_SCATTER__ */ +#endif /* __VOLUME_SCATTER__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_profiling.h b/intern/cycles/kernel/kernel_profiling.h index a46d6376473..780830879d8 100644 --- a/intern/cycles/kernel/kernel_profiling.h +++ b/intern/cycles/kernel/kernel_profiling.h @@ -26,15 +26,21 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_CPU__ # define PROFILING_INIT(kg, event) ProfilingHelper profiling_helper(&kg->profiler, event) # define PROFILING_EVENT(event) profiling_helper.set_event(event) -# define PROFILING_SHADER(shader) if((shader) != SHADER_NONE) { profiling_helper.set_shader((shader) & SHADER_MASK); } -# define PROFILING_OBJECT(object) if((object) != PRIM_NONE) { profiling_helper.set_object(object); } +# define PROFILING_SHADER(shader) \ + if ((shader) != SHADER_NONE) { \ + profiling_helper.set_shader((shader)&SHADER_MASK); \ + } +# define PROFILING_OBJECT(object) \ + if ((object) != PRIM_NONE) { \ + profiling_helper.set_object(object); \ + } #else # define PROFILING_INIT(kg, event) # define PROFILING_EVENT(event) # define PROFILING_SHADER(shader) # define PROFILING_OBJECT(object) -#endif /* __KERNEL_CPU__ */ +#endif /* __KERNEL_CPU__ */ CCL_NAMESPACE_END -#endif /* __KERNEL_PROFILING_H__ 
*/ +#endif /* __KERNEL_PROFILING_H__ */ diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h index 7bad89c831c..f74ced45fd5 100644 --- a/intern/cycles/kernel/kernel_projection.h +++ b/intern/cycles/kernel/kernel_projection.h @@ -39,233 +39,223 @@ CCL_NAMESPACE_BEGIN ccl_device float2 direction_to_spherical(float3 dir) { - float theta = safe_acosf(dir.z); - float phi = atan2f(dir.x, dir.y); + float theta = safe_acosf(dir.z); + float phi = atan2f(dir.x, dir.y); - return make_float2(theta, phi); + return make_float2(theta, phi); } ccl_device float3 spherical_to_direction(float theta, float phi) { - float sin_theta = sinf(theta); - return make_float3(sin_theta*cosf(phi), - sin_theta*sinf(phi), - cosf(theta)); + float sin_theta = sinf(theta); + return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta)); } /* Equirectangular coordinates <-> Cartesian direction */ ccl_device float2 direction_to_equirectangular_range(float3 dir, float4 range) { - if(is_zero(dir)) - return make_float2(0.0f, 0.0f); + if (is_zero(dir)) + return make_float2(0.0f, 0.0f); - float u = (atan2f(dir.y, dir.x) - range.y) / range.x; - float v = (acosf(dir.z / len(dir)) - range.w) / range.z; + float u = (atan2f(dir.y, dir.x) - range.y) / range.x; + float v = (acosf(dir.z / len(dir)) - range.w) / range.z; - return make_float2(u, v); + return make_float2(u, v); } ccl_device float3 equirectangular_range_to_direction(float u, float v, float4 range) { - float phi = range.x*u + range.y; - float theta = range.z*v + range.w; - float sin_theta = sinf(theta); - return make_float3(sin_theta*cosf(phi), - sin_theta*sinf(phi), - cosf(theta)); + float phi = range.x * u + range.y; + float theta = range.z * v + range.w; + float sin_theta = sinf(theta); + return make_float3(sin_theta * cosf(phi), sin_theta * sinf(phi), cosf(theta)); } ccl_device float2 direction_to_equirectangular(float3 dir) { - return direction_to_equirectangular_range(dir, 
make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); + return direction_to_equirectangular_range(dir, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); } ccl_device float3 equirectangular_to_direction(float u, float v) { - return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); + return equirectangular_range_to_direction(u, v, make_float4(-M_2PI_F, M_PI_F, -M_PI_F, M_PI_F)); } /* Fisheye <-> Cartesian direction */ ccl_device float2 direction_to_fisheye(float3 dir, float fov) { - float r = atan2f(sqrtf(dir.y*dir.y + dir.z*dir.z), dir.x) / fov; - float phi = atan2f(dir.z, dir.y); + float r = atan2f(sqrtf(dir.y * dir.y + dir.z * dir.z), dir.x) / fov; + float phi = atan2f(dir.z, dir.y); - float u = r * cosf(phi) + 0.5f; - float v = r * sinf(phi) + 0.5f; + float u = r * cosf(phi) + 0.5f; + float v = r * sinf(phi) + 0.5f; - return make_float2(u, v); + return make_float2(u, v); } ccl_device float3 fisheye_to_direction(float u, float v, float fov) { - u = (u - 0.5f) * 2.0f; - v = (v - 0.5f) * 2.0f; + u = (u - 0.5f) * 2.0f; + v = (v - 0.5f) * 2.0f; - float r = sqrtf(u*u + v*v); + float r = sqrtf(u * u + v * v); - if(r > 1.0f) - return make_float3(0.0f, 0.0f, 0.0f); + if (r > 1.0f) + return make_float3(0.0f, 0.0f, 0.0f); - float phi = safe_acosf((r != 0.0f)? u/r: 0.0f); - float theta = r * fov * 0.5f; + float phi = safe_acosf((r != 0.0f) ? 
u / r : 0.0f); + float theta = r * fov * 0.5f; - if(v < 0.0f) phi = -phi; + if (v < 0.0f) + phi = -phi; - return make_float3( - cosf(theta), - -cosf(phi)*sinf(theta), - sinf(phi)*sinf(theta) - ); + return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta)); } ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float width, float height) { - float theta = safe_acosf(dir.x); - float r = 2.0f * lens * sinf(theta * 0.5f); - float phi = atan2f(dir.z, dir.y); + float theta = safe_acosf(dir.x); + float r = 2.0f * lens * sinf(theta * 0.5f); + float phi = atan2f(dir.z, dir.y); - float u = r * cosf(phi) / width + 0.5f; - float v = r * sinf(phi) / height + 0.5f; + float u = r * cosf(phi) / width + 0.5f; + float v = r * sinf(phi) / height + 0.5f; - return make_float2(u, v); + return make_float2(u, v); } -ccl_device_inline float3 fisheye_equisolid_to_direction(float u, float v, - float lens, - float fov, - float width, float height) +ccl_device_inline float3 +fisheye_equisolid_to_direction(float u, float v, float lens, float fov, float width, float height) { - u = (u - 0.5f) * width; - v = (v - 0.5f) * height; + u = (u - 0.5f) * width; + v = (v - 0.5f) * height; - float rmax = 2.0f * lens * sinf(fov * 0.25f); - float r = sqrtf(u*u + v*v); + float rmax = 2.0f * lens * sinf(fov * 0.25f); + float r = sqrtf(u * u + v * v); - if(r > rmax) - return make_float3(0.0f, 0.0f, 0.0f); + if (r > rmax) + return make_float3(0.0f, 0.0f, 0.0f); - float phi = safe_acosf((r != 0.0f)? u/r: 0.0f); - float theta = 2.0f * asinf(r/(2.0f * lens)); + float phi = safe_acosf((r != 0.0f) ? 
u / r : 0.0f); + float theta = 2.0f * asinf(r / (2.0f * lens)); - if(v < 0.0f) phi = -phi; + if (v < 0.0f) + phi = -phi; - return make_float3( - cosf(theta), - -cosf(phi)*sinf(theta), - sinf(phi)*sinf(theta) - ); + return make_float3(cosf(theta), -cosf(phi) * sinf(theta), sinf(phi) * sinf(theta)); } /* Mirror Ball <-> Cartesion direction */ ccl_device float3 mirrorball_to_direction(float u, float v) { - /* point on sphere */ - float3 dir; + /* point on sphere */ + float3 dir; - dir.x = 2.0f*u - 1.0f; - dir.z = 2.0f*v - 1.0f; + dir.x = 2.0f * u - 1.0f; + dir.z = 2.0f * v - 1.0f; - if(dir.x*dir.x + dir.z*dir.z > 1.0f) - return make_float3(0.0f, 0.0f, 0.0f); + if (dir.x * dir.x + dir.z * dir.z > 1.0f) + return make_float3(0.0f, 0.0f, 0.0f); - dir.y = -sqrtf(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f)); + dir.y = -sqrtf(max(1.0f - dir.x * dir.x - dir.z * dir.z, 0.0f)); - /* reflection */ - float3 I = make_float3(0.0f, -1.0f, 0.0f); + /* reflection */ + float3 I = make_float3(0.0f, -1.0f, 0.0f); - return 2.0f*dot(dir, I)*dir - I; + return 2.0f * dot(dir, I) * dir - I; } ccl_device float2 direction_to_mirrorball(float3 dir) { - /* inverse of mirrorball_to_direction */ - dir.y -= 1.0f; + /* inverse of mirrorball_to_direction */ + dir.y -= 1.0f; - float div = 2.0f*sqrtf(max(-0.5f*dir.y, 0.0f)); - if(div > 0.0f) - dir /= div; + float div = 2.0f * sqrtf(max(-0.5f * dir.y, 0.0f)); + if (div > 0.0f) + dir /= div; - float u = 0.5f*(dir.x + 1.0f); - float v = 0.5f*(dir.z + 1.0f); + float u = 0.5f * (dir.x + 1.0f); + float v = 0.5f * (dir.z + 1.0f); - return make_float2(u, v); + return make_float2(u, v); } ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v) { - switch(cam->panorama_type) { - case PANORAMA_EQUIRECTANGULAR: - return equirectangular_range_to_direction(u, v, cam->equirectangular_range); - case PANORAMA_MIRRORBALL: - return mirrorball_to_direction(u, v); - case PANORAMA_FISHEYE_EQUIDISTANT: - return fisheye_to_direction(u, 
v, cam->fisheye_fov); - case PANORAMA_FISHEYE_EQUISOLID: - default: - return fisheye_equisolid_to_direction(u, v, cam->fisheye_lens, - cam->fisheye_fov, cam->sensorwidth, cam->sensorheight); - } + switch (cam->panorama_type) { + case PANORAMA_EQUIRECTANGULAR: + return equirectangular_range_to_direction(u, v, cam->equirectangular_range); + case PANORAMA_MIRRORBALL: + return mirrorball_to_direction(u, v); + case PANORAMA_FISHEYE_EQUIDISTANT: + return fisheye_to_direction(u, v, cam->fisheye_fov); + case PANORAMA_FISHEYE_EQUISOLID: + default: + return fisheye_equisolid_to_direction( + u, v, cam->fisheye_lens, cam->fisheye_fov, cam->sensorwidth, cam->sensorheight); + } } ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, float3 dir) { - switch(cam->panorama_type) { - case PANORAMA_EQUIRECTANGULAR: - return direction_to_equirectangular_range(dir, cam->equirectangular_range); - case PANORAMA_MIRRORBALL: - return direction_to_mirrorball(dir); - case PANORAMA_FISHEYE_EQUIDISTANT: - return direction_to_fisheye(dir, cam->fisheye_fov); - case PANORAMA_FISHEYE_EQUISOLID: - default: - return direction_to_fisheye_equisolid(dir, cam->fisheye_lens, - cam->sensorwidth, cam->sensorheight); - } + switch (cam->panorama_type) { + case PANORAMA_EQUIRECTANGULAR: + return direction_to_equirectangular_range(dir, cam->equirectangular_range); + case PANORAMA_MIRRORBALL: + return direction_to_mirrorball(dir); + case PANORAMA_FISHEYE_EQUIDISTANT: + return direction_to_fisheye(dir, cam->fisheye_fov); + case PANORAMA_FISHEYE_EQUISOLID: + default: + return direction_to_fisheye_equisolid( + dir, cam->fisheye_lens, cam->sensorwidth, cam->sensorheight); + } } -ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam, float3 *P, float3 *D) +ccl_device_inline void spherical_stereo_transform(ccl_constant KernelCamera *cam, + float3 *P, + float3 *D) { - float interocular_offset = cam->interocular_offset; - - /* Interocular offset of zero means either 
non stereo, or stereo without - * spherical stereo. */ - kernel_assert(interocular_offset != 0.0f); - - if(cam->pole_merge_angle_to > 0.0f) { - const float pole_merge_angle_from = cam->pole_merge_angle_from, - pole_merge_angle_to = cam->pole_merge_angle_to; - float altitude = fabsf(safe_asinf((*D).z)); - if(altitude > pole_merge_angle_to) { - interocular_offset = 0.0f; - } - else if(altitude > pole_merge_angle_from) { - float fac = (altitude - pole_merge_angle_from) / (pole_merge_angle_to - pole_merge_angle_from); - float fade = cosf(fac * M_PI_2_F); - interocular_offset *= fade; - } - } - - float3 up = make_float3(0.0f, 0.0f, 1.0f); - float3 side = normalize(cross(*D, up)); - float3 stereo_offset = side * interocular_offset; - - *P += stereo_offset; - - /* Convergence distance is FLT_MAX in the case of parallel convergence mode, - * no need to modify direction in this case either. */ - const float convergence_distance = cam->convergence_distance; - - if(convergence_distance != FLT_MAX) - { - float3 screen_offset = convergence_distance * (*D); - *D = normalize(screen_offset - stereo_offset); - } + float interocular_offset = cam->interocular_offset; + + /* Interocular offset of zero means either non stereo, or stereo without + * spherical stereo. 
*/ + kernel_assert(interocular_offset != 0.0f); + + if (cam->pole_merge_angle_to > 0.0f) { + const float pole_merge_angle_from = cam->pole_merge_angle_from, + pole_merge_angle_to = cam->pole_merge_angle_to; + float altitude = fabsf(safe_asinf((*D).z)); + if (altitude > pole_merge_angle_to) { + interocular_offset = 0.0f; + } + else if (altitude > pole_merge_angle_from) { + float fac = (altitude - pole_merge_angle_from) / + (pole_merge_angle_to - pole_merge_angle_from); + float fade = cosf(fac * M_PI_2_F); + interocular_offset *= fade; + } + } + + float3 up = make_float3(0.0f, 0.0f, 1.0f); + float3 side = normalize(cross(*D, up)); + float3 stereo_offset = side * interocular_offset; + + *P += stereo_offset; + + /* Convergence distance is FLT_MAX in the case of parallel convergence mode, + * no need to modify direction in this case either. */ + const float convergence_distance = cam->convergence_distance; + + if (convergence_distance != FLT_MAX) { + float3 screen_offset = convergence_distance * (*D); + *D = normalize(screen_offset - stereo_offset); + } } CCL_NAMESPACE_END -#endif /* __KERNEL_PROJECTION_CL__ */ +#endif /* __KERNEL_PROJECTION_CL__ */ diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h index de8cc4a0cef..91a39fc1465 100644 --- a/intern/cycles/kernel/kernel_queues.h +++ b/intern/cycles/kernel/kernel_queues.h @@ -23,24 +23,24 @@ CCL_NAMESPACE_BEGIN * Queue utility functions for split kernel */ #ifdef __KERNEL_OPENCL__ -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +# pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable #endif /* * Enqueue ray index into the queue */ ccl_device void enqueue_ray_index( - int ray_index, /* Ray index to be enqueued. */ - int queue_number, /* Queue in which the ray index should be enqueued. 
*/ - ccl_global int *queues, /* Buffer of all queues. */ - int queue_size, /* Size of each queue. */ - ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */ + int ray_index, /* Ray index to be enqueued. */ + int queue_number, /* Queue in which the ray index should be enqueued. */ + ccl_global int *queues, /* Buffer of all queues. */ + int queue_size, /* Size of each queue. */ + ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */ { - /* This thread's queue index. */ - int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint*)&queue_index[queue_number]) - + (queue_number * queue_size); - queues[my_queue_index] = ray_index; + /* This thread's queue index. */ + int my_queue_index = atomic_fetch_and_inc_uint32((ccl_global uint *)&queue_index[queue_number]) + + (queue_number * queue_size); + queues[my_queue_index] = ray_index; } /* @@ -51,96 +51,95 @@ ccl_device void enqueue_ray_index( * is no more ray to allocate to other threads. */ ccl_device int get_ray_index( - KernelGlobals *kg, - int thread_index, /* Global thread index. */ - int queue_number, /* Queue to operate on. */ - ccl_global int *queues, /* Buffer of all queues. */ - int queuesize, /* Size of a queue. */ - int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. */ + KernelGlobals *kg, + int thread_index, /* Global thread index. */ + int queue_number, /* Queue to operate on. */ + ccl_global int *queues, /* Buffer of all queues. */ + int queuesize, /* Size of a queue. */ + int empty_queue) /* Empty the queue slot as soon as we fetch the ray index. 
*/ { - int ray_index = queues[queue_number * queuesize + thread_index]; - if(empty_queue && ray_index != QUEUE_EMPTY_SLOT) { - queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT; - } - return ray_index; + int ray_index = queues[queue_number * queuesize + thread_index]; + if (empty_queue && ray_index != QUEUE_EMPTY_SLOT) { + queues[queue_number * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + } + return ray_index; } /* The following functions are to realize Local memory variant of enqueue ray index function. */ /* All threads should call this function. */ ccl_device void enqueue_ray_index_local( - int ray_index, /* Ray index to enqueue. */ - int queue_number, /* Queue in which to enqueue ray index. */ - char enqueue_flag, /* True for threads whose ray index has to be enqueued. */ - int queuesize, /* queue size. */ - ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */ - ccl_global int *Queue_data, /* Queues. */ - ccl_global int *Queue_index) /* To do global queue atomics. */ + int ray_index, /* Ray index to enqueue. */ + int queue_number, /* Queue in which to enqueue ray index. */ + char enqueue_flag, /* True for threads whose ray index has to be enqueued. */ + int queuesize, /* queue size. */ + ccl_local_param unsigned int *local_queue_atomics, /* To to local queue atomics. */ + ccl_global int *Queue_data, /* Queues. */ + ccl_global int *Queue_index) /* To do global queue atomics. */ { - int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); - - /* Get local queue id .*/ - unsigned int lqidx; - if(enqueue_flag) { - lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics); - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - /* Get global queue offset. */ - if(lidx == 0) { - *local_queue_atomics = atomic_fetch_and_add_uint32((ccl_global uint*)&Queue_index[queue_number], - *local_queue_atomics); - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - /* Get global queue index and enqueue ray. 
*/ - if(enqueue_flag) { - unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx; - Queue_data[my_gqidx] = ray_index; - } + int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); + + /* Get local queue id .*/ + unsigned int lqidx; + if (enqueue_flag) { + lqidx = atomic_fetch_and_inc_uint32(local_queue_atomics); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + /* Get global queue offset. */ + if (lidx == 0) { + *local_queue_atomics = atomic_fetch_and_add_uint32( + (ccl_global uint *)&Queue_index[queue_number], *local_queue_atomics); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + /* Get global queue index and enqueue ray. */ + if (enqueue_flag) { + unsigned int my_gqidx = queue_number * queuesize + (*local_queue_atomics) + lqidx; + Queue_data[my_gqidx] = ray_index; + } } ccl_device unsigned int get_local_queue_index( - int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */ - ccl_local_param unsigned int *local_queue_atomics) + int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */ + ccl_local_param unsigned int *local_queue_atomics) { - int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]); - return my_lqidx; + int my_lqidx = atomic_fetch_and_inc_uint32(&local_queue_atomics[queue_number]); + return my_lqidx; } ccl_device unsigned int get_global_per_queue_offset( - int queue_number, - ccl_local_param unsigned int *local_queue_atomics, - ccl_global int* global_queue_atomics) + int queue_number, + ccl_local_param unsigned int *local_queue_atomics, + ccl_global int *global_queue_atomics) { - unsigned int queue_offset = atomic_fetch_and_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number], - local_queue_atomics[queue_number]); - return queue_offset; + unsigned int queue_offset = atomic_fetch_and_add_uint32( + (ccl_global uint *)&global_queue_atomics[queue_number], local_queue_atomics[queue_number]); + return queue_offset; } ccl_device unsigned int 
get_global_queue_index( int queue_number, int queuesize, unsigned int lqidx, - ccl_local_param unsigned int * global_per_queue_offset) + ccl_local_param unsigned int *global_per_queue_offset) { - int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number]; - return my_gqidx; + int my_gqidx = queuesize * queue_number + lqidx + global_per_queue_offset[queue_number]; + return my_gqidx; } -ccl_device int dequeue_ray_index( - int queue_number, - ccl_global int *queues, - int queue_size, - ccl_global int *queue_index) +ccl_device int dequeue_ray_index(int queue_number, + ccl_global int *queues, + int queue_size, + ccl_global int *queue_index) { - int index = atomic_fetch_and_dec_uint32((ccl_global uint*)&queue_index[queue_number])-1; + int index = atomic_fetch_and_dec_uint32((ccl_global uint *)&queue_index[queue_number]) - 1; - if(index < 0) { - return QUEUE_EMPTY_SLOT; - } + if (index < 0) { + return QUEUE_EMPTY_SLOT; + } - return queues[index + queue_number * queue_size]; + return queues[index + queue_number * queue_size]; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 61ddf4a4f81..6779c1f7160 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -23,7 +23,6 @@ CCL_NAMESPACE_BEGIN * this single threaded on a CPU for repeatable results. */ //#define __DEBUG_CORRELATION__ - /* High Dimensional Sobol. * * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal @@ -36,136 +35,138 @@ CCL_NAMESPACE_BEGIN * progressive pattern that doesn't suffer from this problem, because even * with this offset some dimensions are quite poor. 
*/ -#define SOBOL_SKIP 64 +# define SOBOL_SKIP 64 ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension) { - uint result = 0; - uint i = index + SOBOL_SKIP; - for(uint j = 0; i; i >>= 1, j++) { - if(i & 1) { - result ^= kernel_tex_fetch(__sobol_directions, 32*dimension + j); - } - } - return result; + uint result = 0; + uint i = index + SOBOL_SKIP; + for (uint j = 0; i; i >>= 1, j++) { + if (i & 1) { + result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j); + } + } + return result; } -#endif /* __SOBOL__ */ - +#endif /* __SOBOL__ */ -ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, - uint rng_hash, - int sample, int num_samples, - int dimension) +ccl_device_forceinline float path_rng_1D( + KernelGlobals *kg, uint rng_hash, int sample, int num_samples, int dimension) { #ifdef __DEBUG_CORRELATION__ - return (float)drand48(); + return (float)drand48(); #endif #ifdef __CMJ__ # ifdef __SOBOL__ - if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) # endif - { - /* Correlated multi-jitter. */ - int p = rng_hash + dimension; - return cmj_sample_1D(sample, num_samples, p); - } + { + /* Correlated multi-jitter. */ + int p = rng_hash + dimension; + return cmj_sample_1D(sample, num_samples, p); + } #endif #ifdef __SOBOL__ - /* Sobol sequence value using direction vectors. */ - uint result = sobol_dimension(kg, sample, dimension); - float r = (float)result * (1.0f/(float)0xFFFFFFFF); + /* Sobol sequence value using direction vectors. */ + uint result = sobol_dimension(kg, sample, dimension); + float r = (float)result * (1.0f / (float)0xFFFFFFFF); - /* Cranly-Patterson rotation using rng seed */ - float shift; + /* Cranly-Patterson rotation using rng seed */ + float shift; - /* Hash rng with dimension to solve correlation issues. - * See T38710, T50116. 
- */ - uint tmp_rng = cmj_hash_simple(dimension, rng_hash); - shift = tmp_rng * (1.0f/(float)0xFFFFFFFF); + /* Hash rng with dimension to solve correlation issues. + * See T38710, T50116. + */ + uint tmp_rng = cmj_hash_simple(dimension, rng_hash); + shift = tmp_rng * (1.0f / (float)0xFFFFFFFF); - return r + shift - floorf(r + shift); + return r + shift - floorf(r + shift); #endif } ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, uint rng_hash, - int sample, int num_samples, + int sample, + int num_samples, int dimension, - float *fx, float *fy) + float *fx, + float *fy) { #ifdef __DEBUG_CORRELATION__ - *fx = (float)drand48(); - *fy = (float)drand48(); - return; + *fx = (float)drand48(); + *fy = (float)drand48(); + return; #endif #ifdef __CMJ__ # ifdef __SOBOL__ - if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) + if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) # endif - { - /* Correlated multi-jitter. */ - int p = rng_hash + dimension; - cmj_sample_2D(sample, num_samples, p, fx, fy); - return; - } + { + /* Correlated multi-jitter. */ + int p = rng_hash + dimension; + cmj_sample_2D(sample, num_samples, p, fx, fy); + return; + } #endif #ifdef __SOBOL__ - /* Sobol. */ - *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension); - *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1); + /* Sobol. 
*/ + *fx = path_rng_1D(kg, rng_hash, sample, num_samples, dimension); + *fy = path_rng_1D(kg, rng_hash, sample, num_samples, dimension + 1); #endif } ccl_device_inline void path_rng_init(KernelGlobals *kg, - int sample, int num_samples, + int sample, + int num_samples, uint *rng_hash, - int x, int y, - float *fx, float *fy) + int x, + int y, + float *fx, + float *fy) { - /* load state */ - *rng_hash = hash_int_2d(x, y); - *rng_hash ^= kernel_data.integrator.seed; + /* load state */ + *rng_hash = hash_int_2d(x, y); + *rng_hash ^= kernel_data.integrator.seed; #ifdef __DEBUG_CORRELATION__ - srand48(*rng_hash + sample); + srand48(*rng_hash + sample); #endif - if(sample == 0) { - *fx = 0.5f; - *fy = 0.5f; - } - else { - path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy); - } + if (sample == 0) { + *fx = 0.5f; + *fy = 0.5f; + } + else { + path_rng_2D(kg, *rng_hash, sample, num_samples, PRNG_FILTER_U, fx, fy); + } } /* Linear Congruential Generator */ ccl_device uint lcg_step_uint(uint *rng) { - /* implicit mod 2^32 */ - *rng = (1103515245*(*rng) + 12345); - return *rng; + /* implicit mod 2^32 */ + *rng = (1103515245 * (*rng) + 12345); + return *rng; } ccl_device float lcg_step_float(uint *rng) { - /* implicit mod 2^32 */ - *rng = (1103515245*(*rng) + 12345); - return (float)*rng * (1.0f/(float)0xFFFFFFFF); + /* implicit mod 2^32 */ + *rng = (1103515245 * (*rng) + 12345); + return (float)*rng * (1.0f / (float)0xFFFFFFFF); } ccl_device uint lcg_init(uint seed) { - uint rng = seed; - lcg_step_uint(&rng); - return rng; + uint rng = seed; + lcg_step_uint(&rng); + return rng; } /* Path Tracing Utility Functions @@ -181,118 +182,107 @@ ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, const ccl_addr_space PathState *state, int dimension) { - return path_rng_1D(kg, - state->rng_hash, - state->sample, state->num_samples, - state->rng_offset + dimension); + return path_rng_1D( + kg, state->rng_hash, state->sample, state->num_samples, 
state->rng_offset + dimension); } -ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, - const ccl_addr_space PathState *state, - int dimension, - float *fx, float *fy) +ccl_device_inline void path_state_rng_2D( + KernelGlobals *kg, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy) { - path_rng_2D(kg, - state->rng_hash, - state->sample, state->num_samples, - state->rng_offset + dimension, - fx, fy); + path_rng_2D(kg, + state->rng_hash, + state->sample, + state->num_samples, + state->rng_offset + dimension, + fx, + fy); } ccl_device_inline float path_state_rng_1D_hash(KernelGlobals *kg, - const ccl_addr_space PathState *state, - uint hash) + const ccl_addr_space PathState *state, + uint hash) { - /* Use a hash instead of dimension, this is not great but avoids adding - * more dimensions to each bounce which reduces quality of dimensions we - * are already using. */ - return path_rng_1D(kg, - cmj_hash_simple(state->rng_hash, hash), - state->sample, state->num_samples, - state->rng_offset); + /* Use a hash instead of dimension, this is not great but avoids adding + * more dimensions to each bounce which reduces quality of dimensions we + * are already using. 
*/ + return path_rng_1D(kg, + cmj_hash_simple(state->rng_hash, hash), + state->sample, + state->num_samples, + state->rng_offset); } -ccl_device_inline float path_branched_rng_1D( - KernelGlobals *kg, - uint rng_hash, - const ccl_addr_space PathState *state, - int branch, - int num_branches, - int dimension) +ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, + uint rng_hash, + const ccl_addr_space PathState *state, + int branch, + int num_branches, + int dimension) { - return path_rng_1D(kg, - rng_hash, - state->sample * num_branches + branch, - state->num_samples * num_branches, - state->rng_offset + dimension); + return path_rng_1D(kg, + rng_hash, + state->sample * num_branches + branch, + state->num_samples * num_branches, + state->rng_offset + dimension); } -ccl_device_inline void path_branched_rng_2D( - KernelGlobals *kg, - uint rng_hash, - const ccl_addr_space PathState *state, - int branch, - int num_branches, - int dimension, - float *fx, float *fy) +ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, + uint rng_hash, + const ccl_addr_space PathState *state, + int branch, + int num_branches, + int dimension, + float *fx, + float *fy) { - path_rng_2D(kg, - rng_hash, - state->sample * num_branches + branch, - state->num_samples * num_branches, - state->rng_offset + dimension, - fx, fy); + path_rng_2D(kg, + rng_hash, + state->sample * num_branches + branch, + state->num_samples * num_branches, + state->rng_offset + dimension, + fx, + fy); } /* Utitility functions to get light termination value, * since it might not be needed in many cases. 
*/ -ccl_device_inline float path_state_rng_light_termination( - KernelGlobals *kg, - const ccl_addr_space PathState *state) +ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, + const ccl_addr_space PathState *state) { - if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE); - } - return 0.0f; + if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { + return path_state_rng_1D(kg, state, PRNG_LIGHT_TERMINATE); + } + return 0.0f; } -ccl_device_inline float path_branched_rng_light_termination( - KernelGlobals *kg, - uint rng_hash, - const ccl_addr_space PathState *state, - int branch, - int num_branches) +ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg, + uint rng_hash, + const ccl_addr_space PathState *state, + int branch, + int num_branches) { - if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) { - return path_branched_rng_1D(kg, - rng_hash, - state, - branch, - num_branches, - PRNG_LIGHT_TERMINATE); - } - return 0.0f; + if (kernel_data.integrator.light_inv_rr_threshold > 0.0f) { + return path_branched_rng_1D(kg, rng_hash, state, branch, num_branches, PRNG_LIGHT_TERMINATE); + } + return 0.0f; } -ccl_device_inline uint lcg_state_init(PathState *state, - uint scramble) +ccl_device_inline uint lcg_state_init(PathState *state, uint scramble) { - return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble); + return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble); } -ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state, - uint scramble) +ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space PathState *state, uint scramble) { - return lcg_init(state->rng_hash + state->rng_offset + state->sample*scramble); + return lcg_init(state->rng_hash + state->rng_offset + state->sample * scramble); } - ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng) { - /* 
Implicit mod 2^32 */ - *rng = (1103515245*(*rng) + 12345); - return (float)*rng * (1.0f/(float)0xFFFFFFFF); + /* Implicit mod 2^32 */ + *rng = (1103515245 * (*rng) + 12345); + return (float)*rng * (1.0f / (float)0xFFFFFFFF); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b1da523501d..351b623addb 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -37,14 +37,14 @@ CCL_NAMESPACE_BEGIN #ifdef __OBJECT_MOTION__ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time) { - if(sd->object_flag & SD_OBJECT_MOTION) { - sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time); - sd->ob_itfm = transform_quick_inverse(sd->ob_tfm); - } - else { - sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); - sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); - } + if (sd->object_flag & SD_OBJECT_MOTION) { + sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time); + sd->ob_itfm = transform_quick_inverse(sd->ob_tfm); + } + else { + sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); + sd->ob_itfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM); + } } #endif @@ -53,104 +53,104 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, const Intersection *isect, const Ray *ray) { - PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); #ifdef __INSTANCING__ - sd->object = (isect->object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object; + sd->object = (isect->object == OBJECT_NONE) ? 
kernel_tex_fetch(__prim_object, isect->prim) : + isect->object; #endif - sd->lamp = LAMP_NONE; + sd->lamp = LAMP_NONE; - sd->type = isect->type; - sd->flag = 0; - sd->object_flag = kernel_tex_fetch(__object_flag, - sd->object); + sd->type = isect->type; + sd->flag = 0; + sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); - /* matrices and time */ + /* matrices and time */ #ifdef __OBJECT_MOTION__ - shader_setup_object_transforms(kg, sd, ray->time); + shader_setup_object_transforms(kg, sd, ray->time); #endif - sd->time = ray->time; + sd->time = ray->time; - sd->prim = kernel_tex_fetch(__prim_index, isect->prim); - sd->ray_length = isect->t; + sd->prim = kernel_tex_fetch(__prim_index, isect->prim); + sd->ray_length = isect->t; #ifdef __UV__ - sd->u = isect->u; - sd->v = isect->v; + sd->u = isect->u; + sd->v = isect->v; #endif #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) { - /* curve */ - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - - sd->shader = __float_as_int(curvedata.z); - sd->P = curve_refine(kg, sd, isect, ray); - } - else + if (sd->type & PRIMITIVE_ALL_CURVE) { + /* curve */ + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + + sd->shader = __float_as_int(curvedata.z); + sd->P = curve_refine(kg, sd, isect, ray); + } + else #endif - if(sd->type & PRIMITIVE_TRIANGLE) { - /* static triangle */ - float3 Ng = triangle_normal(kg, sd); - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + if (sd->type & PRIMITIVE_TRIANGLE) { + /* static triangle */ + float3 Ng = triangle_normal(kg, sd); + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); - /* vectors */ - sd->P = triangle_refine(kg, sd, isect, ray); - sd->Ng = Ng; - sd->N = Ng; + /* vectors */ + sd->P = triangle_refine(kg, sd, isect, ray); + sd->Ng = Ng; + sd->N = Ng; - /* smooth normal */ - if(sd->shader & SHADER_SMOOTH_NORMAL) - sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); + /* smooth normal */ + if (sd->shader & SHADER_SMOOTH_NORMAL) + sd->N = 
triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); #ifdef __DPDU__ - /* dPdu/dPdv */ - triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); + /* dPdu/dPdv */ + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); #endif - } - else { - /* motion triangle */ - motion_triangle_shader_setup(kg, sd, isect, ray, false); - } + } + else { + /* motion triangle */ + motion_triangle_shader_setup(kg, sd, isect, ray, false); + } - sd->I = -ray->D; + sd->I = -ray->D; - sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; #ifdef __INSTANCING__ - if(isect->object != OBJECT_NONE) { - /* instance transform */ - object_normal_transform_auto(kg, sd, &sd->N); - object_normal_transform_auto(kg, sd, &sd->Ng); + if (isect->object != OBJECT_NONE) { + /* instance transform */ + object_normal_transform_auto(kg, sd, &sd->N); + object_normal_transform_auto(kg, sd, &sd->Ng); # ifdef __DPDU__ - object_dir_transform_auto(kg, sd, &sd->dPdu); - object_dir_transform_auto(kg, sd, &sd->dPdv); + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); # endif - } + } #endif - /* backfacing test */ - bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); + /* backfacing test */ + bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); - if(backfacing) { - sd->flag |= SD_BACKFACING; - sd->Ng = -sd->Ng; - sd->N = -sd->N; + if (backfacing) { + sd->flag |= SD_BACKFACING; + sd->Ng = -sd->Ng; + sd->N = -sd->N; #ifdef __DPDU__ - sd->dPdu = -sd->dPdu; - sd->dPdv = -sd->dPdv; + sd->dPdu = -sd->dPdu; + sd->dPdv = -sd->dPdv; #endif - } + } #ifdef __RAY_DIFFERENTIALS__ - /* differentials */ - differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t); - differential_incoming(&sd->dI, ray->dD); - differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); + /* differentials */ + differential_transfer(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, isect->t); + 
differential_incoming(&sd->dI, ray->dD); + differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); #endif - PROFILING_SHADER(sd->shader); - PROFILING_OBJECT(sd->object); + PROFILING_SHADER(sd->shader); + PROFILING_OBJECT(sd->object); } /* ShaderData setup from BSSRDF scatter */ @@ -161,86 +161,86 @@ ccl_device # else ccl_device_inline # endif -void shader_setup_from_subsurface( - KernelGlobals *kg, - ShaderData *sd, - const Intersection *isect, - const Ray *ray) + void + shader_setup_from_subsurface(KernelGlobals *kg, + ShaderData *sd, + const Intersection *isect, + const Ray *ray) { - PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); - const bool backfacing = sd->flag & SD_BACKFACING; + const bool backfacing = sd->flag & SD_BACKFACING; - /* object, matrices, time, ray_length stay the same */ - sd->flag = 0; - sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); - sd->prim = kernel_tex_fetch(__prim_index, isect->prim); - sd->type = isect->type; + /* object, matrices, time, ray_length stay the same */ + sd->flag = 0; + sd->object_flag = kernel_tex_fetch(__object_flag, sd->object); + sd->prim = kernel_tex_fetch(__prim_index, isect->prim); + sd->type = isect->type; # ifdef __UV__ - sd->u = isect->u; - sd->v = isect->v; + sd->u = isect->u; + sd->v = isect->v; # endif - /* fetch triangle data */ - if(sd->type == PRIMITIVE_TRIANGLE) { - float3 Ng = triangle_normal(kg, sd); - sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); + /* fetch triangle data */ + if (sd->type == PRIMITIVE_TRIANGLE) { + float3 Ng = triangle_normal(kg, sd); + sd->shader = kernel_tex_fetch(__tri_shader, sd->prim); - /* static triangle */ - sd->P = triangle_refine_local(kg, sd, isect, ray); - sd->Ng = Ng; - sd->N = Ng; + /* static triangle */ + sd->P = triangle_refine_local(kg, sd, isect, ray); + sd->Ng = Ng; + sd->N = Ng; - if(sd->shader & SHADER_SMOOTH_NORMAL) - sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); + 
if (sd->shader & SHADER_SMOOTH_NORMAL) + sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); # ifdef __DPDU__ - /* dPdu/dPdv */ - triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); + /* dPdu/dPdv */ + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); # endif - } - else { - /* motion triangle */ - motion_triangle_shader_setup(kg, sd, isect, ray, true); - } + } + else { + /* motion triangle */ + motion_triangle_shader_setup(kg, sd, isect, ray, true); + } - sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; # ifdef __INSTANCING__ - if(isect->object != OBJECT_NONE) { - /* instance transform */ - object_normal_transform_auto(kg, sd, &sd->N); - object_normal_transform_auto(kg, sd, &sd->Ng); + if (isect->object != OBJECT_NONE) { + /* instance transform */ + object_normal_transform_auto(kg, sd, &sd->N); + object_normal_transform_auto(kg, sd, &sd->Ng); # ifdef __DPDU__ - object_dir_transform_auto(kg, sd, &sd->dPdu); - object_dir_transform_auto(kg, sd, &sd->dPdv); + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); # endif - } + } # endif - /* backfacing test */ - if(backfacing) { - sd->flag |= SD_BACKFACING; - sd->Ng = -sd->Ng; - sd->N = -sd->N; + /* backfacing test */ + if (backfacing) { + sd->flag |= SD_BACKFACING; + sd->Ng = -sd->Ng; + sd->N = -sd->N; # ifdef __DPDU__ - sd->dPdu = -sd->dPdu; - sd->dPdv = -sd->dPdv; + sd->dPdu = -sd->dPdu; + sd->dPdv = -sd->dPdv; # endif - } + } - /* should not get used in principle as the shading will only use a diffuse - * BSDF, but the shader might still access it */ - sd->I = sd->N; + /* should not get used in principle as the shading will only use a diffuse + * BSDF, but the shader might still access it */ + sd->I = sd->N; # ifdef __RAY_DIFFERENTIALS__ - /* differentials */ - differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); - /* don't modify dP and dI 
*/ + /* differentials */ + differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng); + /* don't modify dP and dI */ # endif - PROFILING_SHADER(sd->shader); + PROFILING_SHADER(sd->shader); } #endif @@ -251,194 +251,208 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg, const float3 P, const float3 Ng, const float3 I, - int shader, int object, int prim, - float u, float v, float t, + int shader, + int object, + int prim, + float u, + float v, + float t, float time, bool object_space, int lamp) { - PROFILING_INIT(kg, PROFILING_SHADER_SETUP); - - /* vectors */ - sd->P = P; - sd->N = Ng; - sd->Ng = Ng; - sd->I = I; - sd->shader = shader; - if(prim != PRIM_NONE) - sd->type = PRIMITIVE_TRIANGLE; - else if(lamp != LAMP_NONE) - sd->type = PRIMITIVE_LAMP; - else - sd->type = PRIMITIVE_NONE; - - /* primitive */ + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + + /* vectors */ + sd->P = P; + sd->N = Ng; + sd->Ng = Ng; + sd->I = I; + sd->shader = shader; + if (prim != PRIM_NONE) + sd->type = PRIMITIVE_TRIANGLE; + else if (lamp != LAMP_NONE) + sd->type = PRIMITIVE_LAMP; + else + sd->type = PRIMITIVE_NONE; + + /* primitive */ #ifdef __INSTANCING__ - sd->object = object; + sd->object = object; #endif - sd->lamp = LAMP_NONE; - /* currently no access to bvh prim index for strand sd->prim*/ - sd->prim = prim; + sd->lamp = LAMP_NONE; + /* currently no access to bvh prim index for strand sd->prim*/ + sd->prim = prim; #ifdef __UV__ - sd->u = u; - sd->v = v; + sd->u = u; + sd->v = v; #endif - sd->time = time; - sd->ray_length = t; + sd->time = time; + sd->ray_length = t; - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag = 0; - if(sd->object != OBJECT_NONE) { - sd->object_flag |= kernel_tex_fetch(__object_flag, - sd->object); + sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag = 0; + if (sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_tex_fetch(__object_flag, 
sd->object); #ifdef __OBJECT_MOTION__ - shader_setup_object_transforms(kg, sd, time); - } - else if(lamp != LAMP_NONE) { - sd->ob_tfm = lamp_fetch_transform(kg, lamp, false); - sd->ob_itfm = lamp_fetch_transform(kg, lamp, true); - sd->lamp = lamp; + shader_setup_object_transforms(kg, sd, time); + } + else if (lamp != LAMP_NONE) { + sd->ob_tfm = lamp_fetch_transform(kg, lamp, false); + sd->ob_itfm = lamp_fetch_transform(kg, lamp, true); + sd->lamp = lamp; #else - } - else if(lamp != LAMP_NONE) { - sd->lamp = lamp; + } + else if (lamp != LAMP_NONE) { + sd->lamp = lamp; #endif - } + } - /* transform into world space */ - if(object_space) { - object_position_transform_auto(kg, sd, &sd->P); - object_normal_transform_auto(kg, sd, &sd->Ng); - sd->N = sd->Ng; - object_dir_transform_auto(kg, sd, &sd->I); - } + /* transform into world space */ + if (object_space) { + object_position_transform_auto(kg, sd, &sd->P); + object_normal_transform_auto(kg, sd, &sd->Ng); + sd->N = sd->Ng; + object_dir_transform_auto(kg, sd, &sd->I); + } - if(sd->type & PRIMITIVE_TRIANGLE) { - /* smooth normal */ - if(sd->shader & SHADER_SMOOTH_NORMAL) { - sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); + if (sd->type & PRIMITIVE_TRIANGLE) { + /* smooth normal */ + if (sd->shader & SHADER_SMOOTH_NORMAL) { + sd->N = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v); #ifdef __INSTANCING__ - if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_normal_transform_auto(kg, sd, &sd->N); - } + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_normal_transform_auto(kg, sd, &sd->N); + } #endif - } + } - /* dPdu/dPdv */ + /* dPdu/dPdv */ #ifdef __DPDU__ - triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); # ifdef __INSTANCING__ - if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_dir_transform_auto(kg, sd, &sd->dPdu); - object_dir_transform_auto(kg, sd, &sd->dPdv); - } + if (!(sd->object_flag & 
SD_OBJECT_TRANSFORM_APPLIED)) { + object_dir_transform_auto(kg, sd, &sd->dPdu); + object_dir_transform_auto(kg, sd, &sd->dPdv); + } # endif #endif - } - else { + } + else { #ifdef __DPDU__ - sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); - sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); #endif - } + } - /* backfacing test */ - if(sd->prim != PRIM_NONE) { - bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); + /* backfacing test */ + if (sd->prim != PRIM_NONE) { + bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); - if(backfacing) { - sd->flag |= SD_BACKFACING; - sd->Ng = -sd->Ng; - sd->N = -sd->N; + if (backfacing) { + sd->flag |= SD_BACKFACING; + sd->Ng = -sd->Ng; + sd->N = -sd->N; #ifdef __DPDU__ - sd->dPdu = -sd->dPdu; - sd->dPdv = -sd->dPdv; + sd->dPdu = -sd->dPdu; + sd->dPdv = -sd->dPdv; #endif - } - } + } + } #ifdef __RAY_DIFFERENTIALS__ - /* no ray differentials here yet */ - sd->dP = differential3_zero(); - sd->dI = differential3_zero(); - sd->du = differential_zero(); - sd->dv = differential_zero(); + /* no ray differentials here yet */ + sd->dP = differential3_zero(); + sd->dI = differential3_zero(); + sd->du = differential_zero(); + sd->dv = differential_zero(); #endif - PROFILING_SHADER(sd->shader); - PROFILING_OBJECT(sd->object); + PROFILING_SHADER(sd->shader); + PROFILING_OBJECT(sd->object); } /* ShaderData setup for displacement */ -ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, - int object, int prim, float u, float v) +ccl_device void shader_setup_from_displace( + KernelGlobals *kg, ShaderData *sd, int object, int prim, float u, float v) { - float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f); - int shader; - - triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); - - /* force smooth shading for displacement */ - shader |= SHADER_SMOOTH_NORMAL; - - shader_setup_from_sample(kg, sd, - P, Ng, I, - shader, object, prim, - u, v, 0.0f, 0.5f, - 
!(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), - LAMP_NONE); + float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f); + int shader; + + triangle_point_normal(kg, object, prim, u, v, &P, &Ng, &shader); + + /* force smooth shading for displacement */ + shader |= SHADER_SMOOTH_NORMAL; + + shader_setup_from_sample( + kg, + sd, + P, + Ng, + I, + shader, + object, + prim, + u, + v, + 0.0f, + 0.5f, + !(kernel_tex_fetch(__object_flag, object) & SD_OBJECT_TRANSFORM_APPLIED), + LAMP_NONE); } /* ShaderData setup from ray into background */ -ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray) +ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, + ShaderData *sd, + const Ray *ray) { - PROFILING_INIT(kg, PROFILING_SHADER_SETUP); - - /* vectors */ - sd->P = ray->D; - sd->N = -ray->D; - sd->Ng = -ray->D; - sd->I = -ray->D; - sd->shader = kernel_data.background.surface_shader; - sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag = 0; - sd->time = ray->time; - sd->ray_length = 0.0f; + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + + /* vectors */ + sd->P = ray->D; + sd->N = -ray->D; + sd->Ng = -ray->D; + sd->I = -ray->D; + sd->shader = kernel_data.background.surface_shader; + sd->flag = kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag = 0; + sd->time = ray->time; + sd->ray_length = 0.0f; #ifdef __INSTANCING__ - sd->object = OBJECT_NONE; + sd->object = OBJECT_NONE; #endif - sd->lamp = LAMP_NONE; - sd->prim = PRIM_NONE; + sd->lamp = LAMP_NONE; + sd->prim = PRIM_NONE; #ifdef __UV__ - sd->u = 0.0f; - sd->v = 0.0f; + sd->u = 0.0f; + sd->v = 0.0f; #endif #ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); - sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); + /* dPdu/dPdv */ + sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); #endif #ifdef __RAY_DIFFERENTIALS__ - /* 
differentials */ - sd->dP = ray->dD; - differential_incoming(&sd->dI, sd->dP); - sd->du = differential_zero(); - sd->dv = differential_zero(); + /* differentials */ + sd->dP = ray->dD; + differential_incoming(&sd->dI, sd->dP); + sd->du = differential_zero(); + sd->dv = differential_zero(); #endif - /* for NDC coordinates */ - sd->ray_P = ray->P; + /* for NDC coordinates */ + sd->ray_P = ray->P; - PROFILING_SHADER(sd->shader); - PROFILING_OBJECT(sd->object); + PROFILING_SHADER(sd->shader); + PROFILING_OBJECT(sd->object); } /* ShaderData setup from point inside volume */ @@ -446,141 +460,145 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat #ifdef __VOLUME__ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray) { - PROFILING_INIT(kg, PROFILING_SHADER_SETUP); - - /* vectors */ - sd->P = ray->P; - sd->N = -ray->D; - sd->Ng = -ray->D; - sd->I = -ray->D; - sd->shader = SHADER_NONE; - sd->flag = 0; - sd->object_flag = 0; - sd->time = ray->time; - sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */ + PROFILING_INIT(kg, PROFILING_SHADER_SETUP); + + /* vectors */ + sd->P = ray->P; + sd->N = -ray->D; + sd->Ng = -ray->D; + sd->I = -ray->D; + sd->shader = SHADER_NONE; + sd->flag = 0; + sd->object_flag = 0; + sd->time = ray->time; + sd->ray_length = 0.0f; /* todo: can we set this to some useful value? 
*/ # ifdef __INSTANCING__ - sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */ + sd->object = OBJECT_NONE; /* todo: fill this for texture coordinates */ # endif - sd->lamp = LAMP_NONE; - sd->prim = PRIM_NONE; - sd->type = PRIMITIVE_NONE; + sd->lamp = LAMP_NONE; + sd->prim = PRIM_NONE; + sd->type = PRIMITIVE_NONE; # ifdef __UV__ - sd->u = 0.0f; - sd->v = 0.0f; + sd->u = 0.0f; + sd->v = 0.0f; # endif # ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); - sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); + /* dPdu/dPdv */ + sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); # endif # ifdef __RAY_DIFFERENTIALS__ - /* differentials */ - sd->dP = ray->dD; - differential_incoming(&sd->dI, sd->dP); - sd->du = differential_zero(); - sd->dv = differential_zero(); + /* differentials */ + sd->dP = ray->dD; + differential_incoming(&sd->dI, sd->dP); + sd->du = differential_zero(); + sd->dv = differential_zero(); # endif - /* for NDC coordinates */ - sd->ray_P = ray->P; - sd->ray_dP = ray->dP; + /* for NDC coordinates */ + sd->ray_P = ray->P; + sd->ray_dP = ray->dP; - PROFILING_SHADER(sd->shader); - PROFILING_OBJECT(sd->object); + PROFILING_SHADER(sd->shader); + PROFILING_OBJECT(sd->object); } -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ /* Merging */ #if defined(__BRANCHED_PATH__) || defined(__VOLUME__) ccl_device_inline void shader_merge_closures(ShaderData *sd) { - /* merge identical closures, better when we sample a single closure at a time */ - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sci = &sd->closure[i]; - - for(int j = i + 1; j < sd->num_closure; j++) { - ShaderClosure *scj = &sd->closure[j]; - - if(sci->type != scj->type) - continue; - if(!bsdf_merge(sci, scj)) - continue; - - sci->weight += scj->weight; - sci->sample_weight += scj->sample_weight; - - int size = sd->num_closure - (j+1); - if(size > 0) { - for(int k = 0; k < size; k++) { - scj[k] = scj[k+1]; - } - } - - 
sd->num_closure--; - kernel_assert(sd->num_closure >= 0); - j--; - } - } + /* merge identical closures, better when we sample a single closure at a time */ + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sci = &sd->closure[i]; + + for (int j = i + 1; j < sd->num_closure; j++) { + ShaderClosure *scj = &sd->closure[j]; + + if (sci->type != scj->type) + continue; + if (!bsdf_merge(sci, scj)) + continue; + + sci->weight += scj->weight; + sci->sample_weight += scj->sample_weight; + + int size = sd->num_closure - (j + 1); + if (size > 0) { + for (int k = 0; k < size; k++) { + scj[k] = scj[k + 1]; + } + } + + sd->num_closure--; + kernel_assert(sd->num_closure >= 0); + j--; + } + } } -#endif /* __BRANCHED_PATH__ || __VOLUME__ */ +#endif /* __BRANCHED_PATH__ || __VOLUME__ */ /* Defensive sampling. */ -ccl_device_inline void shader_prepare_closures(ShaderData *sd, - ccl_addr_space PathState *state) +ccl_device_inline void shader_prepare_closures(ShaderData *sd, ccl_addr_space PathState *state) { - /* We can likely also do defensive sampling at deeper bounces, particularly - * for cases like a perfect mirror but possibly also others. This will need - * a good heuristic. */ - if(state->bounce + state->transparent_bounce == 0 && sd->num_closure > 1) { - float sum = 0.0f; - - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } - - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sc->sample_weight = max(sc->sample_weight, 0.125f * sum); - } - } - } + /* We can likely also do defensive sampling at deeper bounces, particularly + * for cases like a perfect mirror but possibly also others. This will need + * a good heuristic. 
*/ + if (state->bounce + state->transparent_bounce == 0 && sd->num_closure > 1) { + float sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } + + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sc->sample_weight = max(sc->sample_weight, 0.125f * sum); + } + } + } } - /* BSDF */ -ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, ShaderData *sd, const float3 omega_in, float *pdf, - const ShaderClosure *skip_sc, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight) +ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, + ShaderData *sd, + const float3 omega_in, + float *pdf, + const ShaderClosure *skip_sc, + BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight) { - /* this is the veach one-sample model with balance heuristic, some pdf - * factors drop out when using balance heuristic weighting */ - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; + /* this is the veach one-sample model with balance heuristic, some pdf + * factors drop out when using balance heuristic weighting */ + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; - if(sc != skip_sc && CLOSURE_IS_BSDF(sc->type)) { - float bsdf_pdf = 0.0f; - float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); + if (sc != skip_sc && CLOSURE_IS_BSDF(sc->type)) { + float bsdf_pdf = 0.0f; + float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); - if(bsdf_pdf != 0.0f) { - bsdf_eval_accum(result_eval, sc->type, eval*sc->weight, 1.0f); - sum_pdf += bsdf_pdf*sc->sample_weight; - } + if (bsdf_pdf != 0.0f) { + bsdf_eval_accum(result_eval, sc->type, eval * sc->weight, 1.0f); + sum_pdf += bsdf_pdf * sc->sample_weight; + } - sum_sample_weight += sc->sample_weight; - } - } + sum_sample_weight 
+= sc->sample_weight; + } + } - *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f; + *pdf = (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f; } #ifdef __BRANCHED_PATH__ @@ -591,633 +609,654 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg, float light_pdf, bool use_mis) { - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF(sc->type)) { - float bsdf_pdf = 0.0f; - float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); - if(bsdf_pdf != 0.0f) { - float mis_weight = use_mis? power_heuristic(light_pdf, bsdf_pdf): 1.0f; - bsdf_eval_accum(result_eval, - sc->type, - eval * sc->weight, - mis_weight); - } - } - } + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF(sc->type)) { + float bsdf_pdf = 0.0f; + float3 eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf); + if (bsdf_pdf != 0.0f) { + float mis_weight = use_mis ? power_heuristic(light_pdf, bsdf_pdf) : 1.0f; + bsdf_eval_accum(result_eval, sc->type, eval * sc->weight, mis_weight); + } + } + } } -#endif /* __BRANCHED_PATH__ */ - +#endif /* __BRANCHED_PATH__ */ #ifndef __KERNEL_CUDA__ ccl_device #else ccl_device_inline #endif -void shader_bsdf_eval(KernelGlobals *kg, - ShaderData *sd, - const float3 omega_in, - BsdfEval *eval, - float light_pdf, - bool use_mis) + void + shader_bsdf_eval(KernelGlobals *kg, + ShaderData *sd, + const float3 omega_in, + BsdfEval *eval, + float light_pdf, + bool use_mis) { - PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL); + PROFILING_INIT(kg, PROFILING_CLOSURE_EVAL); - bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); + bsdf_eval_init( + eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); #ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) - _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis); - 
else + if (kernel_data.integrator.branched) + _shader_bsdf_multi_eval_branched(kg, sd, omega_in, eval, light_pdf, use_mis); + else #endif - { - float pdf; - _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, NULL, eval, 0.0f, 0.0f); - if(use_mis) { - float weight = power_heuristic(light_pdf, pdf); - bsdf_eval_mis(eval, weight); - } - } + { + float pdf; + _shader_bsdf_multi_eval(kg, sd, omega_in, &pdf, NULL, eval, 0.0f, 0.0f); + if (use_mis) { + float weight = power_heuristic(light_pdf, pdf); + bsdf_eval_mis(eval, weight); + } + } } -ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd, - float *randu) +ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd, float *randu) { - /* Note the sampling here must match shader_bssrdf_pick, - * since we reuse the same random number. */ - int sampled = 0; + /* Note the sampling here must match shader_bssrdf_pick, + * since we reuse the same random number. */ + int sampled = 0; - if(sd->num_closure > 1) { - /* Pick a BSDF or based on sample weights. */ - float sum = 0.0f; + if (sd->num_closure > 1) { + /* Pick a BSDF or based on sample weights. 
*/ + float sum = 0.0f; - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - sum += sc->sample_weight; - } - } + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + sum += sc->sample_weight; + } + } - float r = (*randu)*sum; - float partial_sum = 0.0f; + float r = (*randu) * sum; + float partial_sum = 0.0f; - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - float next_sum = partial_sum + sc->sample_weight; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + float next_sum = partial_sum + sc->sample_weight; - if(r < next_sum) { - sampled = i; + if (r < next_sum) { + sampled = i; - /* Rescale to reuse for direction sample, to better - * preserve stratifaction. */ - *randu = (r - partial_sum) / sc->sample_weight; - break; - } + /* Rescale to reuse for direction sample, to better + * preserve stratifaction. */ + *randu = (r - partial_sum) / sc->sample_weight; + break; + } - partial_sum = next_sum; - } - } - } + partial_sum = next_sum; + } + } + } - const ShaderClosure *sc = &sd->closure[sampled]; - return CLOSURE_IS_BSDF(sc->type)? sc: NULL; + const ShaderClosure *sc = &sd->closure[sampled]; + return CLOSURE_IS_BSDF(sc->type) ? sc : NULL; } ccl_device_inline const ShaderClosure *shader_bssrdf_pick(ShaderData *sd, ccl_addr_space float3 *throughput, float *randu) { - /* Note the sampling here must match shader_bsdf_pick, - * since we reuse the same random number. */ - int sampled = 0; - - if(sd->num_closure > 1) { - /* Pick a BSDF or BSSRDF or based on sample weights. 
*/ - float sum_bsdf = 0.0f; - float sum_bssrdf = 0.0f; - - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; - - if(CLOSURE_IS_BSDF(sc->type)) { - sum_bsdf += sc->sample_weight; - } - else if(CLOSURE_IS_BSSRDF(sc->type)) { - sum_bssrdf += sc->sample_weight; - } - } - - float r = (*randu)*(sum_bsdf + sum_bssrdf); - float partial_sum = 0.0f; - - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; - - if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { - float next_sum = partial_sum + sc->sample_weight; - - if(r < next_sum) { - if(CLOSURE_IS_BSDF(sc->type)) { - *throughput *= (sum_bsdf + sum_bssrdf) / sum_bsdf; - return NULL; - } - else { - *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf; - sampled = i; - - /* Rescale to reuse for direction sample, to better - * preserve stratifaction. */ - *randu = (r - partial_sum) / sc->sample_weight; - break; - } - } - - partial_sum = next_sum; - } - } - } - - const ShaderClosure *sc = &sd->closure[sampled]; - return CLOSURE_IS_BSSRDF(sc->type)? sc: NULL; + /* Note the sampling here must match shader_bsdf_pick, + * since we reuse the same random number. */ + int sampled = 0; + + if (sd->num_closure > 1) { + /* Pick a BSDF or BSSRDF or based on sample weights. 
*/ + float sum_bsdf = 0.0f; + float sum_bssrdf = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF(sc->type)) { + sum_bsdf += sc->sample_weight; + } + else if (CLOSURE_IS_BSSRDF(sc->type)) { + sum_bssrdf += sc->sample_weight; + } + } + + float r = (*randu) * (sum_bsdf + sum_bssrdf); + float partial_sum = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) { + float next_sum = partial_sum + sc->sample_weight; + + if (r < next_sum) { + if (CLOSURE_IS_BSDF(sc->type)) { + *throughput *= (sum_bsdf + sum_bssrdf) / sum_bsdf; + return NULL; + } + else { + *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf; + sampled = i; + + /* Rescale to reuse for direction sample, to better + * preserve stratifaction. */ + *randu = (r - partial_sum) / sc->sample_weight; + break; + } + } + + partial_sum = next_sum; + } + } + } + + const ShaderClosure *sc = &sd->closure[sampled]; + return CLOSURE_IS_BSSRDF(sc->type) ? sc : NULL; } ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd, - float randu, float randv, + float randu, + float randv, BsdfEval *bsdf_eval, float3 *omega_in, differential3 *domega_in, float *pdf) { - PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE); + PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE); - const ShaderClosure *sc = shader_bsdf_pick(sd, &randu); - if(sc == NULL) { - *pdf = 0.0f; - return LABEL_NONE; - } + const ShaderClosure *sc = shader_bsdf_pick(sd, &randu); + if (sc == NULL) { + *pdf = 0.0f; + return LABEL_NONE; + } - /* BSSRDF should already have been handled elsewhere. */ - kernel_assert(CLOSURE_IS_BSDF(sc->type)); + /* BSSRDF should already have been handled elsewhere. 
*/ + kernel_assert(CLOSURE_IS_BSDF(sc->type)); - int label; - float3 eval; + int label; + float3 eval; - *pdf = 0.0f; - label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); + *pdf = 0.0f; + label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - if(*pdf != 0.0f) { - bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass); + if (*pdf != 0.0f) { + bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight, kernel_data.film.use_light_pass); - if(sd->num_closure > 1) { - float sweight = sc->sample_weight; - _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sc, bsdf_eval, *pdf*sweight, sweight); - } - } + if (sd->num_closure > 1) { + float sweight = sc->sample_weight; + _shader_bsdf_multi_eval(kg, sd, *omega_in, pdf, sc, bsdf_eval, *pdf * sweight, sweight); + } + } - return label; + return label; } -ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, ShaderData *sd, - const ShaderClosure *sc, float randu, float randv, BsdfEval *bsdf_eval, - float3 *omega_in, differential3 *domega_in, float *pdf) +ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, + ShaderData *sd, + const ShaderClosure *sc, + float randu, + float randv, + BsdfEval *bsdf_eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { - PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE); + PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE); - int label; - float3 eval; + int label; + float3 eval; - *pdf = 0.0f; - label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); + *pdf = 0.0f; + label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - if(*pdf != 0.0f) - bsdf_eval_init(bsdf_eval, sc->type, eval*sc->weight, kernel_data.film.use_light_pass); + if (*pdf != 0.0f) + bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight, kernel_data.film.use_light_pass); - return label; + return label; } ccl_device float shader_bsdf_average_roughness(ShaderData *sd) { - float roughness = 
0.0f; - float sum_weight = 0.0f; - - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(CLOSURE_IS_BSDF(sc->type)) { - /* sqrt once to undo the squaring from multiplying roughness on the - * two axes, and once for the squared roughness convention. */ - float weight = fabsf(average(sc->weight)); - roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); - sum_weight += weight; - } - } - - return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f; + float roughness = 0.0f; + float sum_weight = 0.0f; + + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if (CLOSURE_IS_BSDF(sc->type)) { + /* sqrt once to undo the squaring from multiplying roughness on the + * two axes, and once for the squared roughness convention. */ + float weight = fabsf(average(sc->weight)); + roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc))); + sum_weight += weight; + } + } + + return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f; } ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness) { - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF(sc->type)) - bsdf_blur(kg, sc, roughness); - } + if (CLOSURE_IS_BSDF(sc->type)) + bsdf_blur(kg, sc, roughness); + } } ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, const ShaderData *sd) { - if(sd->flag & SD_HAS_ONLY_VOLUME) { - return make_float3(1.0f, 1.0f, 1.0f); - } - else if(sd->flag & SD_TRANSPARENT) { - return sd->closure_transparent_extinction; - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } + if (sd->flag & SD_HAS_ONLY_VOLUME) { + return make_float3(1.0f, 1.0f, 1.0f); + } + else if (sd->flag & SD_TRANSPARENT) { + return sd->closure_transparent_extinction; + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } } ccl_device void 
shader_bsdf_disable_transparency(KernelGlobals *kg, ShaderData *sd) { - if(sd->flag & SD_TRANSPARENT) { - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { - sc->sample_weight = 0.0f; - sc->weight = make_float3(0.0f, 0.0f, 0.0f); - } - } - - sd->flag &= ~SD_TRANSPARENT; - } + if (sd->flag & SD_TRANSPARENT) { + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) { + sc->sample_weight = 0.0f; + sc->weight = make_float3(0.0f, 0.0f, 0.0f); + } + } + + sd->flag &= ~SD_TRANSPARENT; + } } ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd) { - float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd); + float3 alpha = make_float3(1.0f, 1.0f, 1.0f) - shader_bsdf_transparency(kg, sd); - alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f)); - alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f)); + alpha = max(alpha, make_float3(0.0f, 0.0f, 0.0f)); + alpha = min(alpha, make_float3(1.0f, 1.0f, 1.0f)); - return alpha; + return alpha; } ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd) { - float3 eval = make_float3(0.0f, 0.0f, 0.0f); + float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) - eval += sc->weight; - } + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) + eval += sc->weight; + } - return eval; + return eval; } ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd) { - float3 eval = make_float3(0.0f, 0.0f, 0.0f); + float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - 
if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) - eval += sc->weight; - } + if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) + eval += sc->weight; + } - return eval; + return eval; } ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd) { - float3 eval = make_float3(0.0f, 0.0f, 0.0f); + float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) - eval += sc->weight; - } + if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type)) + eval += sc->weight; + } - return eval; + return eval; } ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) { - float3 eval = make_float3(0.0f, 0.0f, 0.0f); + float3 eval = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type)) - eval += sc->weight; - } + if (CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type)) + eval += sc->weight; + } - return eval; + return eval; } ccl_device float3 shader_bsdf_average_normal(KernelGlobals *kg, ShaderData *sd) { - float3 N = make_float3(0.0f, 0.0f, 0.0f); + float3 N = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) - N += sc->N*fabsf(average(sc->weight)); - } + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) + N += sc->N * fabsf(average(sc->weight)); + } - return (is_zero(N))? sd->N : normalize(N); + return (is_zero(N)) ? 
sd->N : normalize(N); } ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_) { - float3 eval = make_float3(0.0f, 0.0f, 0.0f); - float3 N = make_float3(0.0f, 0.0f, 0.0f); + float3 eval = make_float3(0.0f, 0.0f, 0.0f); + float3 N = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { - const DiffuseBsdf *bsdf = (const DiffuseBsdf*)sc; - eval += sc->weight*ao_factor; - N += bsdf->N*fabsf(average(sc->weight)); - } - } + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) { + const DiffuseBsdf *bsdf = (const DiffuseBsdf *)sc; + eval += sc->weight * ao_factor; + N += bsdf->N * fabsf(average(sc->weight)); + } + } - *N_ = (is_zero(N))? sd->N : normalize(N); - return eval; + *N_ = (is_zero(N)) ? sd->N : normalize(N); + return eval; } #ifdef __SUBSURFACE__ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_) { - float3 eval = make_float3(0.0f, 0.0f, 0.0f); - float3 N = make_float3(0.0f, 0.0f, 0.0f); - float texture_blur = 0.0f, weight_sum = 0.0f; + float3 eval = make_float3(0.0f, 0.0f, 0.0f); + float3 N = make_float3(0.0f, 0.0f, 0.0f); + float texture_blur = 0.0f, weight_sum = 0.0f; - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSSRDF(sc->type)) { - const Bssrdf *bssrdf = (const Bssrdf*)sc; - float avg_weight = fabsf(average(sc->weight)); + if (CLOSURE_IS_BSSRDF(sc->type)) { + const Bssrdf *bssrdf = (const Bssrdf *)sc; + float avg_weight = fabsf(average(sc->weight)); - N += bssrdf->N*avg_weight; - eval += sc->weight; - texture_blur += bssrdf->texture_blur*avg_weight; - weight_sum += avg_weight; - } - } + N += bssrdf->N * avg_weight; + eval += sc->weight; + texture_blur += 
bssrdf->texture_blur * avg_weight; + weight_sum += avg_weight; + } + } - if(N_) - *N_ = (is_zero(N))? sd->N: normalize(N); + if (N_) + *N_ = (is_zero(N)) ? sd->N : normalize(N); - if(texture_blur_) - *texture_blur_ = safe_divide(texture_blur, weight_sum); + if (texture_blur_) + *texture_blur_ = safe_divide(texture_blur, weight_sum); - return eval; + return eval; } -#endif /* __SUBSURFACE__ */ +#endif /* __SUBSURFACE__ */ /* Constant emission optimization */ ccl_device bool shader_constant_emission_eval(KernelGlobals *kg, int shader, float3 *eval) { - int shader_index = shader & SHADER_MASK; - int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags; + int shader_index = shader & SHADER_MASK; + int shader_flag = kernel_tex_fetch(__shaders, shader_index).flags; - if (shader_flag & SD_HAS_CONSTANT_EMISSION) { - *eval = make_float3( - kernel_tex_fetch(__shaders, shader_index).constant_emission[0], - kernel_tex_fetch(__shaders, shader_index).constant_emission[1], - kernel_tex_fetch(__shaders, shader_index).constant_emission[2]); + if (shader_flag & SD_HAS_CONSTANT_EMISSION) { + *eval = make_float3(kernel_tex_fetch(__shaders, shader_index).constant_emission[0], + kernel_tex_fetch(__shaders, shader_index).constant_emission[1], + kernel_tex_fetch(__shaders, shader_index).constant_emission[2]); - return true; - } + return true; + } - return false; + return false; } /* Background */ ccl_device float3 shader_background_eval(ShaderData *sd) { - if(sd->flag & SD_EMISSION) { - return sd->closure_emission_background; - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } + if (sd->flag & SD_EMISSION) { + return sd->closure_emission_background; + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } } /* Emission */ ccl_device float3 shader_emissive_eval(ShaderData *sd) { - if(sd->flag & SD_EMISSION) { - return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; - } - else { - return make_float3(0.0f, 0.0f, 0.0f); - } + if (sd->flag & SD_EMISSION) { 
+ return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background; + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } } /* Holdout */ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) { - float3 weight = make_float3(0.0f, 0.0f, 0.0f); + float3 weight = make_float3(0.0f, 0.0f, 0.0f); - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_HOLDOUT(sc->type)) - weight += sc->weight; - } + if (CLOSURE_IS_HOLDOUT(sc->type)) + weight += sc->weight; + } - return weight; + return weight; } /* Surface Evaluation */ -ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, - ccl_addr_space PathState *state, int path_flag) +ccl_device void shader_eval_surface(KernelGlobals *kg, + ShaderData *sd, + ccl_addr_space PathState *state, + int path_flag) { - PROFILING_INIT(kg, PROFILING_SHADER_EVAL); - - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. */ - int max_closures; - if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = kernel_data.integrator.max_closures; - } - - sd->num_closure = 0; - sd->num_closure_left = max_closures; + PROFILING_INIT(kg, PROFILING_SHADER_EVAL); + + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. 
*/ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.integrator.max_closures; + } + + sd->num_closure = 0; + sd->num_closure_left = max_closures; #ifdef __OSL__ - if(kg->osl) { - if (sd->object == OBJECT_NONE) { - OSLShader::eval_background(kg, sd, state, path_flag); - } - else { - OSLShader::eval_surface(kg, sd, state, path_flag); - } - } - else + if (kg->osl) { + if (sd->object == OBJECT_NONE) { + OSLShader::eval_background(kg, sd, state, path_flag); + } + else { + OSLShader::eval_surface(kg, sd, state, path_flag); + } + } + else #endif - { + { #ifdef __SVM__ - svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag); + svm_eval_nodes(kg, sd, state, SHADER_TYPE_SURFACE, path_flag); #else - if(sd->object == OBJECT_NONE) { - sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); - sd->flag |= SD_EMISSION; - } - else { - DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, - sizeof(DiffuseBsdf), - make_float3(0.8f, 0.8f, 0.8f)); - if(bsdf != NULL) { - bsdf->N = sd->N; - sd->flag |= bsdf_diffuse_setup(bsdf); - } - } + if (sd->object == OBJECT_NONE) { + sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f); + sd->flag |= SD_EMISSION; + } + else { + DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc( + sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); + if (bsdf != NULL) { + bsdf->N = sd->N; + sd->flag |= bsdf_diffuse_setup(bsdf); + } + } #endif - } + } - if(sd->flag & SD_BSDF_NEEDS_LCG) { - sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953); - } + if (sd->flag & SD_BSDF_NEEDS_LCG) { + sd->lcg_state = lcg_state_init_addrspace(state, 0xb4bc3953); + } } /* Volume */ #ifdef __VOLUME__ -ccl_device_inline void _shader_volume_phase_multi_eval(const ShaderData *sd, const float3 omega_in, float *pdf, - int skip_phase, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight) +ccl_device_inline void 
_shader_volume_phase_multi_eval(const ShaderData *sd, + const float3 omega_in, + float *pdf, + int skip_phase, + BsdfEval *result_eval, + float sum_pdf, + float sum_sample_weight) { - for(int i = 0; i < sd->num_closure; i++) { - if(i == skip_phase) - continue; + for (int i = 0; i < sd->num_closure; i++) { + if (i == skip_phase) + continue; - const ShaderClosure *sc = &sd->closure[i]; + const ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_PHASE(sc->type)) { - float phase_pdf = 0.0f; - float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf); + if (CLOSURE_IS_PHASE(sc->type)) { + float phase_pdf = 0.0f; + float3 eval = volume_phase_eval(sd, sc, omega_in, &phase_pdf); - if(phase_pdf != 0.0f) { - bsdf_eval_accum(result_eval, sc->type, eval, 1.0f); - sum_pdf += phase_pdf*sc->sample_weight; - } + if (phase_pdf != 0.0f) { + bsdf_eval_accum(result_eval, sc->type, eval, 1.0f); + sum_pdf += phase_pdf * sc->sample_weight; + } - sum_sample_weight += sc->sample_weight; - } - } + sum_sample_weight += sc->sample_weight; + } + } - *pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f; + *pdf = (sum_sample_weight > 0.0f) ? 
sum_pdf / sum_sample_weight : 0.0f; } -ccl_device void shader_volume_phase_eval(KernelGlobals *kg, const ShaderData *sd, - const float3 omega_in, BsdfEval *eval, float *pdf) +ccl_device void shader_volume_phase_eval( + KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, BsdfEval *eval, float *pdf) { - PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL); + PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_EVAL); - bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); + bsdf_eval_init( + eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); - _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f); + _shader_volume_phase_multi_eval(sd, omega_in, pdf, -1, eval, 0.0f, 0.0f); } -ccl_device int shader_volume_phase_sample(KernelGlobals *kg, const ShaderData *sd, - float randu, float randv, BsdfEval *phase_eval, - float3 *omega_in, differential3 *domega_in, float *pdf) +ccl_device int shader_volume_phase_sample(KernelGlobals *kg, + const ShaderData *sd, + float randu, + float randv, + BsdfEval *phase_eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { - PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE); + PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE); - int sampled = 0; + int sampled = 0; - if(sd->num_closure > 1) { - /* pick a phase closure based on sample weights */ - float sum = 0.0f; + if (sd->num_closure > 1) { + /* pick a phase closure based on sample weights */ + float sum = 0.0f; - for(sampled = 0; sampled < sd->num_closure; sampled++) { - const ShaderClosure *sc = &sd->closure[sampled]; + for (sampled = 0; sampled < sd->num_closure; sampled++) { + const ShaderClosure *sc = &sd->closure[sampled]; - if(CLOSURE_IS_PHASE(sc->type)) - sum += sc->sample_weight; - } + if (CLOSURE_IS_PHASE(sc->type)) + sum += sc->sample_weight; + } - float r = randu*sum; - float partial_sum = 0.0f; + float r = randu * sum; + float partial_sum = 0.0f; - 
for(sampled = 0; sampled < sd->num_closure; sampled++) { - const ShaderClosure *sc = &sd->closure[sampled]; + for (sampled = 0; sampled < sd->num_closure; sampled++) { + const ShaderClosure *sc = &sd->closure[sampled]; - if(CLOSURE_IS_PHASE(sc->type)) { - float next_sum = partial_sum + sc->sample_weight; + if (CLOSURE_IS_PHASE(sc->type)) { + float next_sum = partial_sum + sc->sample_weight; - if(r <= next_sum) { - /* Rescale to reuse for BSDF direction sample. */ - randu = (r - partial_sum) / sc->sample_weight; - break; - } + if (r <= next_sum) { + /* Rescale to reuse for BSDF direction sample. */ + randu = (r - partial_sum) / sc->sample_weight; + break; + } - partial_sum = next_sum; - } - } + partial_sum = next_sum; + } + } - if(sampled == sd->num_closure) { - *pdf = 0.0f; - return LABEL_NONE; - } - } + if (sampled == sd->num_closure) { + *pdf = 0.0f; + return LABEL_NONE; + } + } - /* todo: this isn't quite correct, we don't weight anisotropy properly - * depending on color channels, even if this is perhaps not a common case */ - const ShaderClosure *sc = &sd->closure[sampled]; - int label; - float3 eval; + /* todo: this isn't quite correct, we don't weight anisotropy properly + * depending on color channels, even if this is perhaps not a common case */ + const ShaderClosure *sc = &sd->closure[sampled]; + int label; + float3 eval; - *pdf = 0.0f; - label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); + *pdf = 0.0f; + label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - if(*pdf != 0.0f) { - bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass); - } + if (*pdf != 0.0f) { + bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass); + } - return label; + return label; } -ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *sd, - const ShaderClosure *sc, float randu, float randv, BsdfEval *phase_eval, - float3 *omega_in, differential3 
*domega_in, float *pdf) +ccl_device int shader_phase_sample_closure(KernelGlobals *kg, + const ShaderData *sd, + const ShaderClosure *sc, + float randu, + float randv, + BsdfEval *phase_eval, + float3 *omega_in, + differential3 *domega_in, + float *pdf) { - PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE); + PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE); - int label; - float3 eval; + int label; + float3 eval; - *pdf = 0.0f; - label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); + *pdf = 0.0f; + label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); - if(*pdf != 0.0f) - bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass); + if (*pdf != 0.0f) + bsdf_eval_init(phase_eval, sc->type, eval, kernel_data.film.use_light_pass); - return label; + return label; } /* Volume Evaluation */ @@ -1228,83 +1267,85 @@ ccl_device_inline void shader_eval_volume(KernelGlobals *kg, ccl_addr_space VolumeStack *stack, int path_flag) { - /* If path is being terminated, we are tracing a shadow ray or evaluating - * emission, then we don't need to store closures. The emission and shadow - * shader data also do not have a closure array to save GPU memory. */ - int max_closures; - if(path_flag & (PATH_RAY_TERMINATE|PATH_RAY_SHADOW|PATH_RAY_EMISSION)) { - max_closures = 0; - } - else { - max_closures = kernel_data.integrator.max_closures; - } - - /* reset closures once at the start, we will be accumulating the closures - * for all volumes in the stack into a single array of closures */ - sd->num_closure = 0; - sd->num_closure_left = max_closures; - sd->flag = 0; - sd->object_flag = 0; - - for(int i = 0; stack[i].shader != SHADER_NONE; i++) { - /* setup shaderdata from stack. 
it's mostly setup already in - * shader_setup_from_volume, this switching should be quick */ - sd->object = stack[i].object; - sd->lamp = LAMP_NONE; - sd->shader = stack[i].shader; - - sd->flag &= ~SD_SHADER_FLAGS; - sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; - sd->object_flag &= ~SD_OBJECT_FLAGS; - - if(sd->object != OBJECT_NONE) { - sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); - -#ifdef __OBJECT_MOTION__ - /* todo: this is inefficient for motion blur, we should be - * caching matrices instead of recomputing them each step */ - shader_setup_object_transforms(kg, sd, sd->time); -#endif - } - - /* evaluate shader */ -#ifdef __SVM__ -# ifdef __OSL__ - if(kg->osl) { - OSLShader::eval_volume(kg, sd, state, path_flag); - } - else + /* If path is being terminated, we are tracing a shadow ray or evaluating + * emission, then we don't need to store closures. The emission and shadow + * shader data also do not have a closure array to save GPU memory. */ + int max_closures; + if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) { + max_closures = 0; + } + else { + max_closures = kernel_data.integrator.max_closures; + } + + /* reset closures once at the start, we will be accumulating the closures + * for all volumes in the stack into a single array of closures */ + sd->num_closure = 0; + sd->num_closure_left = max_closures; + sd->flag = 0; + sd->object_flag = 0; + + for (int i = 0; stack[i].shader != SHADER_NONE; i++) { + /* setup shaderdata from stack. 
it's mostly setup already in + * shader_setup_from_volume, this switching should be quick */ + sd->object = stack[i].object; + sd->lamp = LAMP_NONE; + sd->shader = stack[i].shader; + + sd->flag &= ~SD_SHADER_FLAGS; + sd->flag |= kernel_tex_fetch(__shaders, (sd->shader & SHADER_MASK)).flags; + sd->object_flag &= ~SD_OBJECT_FLAGS; + + if (sd->object != OBJECT_NONE) { + sd->object_flag |= kernel_tex_fetch(__object_flag, sd->object); + +# ifdef __OBJECT_MOTION__ + /* todo: this is inefficient for motion blur, we should be + * caching matrices instead of recomputing them each step */ + shader_setup_object_transforms(kg, sd, sd->time); +# endif + } + + /* evaluate shader */ +# ifdef __SVM__ +# ifdef __OSL__ + if (kg->osl) { + OSLShader::eval_volume(kg, sd, state, path_flag); + } + else +# endif + { + svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag); + } # endif - { - svm_eval_nodes(kg, sd, state, SHADER_TYPE_VOLUME, path_flag); - } -#endif - /* merge closures to avoid exceeding number of closures limit */ - if(i > 0) - shader_merge_closures(sd); - } + /* merge closures to avoid exceeding number of closures limit */ + if (i > 0) + shader_merge_closures(sd); + } } -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ /* Displacement Evaluation */ -ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state) +ccl_device void shader_eval_displacement(KernelGlobals *kg, + ShaderData *sd, + ccl_addr_space PathState *state) { - sd->num_closure = 0; - sd->num_closure_left = 0; + sd->num_closure = 0; + sd->num_closure_left = 0; - /* this will modify sd->P */ + /* this will modify sd->P */ #ifdef __SVM__ # ifdef __OSL__ - if(kg->osl) - OSLShader::eval_displacement(kg, sd, state); - else + if (kg->osl) + OSLShader::eval_displacement(kg, sd, state); + else # endif - { - svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0); - } + { + svm_eval_nodes(kg, sd, state, SHADER_TYPE_DISPLACEMENT, 0); + } #endif } @@ -1313,29 
+1354,29 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ccl_ #ifdef __TRANSPARENT_SHADOWS__ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect) { - int prim = kernel_tex_fetch(__prim_index, isect->prim); - int shader = 0; + int prim = kernel_tex_fetch(__prim_index, isect->prim); + int shader = 0; -#ifdef __HAIR__ - if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) { -#endif - shader = kernel_tex_fetch(__tri_shader, prim); -#ifdef __HAIR__ - } - else { - float4 str = kernel_tex_fetch(__curves, prim); - shader = __float_as_int(str.z); - } -#endif - int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; +# ifdef __HAIR__ + if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) { +# endif + shader = kernel_tex_fetch(__tri_shader, prim); +# ifdef __HAIR__ + } + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +# endif + int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags; - return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; + return (flag & SD_HAS_TRANSPARENT_SHADOW) != 0; } -#endif /* __TRANSPARENT_SHADOWS__ */ +#endif /* __TRANSPARENT_SHADOWS__ */ ccl_device float shader_cryptomatte_id(KernelGlobals *kg, int shader) { - return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; + return kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).cryptomatte_id; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index fafa3ad4bfa..6af1369feab 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN typedef struct VolumeState { # ifdef __SPLIT_KERNEL__ # else - PathState ps; + PathState ps; # endif } VolumeState; @@ -28,77 +28,70 @@ typedef struct VolumeState { # ifdef __SPLIT_KERNEL__ ccl_addr_space # endif -ccl_device_inline PathState 
*shadow_blocked_volume_path_state( - KernelGlobals *kg, - VolumeState *volume_state, - ccl_addr_space PathState *state, - ShaderData *sd, - Ray *ray) + ccl_device_inline PathState * + shadow_blocked_volume_path_state(KernelGlobals *kg, + VolumeState *volume_state, + ccl_addr_space PathState *state, + ShaderData *sd, + Ray *ray) { # ifdef __SPLIT_KERNEL__ - ccl_addr_space PathState *ps = - &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; + ccl_addr_space PathState *ps = + &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)]; # else - PathState *ps = &volume_state->ps; + PathState *ps = &volume_state->ps; # endif - *ps = *state; - /* We are checking for shadow on the "other" side of the surface, so need - * to discard volume we are currently at. - */ - if(dot(sd->Ng, ray->D) < 0.0f) { - kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack); - } - return ps; + *ps = *state; + /* We are checking for shadow on the "other" side of the surface, so need + * to discard volume we are currently at. + */ + if (dot(sd->Ng, ray->D) < 0.0f) { + kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack); + } + return ps; } -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ /* Attenuate throughput accordingly to the given intersection event. * Returns true if the throughput is zero and traversal can be aborted. */ ccl_device_forceinline bool shadow_handle_transparent_isect( - KernelGlobals *kg, - ShaderData *shadow_sd, - ccl_addr_space PathState *state, -# ifdef __VOLUME__ - ccl_addr_space struct PathState *volume_state, -# endif - Intersection *isect, - Ray *ray, - float3 *throughput) + KernelGlobals *kg, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, +#ifdef __VOLUME__ + ccl_addr_space struct PathState *volume_state, +#endif + Intersection *isect, + Ray *ray, + float3 *throughput) { #ifdef __VOLUME__ - /* Attenuation between last surface and next surface. 
*/ - if(volume_state->volume_stack[0].shader != SHADER_NONE) { - Ray segment_ray = *ray; - segment_ray.t = isect->t; - kernel_volume_shadow(kg, - shadow_sd, - volume_state, - &segment_ray, - throughput); - } + /* Attenuation between last surface and next surface. */ + if (volume_state->volume_stack[0].shader != SHADER_NONE) { + Ray segment_ray = *ray; + segment_ray.t = isect->t; + kernel_volume_shadow(kg, shadow_sd, volume_state, &segment_ray, throughput); + } #endif - /* Setup shader data at surface. */ - shader_setup_from_ray(kg, shadow_sd, isect, ray); - /* Attenuation from transparent surface. */ - if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { - path_state_modify_bounce(state, true); - shader_eval_surface(kg, - shadow_sd, - state, - PATH_RAY_SHADOW); - path_state_modify_bounce(state, false); - *throughput *= shader_bsdf_transparency(kg, shadow_sd); - } - /* Stop if all light is blocked. */ - if(is_zero(*throughput)) { - return true; - } + /* Setup shader data at surface. */ + shader_setup_from_ray(kg, shadow_sd, isect, ray); + /* Attenuation from transparent surface. */ + if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) { + path_state_modify_bounce(state, true); + shader_eval_surface(kg, shadow_sd, state, PATH_RAY_SHADOW); + path_state_modify_bounce(state, false); + *throughput *= shader_bsdf_transparency(kg, shadow_sd); + } + /* Stop if all light is blocked. */ + if (is_zero(*throughput)) { + return true; + } #ifdef __VOLUME__ - /* Exit/enter volume. */ - kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack); + /* Exit/enter volume. */ + kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack); #endif - return false; + return false; } /* Special version which only handles opaque shadows. 
*/ @@ -110,19 +103,15 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, Intersection *isect, float3 *shadow) { - const bool blocked = scene_intersect(kg, - *ray, - visibility & PATH_RAY_SHADOW_OPAQUE, - isect, - NULL, - 0.0f, 0.0f); + const bool blocked = scene_intersect( + kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); #ifdef __VOLUME__ - if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { - /* Apply attenuation from current volume shader. */ - kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); - } + if (!blocked && state->volume_stack[0].shader != SHADER_NONE) { + /* Apply attenuation from current volume shader. */ + kernel_volume_shadow(kg, shadow_sd, state, ray, shadow); + } #endif - return blocked; + return blocked; } #ifdef __TRANSPARENT_SHADOWS__ @@ -169,94 +158,80 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, uint max_hits, float3 *shadow) { - /* Intersect to find an opaque surface, or record all transparent - * surface hits. - */ - uint num_hits; - const bool blocked = scene_intersect_shadow_all(kg, - ray, - hits, - visibility, - max_hits, - &num_hits); + /* Intersect to find an opaque surface, or record all transparent + * surface hits. + */ + uint num_hits; + const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits); # ifdef __VOLUME__ - VolumeState volume_state; + VolumeState volume_state; # endif - /* If no opaque surface found but we did find transparent hits, - * shade them. - */ - if(!blocked && num_hits > 0) { - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float3 Pend = ray->P + ray->D*ray->t; - float last_t = 0.0f; - int bounce = state->transparent_bounce; - Intersection *isect = hits; + /* If no opaque surface found but we did find transparent hits, + * shade them. 
+ */ + if (!blocked && num_hits > 0) { + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float3 Pend = ray->P + ray->D * ray->t; + float last_t = 0.0f; + int bounce = state->transparent_bounce; + Intersection *isect = hits; # ifdef __VOLUME__ # ifdef __SPLIT_KERNEL__ - ccl_addr_space + ccl_addr_space # endif - PathState *ps = shadow_blocked_volume_path_state(kg, - &volume_state, - state, - sd, - ray); + PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray); # endif - sort_intersections(hits, num_hits); - for(int hit = 0; hit < num_hits; hit++, isect++) { - /* Adjust intersection distance for moving ray forward. */ - float new_t = isect->t; - isect->t -= last_t; - /* Skip hit if we did not move forward, step by step raytracing - * would have skipped it as well then. - */ - if(last_t == new_t) { - continue; - } - last_t = new_t; - /* Attenuate the throughput. */ - if(shadow_handle_transparent_isect(kg, - shadow_sd, - state, -#ifdef __VOLUME__ - ps, -#endif - isect, - ray, - &throughput)) - { - return true; - } - /* Move ray forward. */ - ray->P = shadow_sd->P; - if(ray->t != FLT_MAX) { - ray->D = normalize_len(Pend - ray->P, &ray->t); - } - bounce++; - } + sort_intersections(hits, num_hits); + for (int hit = 0; hit < num_hits; hit++, isect++) { + /* Adjust intersection distance for moving ray forward. */ + float new_t = isect->t; + isect->t -= last_t; + /* Skip hit if we did not move forward, step by step raytracing + * would have skipped it as well then. + */ + if (last_t == new_t) { + continue; + } + last_t = new_t; + /* Attenuate the throughput. */ + if (shadow_handle_transparent_isect(kg, + shadow_sd, + state, # ifdef __VOLUME__ - /* Attenuation for last line segment towards light. 
*/ - if(ps->volume_stack[0].shader != SHADER_NONE) { - kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); - } + ps, # endif - *shadow = throughput; - return is_zero(throughput); - } + isect, + ray, + &throughput)) { + return true; + } + /* Move ray forward. */ + ray->P = shadow_sd->P; + if (ray->t != FLT_MAX) { + ray->D = normalize_len(Pend - ray->P, &ray->t); + } + bounce++; + } # ifdef __VOLUME__ - if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { - /* Apply attenuation from current volume shader. */ + /* Attenuation for last line segment towards light. */ + if (ps->volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); + } +# endif + *shadow = throughput; + return is_zero(throughput); + } +# ifdef __VOLUME__ + if (!blocked && state->volume_stack[0].shader != SHADER_NONE) { + /* Apply attenuation from current volume shader. */ # ifdef __SPLIT_KERNEL__ - ccl_addr_space + ccl_addr_space # endif - PathState *ps = shadow_blocked_volume_path_state(kg, - &volume_state, - state, - sd, - ray); - kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow); - } + PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray); + kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow); + } # endif - return blocked; + return blocked; } /* Here we do all device specific trickery before invoking actual traversal @@ -272,43 +247,36 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, float3 *shadow) { # ifdef __SPLIT_KERNEL__ - Intersection hits_[SHADOW_STACK_MAX_HITS]; - Intersection *hits = &hits_[0]; + Intersection hits_[SHADOW_STACK_MAX_HITS]; + Intersection *hits = &hits_[0]; # elif defined(__KERNEL_CUDA__) - Intersection *hits = kg->hits_stack; + Intersection *hits = kg->hits_stack; # else - Intersection hits_stack[SHADOW_STACK_MAX_HITS]; - Intersection *hits = hits_stack; + Intersection hits_stack[SHADOW_STACK_MAX_HITS]; + Intersection *hits = hits_stack; # endif # ifndef 
__KERNEL_GPU__ - /* Prefer to use stack but use dynamic allocation if too deep max hits - * we need max_hits + 1 storage space due to the logic in - * scene_intersect_shadow_all which will first store and then check if - * the limit is exceeded. - * - * Ignore this on GPU because of slow/unavailable malloc(). - */ - if(max_hits + 1 > SHADOW_STACK_MAX_HITS) { - if(kg->transparent_shadow_intersections == NULL) { - const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; - kg->transparent_shadow_intersections = - (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1)); - } - hits = kg->transparent_shadow_intersections; - } -# endif /* __KERNEL_GPU__ */ - /* Invoke actual traversal. */ - return shadow_blocked_transparent_all_loop(kg, - sd, - shadow_sd, - state, - visibility, - ray, - hits, - max_hits, - shadow); + /* Prefer to use stack but use dynamic allocation if too deep max hits + * we need max_hits + 1 storage space due to the logic in + * scene_intersect_shadow_all which will first store and then check if + * the limit is exceeded. + * + * Ignore this on GPU because of slow/unavailable malloc(). + */ + if (max_hits + 1 > SHADOW_STACK_MAX_HITS) { + if (kg->transparent_shadow_intersections == NULL) { + const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; + kg->transparent_shadow_intersections = (Intersection *)malloc(sizeof(Intersection) * + (transparent_max_bounce + 1)); + } + hits = kg->transparent_shadow_intersections; + } +# endif /* __KERNEL_GPU__ */ + /* Invoke actual traversal. 
*/ + return shadow_blocked_transparent_all_loop( + kg, sd, shadow_sd, state, visibility, ray, hits, max_hits, shadow); } -# endif /* __SHADOW_RECORD_ALL__ */ +# endif /* __SHADOW_RECORD_ALL__ */ # if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__) /* Shadow function to compute how much light is blocked, @@ -323,130 +291,100 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg, /* This function is only implementing device-independent traversal logic * which requires some precalculation done. */ -ccl_device bool shadow_blocked_transparent_stepped_loop( - KernelGlobals *kg, - ShaderData *sd, - ShaderData *shadow_sd, - ccl_addr_space PathState *state, - const uint visibility, - Ray *ray, - Intersection *isect, - const bool blocked, - const bool is_transparent_isect, - float3 *shadow) +ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg, + ShaderData *sd, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + const uint visibility, + Ray *ray, + Intersection *isect, + const bool blocked, + const bool is_transparent_isect, + float3 *shadow) { # ifdef __VOLUME__ - VolumeState volume_state; + VolumeState volume_state; # endif - if(blocked && is_transparent_isect) { - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - float3 Pend = ray->P + ray->D*ray->t; - int bounce = state->transparent_bounce; + if (blocked && is_transparent_isect) { + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float3 Pend = ray->P + ray->D * ray->t; + int bounce = state->transparent_bounce; # ifdef __VOLUME__ # ifdef __SPLIT_KERNEL__ - ccl_addr_space + ccl_addr_space # endif - PathState *ps = shadow_blocked_volume_path_state(kg, - &volume_state, - state, - sd, - ray); + PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray); # endif - for(;;) { - if(bounce >= kernel_data.integrator.transparent_max_bounce) { - return true; - } - if(!scene_intersect(kg, - *ray, - visibility & PATH_RAY_SHADOW_TRANSPARENT, - 
isect, - NULL, - 0.0f, 0.0f)) - { - break; - } - if(!shader_transparent_shadow(kg, isect)) { - return true; - } - /* Attenuate the throughput. */ - if(shadow_handle_transparent_isect(kg, - shadow_sd, - state, -#ifdef __VOLUME__ - ps, -#endif - isect, - ray, - &throughput)) - { - return true; - } - /* Move ray forward. */ - ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng); - if(ray->t != FLT_MAX) { - ray->D = normalize_len(Pend - ray->P, &ray->t); - } - bounce++; - } + for (;;) { + if (bounce >= kernel_data.integrator.transparent_max_bounce) { + return true; + } + if (!scene_intersect( + kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f)) { + break; + } + if (!shader_transparent_shadow(kg, isect)) { + return true; + } + /* Attenuate the throughput. */ + if (shadow_handle_transparent_isect(kg, + shadow_sd, + state, +# ifdef __VOLUME__ + ps, +# endif + isect, + ray, + &throughput)) { + return true; + } + /* Move ray forward. */ + ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng); + if (ray->t != FLT_MAX) { + ray->D = normalize_len(Pend - ray->P, &ray->t); + } + bounce++; + } # ifdef __VOLUME__ - /* Attenuation for last line segment towards light. */ - if(ps->volume_stack[0].shader != SHADER_NONE) { - kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); - } + /* Attenuation for last line segment towards light. */ + if (ps->volume_stack[0].shader != SHADER_NONE) { + kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput); + } # endif - *shadow *= throughput; - return is_zero(throughput); - } + *shadow *= throughput; + return is_zero(throughput); + } # ifdef __VOLUME__ - if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { - /* Apply attenuation from current volume shader. */ + if (!blocked && state->volume_stack[0].shader != SHADER_NONE) { + /* Apply attenuation from current volume shader. 
*/ # ifdef __SPLIT_KERNEL__ - ccl_addr_space + ccl_addr_space # endif - PathState *ps = shadow_blocked_volume_path_state(kg, - &volume_state, - state, - sd, - ray); - kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow); - } + PathState *ps = shadow_blocked_volume_path_state(kg, &volume_state, state, sd, ray); + kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow); + } # endif - return blocked; + return blocked; } -ccl_device bool shadow_blocked_transparent_stepped( - KernelGlobals *kg, - ShaderData *sd, - ShaderData *shadow_sd, - ccl_addr_space PathState *state, - const uint visibility, - Ray *ray, - Intersection *isect, - float3 *shadow) +ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg, + ShaderData *sd, + ShaderData *shadow_sd, + ccl_addr_space PathState *state, + const uint visibility, + Ray *ray, + Intersection *isect, + float3 *shadow) { - bool blocked = scene_intersect(kg, - *ray, - visibility & PATH_RAY_SHADOW_OPAQUE, - isect, - NULL, - 0.0f, 0.0f); - bool is_transparent_isect = blocked - ? shader_transparent_shadow(kg, isect) - : false; - return shadow_blocked_transparent_stepped_loop(kg, - sd, - shadow_sd, - state, - visibility, - ray, - isect, - blocked, - is_transparent_isect, - shadow); + bool blocked = scene_intersect( + kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f); + bool is_transparent_isect = blocked ? 
shader_transparent_shadow(kg, isect) : false; + return shadow_blocked_transparent_stepped_loop( + kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow); } -# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */ -#endif /* __TRANSPARENT_SHADOWS__ */ +# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */ +#endif /* __TRANSPARENT_SHADOWS__ */ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, ShaderData *sd, @@ -455,100 +393,65 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, Ray *ray_input, float3 *shadow) { - Ray *ray = ray_input; - Intersection isect; - /* Some common early checks. */ - *shadow = make_float3(1.0f, 1.0f, 1.0f); - if(ray->t == 0.0f) { - return false; - } + Ray *ray = ray_input; + Intersection isect; + /* Some common early checks. */ + *shadow = make_float3(1.0f, 1.0f, 1.0f); + if (ray->t == 0.0f) { + return false; + } #ifdef __SHADOW_TRICKS__ - const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) - ? PATH_RAY_SHADOW_NON_CATCHER - : PATH_RAY_SHADOW; + const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER : + PATH_RAY_SHADOW; #else - const uint visibility = PATH_RAY_SHADOW; + const uint visibility = PATH_RAY_SHADOW; #endif - /* Do actual shadow shading. */ - /* First of all, we check if integrator requires transparent shadows. - * if not, we use simplest and fastest ever way to calculate occlusion. - */ + /* Do actual shadow shading. */ + /* First of all, we check if integrator requires transparent shadows. + * if not, we use simplest and fastest ever way to calculate occlusion. 
+ */ #ifdef __TRANSPARENT_SHADOWS__ - if(!kernel_data.integrator.transparent_shadows) + if (!kernel_data.integrator.transparent_shadows) #endif - { - return shadow_blocked_opaque(kg, - shadow_sd, - state, - visibility, - ray, - &isect, - shadow); - } + { + return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow); + } #ifdef __TRANSPARENT_SHADOWS__ # ifdef __SHADOW_RECORD_ALL__ - /* For the transparent shadows we try to use record-all logic on the - * devices which supports this. - */ - const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; - /* Check transparent bounces here, for volume scatter which can do - * lighting before surface path termination is checked. - */ - if(state->transparent_bounce >= transparent_max_bounce) { - return true; - } - const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; + /* For the transparent shadows we try to use record-all logic on the + * devices which supports this. + */ + const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce; + /* Check transparent bounces here, for volume scatter which can do + * lighting before surface path termination is checked. + */ + if (state->transparent_bounce >= transparent_max_bounce) { + return true; + } + const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; # ifdef __KERNEL_GPU__ - /* On GPU we do trickey with tracing opaque ray first, this avoids speed - * regressions in some files. - * - * TODO(sergey): Check why using record-all behavior causes slowdown in such - * cases. Could that be caused by a higher spill pressure? - */ - const bool blocked = scene_intersect(kg, - *ray, - visibility & PATH_RAY_SHADOW_OPAQUE, - &isect, - NULL, - 0.0f, 0.0f); - const bool is_transparent_isect = blocked - ? 
shader_transparent_shadow(kg, &isect) - : false; - if(!blocked || !is_transparent_isect || - max_hits + 1 >= SHADOW_STACK_MAX_HITS) - { - return shadow_blocked_transparent_stepped_loop(kg, - sd, - shadow_sd, - state, - visibility, - ray, - &isect, - blocked, - is_transparent_isect, - shadow); - } -# endif /* __KERNEL_GPU__ */ - return shadow_blocked_transparent_all(kg, - sd, - shadow_sd, - state, - visibility, - ray, - max_hits, - shadow); + /* On GPU we do trickey with tracing opaque ray first, this avoids speed + * regressions in some files. + * + * TODO(sergey): Check why using record-all behavior causes slowdown in such + * cases. Could that be caused by a higher spill pressure? + */ + const bool blocked = scene_intersect( + kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); + const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false; + if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) { + return shadow_blocked_transparent_stepped_loop( + kg, sd, shadow_sd, state, visibility, ray, &isect, blocked, is_transparent_isect, shadow); + } +# endif /* __KERNEL_GPU__ */ + return shadow_blocked_transparent_all( + kg, sd, shadow_sd, state, visibility, ray, max_hits, shadow); # else /* __SHADOW_RECORD_ALL__ */ - /* Fallback to a slowest version which works on all devices. */ - return shadow_blocked_transparent_stepped(kg, - sd, - shadow_sd, - state, - visibility, - ray, - &isect, - shadow); -# endif /* __SHADOW_RECORD_ALL__ */ -#endif /* __TRANSPARENT_SHADOWS__ */ + /* Fallback to a slowest version which works on all devices. 
*/ + return shadow_blocked_transparent_stepped( + kg, sd, shadow_sd, state, visibility, ray, &isect, shadow); +# endif /* __SHADOW_RECORD_ALL__ */ +#endif /* __TRANSPARENT_SHADOWS__ */ } #undef SHADOW_STACK_MAX_HITS diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 96b717530ce..7510e50a962 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -22,317 +22,295 @@ CCL_NAMESPACE_BEGIN * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf */ -ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd, - const ShaderClosure *sc, - float disk_r, - float r, - bool all) +ccl_device_inline float3 +subsurface_scatter_eval(ShaderData *sd, const ShaderClosure *sc, float disk_r, float r, bool all) { - /* this is the veach one-sample model with balance heuristic, some pdf - * factors drop out when using balance heuristic weighting */ - float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f); - float pdf_sum = 0.0f; - float sample_weight_inv = 0.0f; + /* this is the veach one-sample model with balance heuristic, some pdf + * factors drop out when using balance heuristic weighting */ + float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f); + float pdf_sum = 0.0f; + float sample_weight_inv = 0.0f; - if(!all) { - float sample_weight_sum = 0.0f; + if (!all) { + float sample_weight_sum = 0.0f; - for(int i = 0; i < sd->num_closure; i++) { - sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + sc = &sd->closure[i]; - if(CLOSURE_IS_DISK_BSSRDF(sc->type)) { - sample_weight_sum += sc->sample_weight; - } - } + if (CLOSURE_IS_DISK_BSSRDF(sc->type)) { + sample_weight_sum += sc->sample_weight; + } + } - sample_weight_inv = 1.0f/sample_weight_sum; - } + sample_weight_inv = 1.0f / sample_weight_sum; + } - for(int i = 0; i < sd->num_closure; i++) { - sc = &sd->closure[i]; + for (int i = 0; i < sd->num_closure; i++) { + sc = &sd->closure[i]; - 
if(CLOSURE_IS_DISK_BSSRDF(sc->type)) { - /* in case of branched path integrate we sample all bssrdf's once, - * for path trace we pick one, so adjust pdf for that */ - float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv; + if (CLOSURE_IS_DISK_BSSRDF(sc->type)) { + /* in case of branched path integrate we sample all bssrdf's once, + * for path trace we pick one, so adjust pdf for that */ + float sample_weight = (all) ? 1.0f : sc->sample_weight * sample_weight_inv; - /* compute pdf */ - float3 eval = bssrdf_eval(sc, r); - float pdf = bssrdf_pdf(sc, disk_r); + /* compute pdf */ + float3 eval = bssrdf_eval(sc, r); + float pdf = bssrdf_pdf(sc, disk_r); - eval_sum += sc->weight * eval; - pdf_sum += sample_weight * pdf; - } - } + eval_sum += sc->weight * eval; + pdf_sum += sample_weight * pdf; + } + } - return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f); + return (pdf_sum > 0.0f) ? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f); } /* replace closures with a single diffuse bsdf closure after scatter step */ -ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N) +ccl_device void subsurface_scatter_setup_diffuse_bsdf( + KernelGlobals *kg, ShaderData *sd, ClosureType type, float roughness, float3 weight, float3 N) { - sd->flag &= ~SD_CLOSURE_FLAGS; - sd->num_closure = 0; - sd->num_closure_left = kernel_data.integrator.max_closures; + sd->flag &= ~SD_CLOSURE_FLAGS; + sd->num_closure = 0; + sd->num_closure_left = kernel_data.integrator.max_closures; #ifdef __PRINCIPLED__ - if(type == CLOSURE_BSSRDF_PRINCIPLED_ID || - type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) - { - PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight); - - if(bsdf) { - bsdf->N = N; - bsdf->roughness = roughness; - sd->flag |= bsdf_principled_diffuse_setup(bsdf); - - /* replace 
CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes - * can recognize it as not being a regular Disney principled diffuse closure */ - bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID; - } - } - else if(CLOSURE_IS_BSDF_BSSRDF(type) || - CLOSURE_IS_BSSRDF(type)) -#endif /* __PRINCIPLED__ */ - { - DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight); - - if(bsdf) { - bsdf->N = N; - sd->flag |= bsdf_diffuse_setup(bsdf); - - /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes - * can recognize it as not being a regular diffuse closure */ - bsdf->type = CLOSURE_BSDF_BSSRDF_ID; - } - } + if (type == CLOSURE_BSSRDF_PRINCIPLED_ID || type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) { + PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc( + sd, sizeof(PrincipledDiffuseBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness = roughness; + sd->flag |= bsdf_principled_diffuse_setup(bsdf); + + /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes + * can recognize it as not being a regular Disney principled diffuse closure */ + bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID; + } + } + else if (CLOSURE_IS_BSDF_BSSRDF(type) || CLOSURE_IS_BSSRDF(type)) +#endif /* __PRINCIPLED__ */ + { + DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight); + + if (bsdf) { + bsdf->N = N; + sd->flag |= bsdf_diffuse_setup(bsdf); + + /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes + * can recognize it as not being a regular diffuse closure */ + bsdf->type = CLOSURE_BSDF_BSSRDF_ID; + } + } } /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */ ccl_device float3 subsurface_color_pow(float3 color, float exponent) { - color = max(color, make_float3(0.0f, 0.0f, 0.0f)); - - if(exponent == 1.0f) { - /* nothing to do */ - } - else if(exponent == 0.5f) { - color.x = sqrtf(color.x); - color.y = 
sqrtf(color.y); - color.z = sqrtf(color.z); - } - else { - color.x = powf(color.x, exponent); - color.y = powf(color.y, exponent); - color.z = powf(color.z, exponent); - } - - return color; + color = max(color, make_float3(0.0f, 0.0f, 0.0f)); + + if (exponent == 1.0f) { + /* nothing to do */ + } + else if (exponent == 0.5f) { + color.x = sqrtf(color.x); + color.y = sqrtf(color.y); + color.z = sqrtf(color.z); + } + else { + color.x = powf(color.x, exponent); + color.y = powf(color.y, exponent); + color.z = powf(color.z, exponent); + } + + return color; } -ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space PathState *state, - float3 *eval, - float3 *N) +ccl_device void subsurface_color_bump_blur( + KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float3 *eval, float3 *N) { - /* average color and texture blur at outgoing point */ - float texture_blur; - float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur); - - /* do we have bump mapping? */ - bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0; - - if(bump || texture_blur > 0.0f) { - /* average color and normal at incoming point */ - shader_eval_surface(kg, sd, state, state->flag); - float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL); - - /* we simply divide out the average color and multiply with the average - * of the other one. we could try to do this per closure but it's quite - * tricky to match closures between shader evaluations, their number and - * order may change, this is simpler */ - if(texture_blur > 0.0f) { - out_color = subsurface_color_pow(out_color, texture_blur); - in_color = subsurface_color_pow(in_color, texture_blur); - - *eval *= safe_divide_color(in_color, out_color); - } - } + /* average color and texture blur at outgoing point */ + float texture_blur; + float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur); + + /* do we have bump mapping? 
*/ + bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0; + + if (bump || texture_blur > 0.0f) { + /* average color and normal at incoming point */ + shader_eval_surface(kg, sd, state, state->flag); + float3 in_color = shader_bssrdf_sum(sd, (bump) ? N : NULL, NULL); + + /* we simply divide out the average color and multiply with the average + * of the other one. we could try to do this per closure but it's quite + * tricky to match closures between shader evaluations, their number and + * order may change, this is simpler */ + if (texture_blur > 0.0f) { + out_color = subsurface_color_pow(out_color, texture_blur); + in_color = subsurface_color_pow(in_color, texture_blur); + + *eval *= safe_divide_color(in_color, out_color); + } + } } /* Subsurface scattering step, from a point on the surface to other * nearby points on the same object. */ -ccl_device_inline int subsurface_scatter_disk( - KernelGlobals *kg, - LocalIntersection *ss_isect, - ShaderData *sd, - const ShaderClosure *sc, - uint *lcg_state, - float disk_u, - float disk_v, - bool all) +ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg, + LocalIntersection *ss_isect, + ShaderData *sd, + const ShaderClosure *sc, + uint *lcg_state, + float disk_u, + float disk_v, + bool all) { - /* pick random axis in local frame and point on disk */ - float3 disk_N, disk_T, disk_B; - float pick_pdf_N, pick_pdf_T, pick_pdf_B; - - disk_N = sd->Ng; - make_orthonormals(disk_N, &disk_T, &disk_B); - - if(disk_v < 0.5f) { - pick_pdf_N = 0.5f; - pick_pdf_T = 0.25f; - pick_pdf_B = 0.25f; - disk_v *= 2.0f; - } - else if(disk_v < 0.75f) { - float3 tmp = disk_N; - disk_N = disk_T; - disk_T = tmp; - pick_pdf_N = 0.25f; - pick_pdf_T = 0.5f; - pick_pdf_B = 0.25f; - disk_v = (disk_v - 0.5f)*4.0f; - } - else { - float3 tmp = disk_N; - disk_N = disk_B; - disk_B = tmp; - pick_pdf_N = 0.25f; - pick_pdf_T = 0.25f; - pick_pdf_B = 0.5f; - disk_v = (disk_v - 0.75f)*4.0f; - } - - /* sample point on disk */ - float phi = M_2PI_F * disk_v; - 
float disk_height, disk_r; - - bssrdf_sample(sc, disk_u, &disk_r, &disk_height); - - float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B; - - /* create ray */ + /* pick random axis in local frame and point on disk */ + float3 disk_N, disk_T, disk_B; + float pick_pdf_N, pick_pdf_T, pick_pdf_B; + + disk_N = sd->Ng; + make_orthonormals(disk_N, &disk_T, &disk_B); + + if (disk_v < 0.5f) { + pick_pdf_N = 0.5f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.25f; + disk_v *= 2.0f; + } + else if (disk_v < 0.75f) { + float3 tmp = disk_N; + disk_N = disk_T; + disk_T = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.5f; + pick_pdf_B = 0.25f; + disk_v = (disk_v - 0.5f) * 4.0f; + } + else { + float3 tmp = disk_N; + disk_N = disk_B; + disk_B = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.5f; + disk_v = (disk_v - 0.75f) * 4.0f; + } + + /* sample point on disk */ + float phi = M_2PI_F * disk_v; + float disk_height, disk_r; + + bssrdf_sample(sc, disk_u, &disk_r, &disk_height); + + float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; + + /* create ray */ #ifdef __SPLIT_KERNEL__ - Ray ray_object = ss_isect->ray; - Ray *ray = &ray_object; + Ray ray_object = ss_isect->ray; + Ray *ray = &ray_object; #else - Ray *ray = &ss_isect->ray; + Ray *ray = &ss_isect->ray; #endif - ray->P = sd->P + disk_N*disk_height + disk_P; - ray->D = -disk_N; - ray->t = 2.0f*disk_height; - ray->dP = sd->dP; - ray->dD = differential3_zero(); - ray->time = sd->time; - - /* intersect with the same object. if multiple intersections are found it - * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */ - scene_intersect_local(kg, - *ray, - ss_isect, - sd->object, - lcg_state, - BSSRDF_MAX_HITS); - int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS); - - for(int hit = 0; hit < num_eval_hits; hit++) { - /* Quickly retrieve P and Ng without setting up ShaderData. 
*/ - float3 hit_P; - if(sd->type & PRIMITIVE_TRIANGLE) { - hit_P = triangle_refine_local(kg, - sd, - &ss_isect->hits[hit], - ray); - } + ray->P = sd->P + disk_N * disk_height + disk_P; + ray->D = -disk_N; + ray->t = 2.0f * disk_height; + ray->dP = sd->dP; + ray->dD = differential3_zero(); + ray->time = sd->time; + + /* intersect with the same object. if multiple intersections are found it + * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */ + scene_intersect_local(kg, *ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS); + int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS); + + for (int hit = 0; hit < num_eval_hits; hit++) { + /* Quickly retrieve P and Ng without setting up ShaderData. */ + float3 hit_P; + if (sd->type & PRIMITIVE_TRIANGLE) { + hit_P = triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray); + } #ifdef __OBJECT_MOTION__ - else if(sd->type & PRIMITIVE_MOTION_TRIANGLE) { - float3 verts[3]; - motion_triangle_vertices( - kg, - sd->object, - kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim), - sd->time, - verts); - hit_P = motion_triangle_refine_local(kg, - sd, - &ss_isect->hits[hit], - ray, - verts); - } -#endif /* __OBJECT_MOTION__ */ - else { - ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f); - continue; - } - - float3 hit_Ng = ss_isect->Ng[hit]; - if(ss_isect->hits[hit].object != OBJECT_NONE) { - object_normal_transform(kg, sd, &hit_Ng); - } - - /* Probability densities for local frame axes. */ - float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); - float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); - float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); - - /* Multiple importance sample between 3 axes, power heuristic - * found to be slightly better than balance heuristic. pdf_N - * in the MIS weight and denominator cancelled out. 
*/ - float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); - if(ss_isect->num_hits > BSSRDF_MAX_HITS) { - w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS; - } - - /* Real distance to sampled point. */ - float r = len(hit_P - sd->P); - - /* Evaluate profiles. */ - float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w; - - ss_isect->weight[hit] = eval; - } + else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { + float3 verts[3]; + motion_triangle_vertices(kg, + sd->object, + kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim), + sd->time, + verts); + hit_P = motion_triangle_refine_local(kg, sd, &ss_isect->hits[hit], ray, verts); + } +#endif /* __OBJECT_MOTION__ */ + else { + ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f); + continue; + } + + float3 hit_Ng = ss_isect->Ng[hit]; + if (ss_isect->hits[hit].object != OBJECT_NONE) { + object_normal_transform(kg, sd, &hit_Ng); + } + + /* Probability densities for local frame axes. */ + float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); + float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); + float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); + + /* Multiple importance sample between 3 axes, power heuristic + * found to be slightly better than balance heuristic. pdf_N + * in the MIS weight and denominator cancelled out. */ + float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); + if (ss_isect->num_hits > BSSRDF_MAX_HITS) { + w *= ss_isect->num_hits / (float)BSSRDF_MAX_HITS; + } + + /* Real distance to sampled point. */ + float r = len(hit_P - sd->P); + + /* Evaluate profiles. 
*/ + float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w; + + ss_isect->weight[hit] = eval; + } #ifdef __SPLIT_KERNEL__ - ss_isect->ray = *ray; + ss_isect->ray = *ray; #endif - return num_eval_hits; + return num_eval_hits; } -ccl_device_noinline void subsurface_scatter_multi_setup( - KernelGlobals *kg, - LocalIntersection* ss_isect, - int hit, - ShaderData *sd, - ccl_addr_space PathState *state, - ClosureType type, - float roughness) +ccl_device_noinline void subsurface_scatter_multi_setup(KernelGlobals *kg, + LocalIntersection *ss_isect, + int hit, + ShaderData *sd, + ccl_addr_space PathState *state, + ClosureType type, + float roughness) { #ifdef __SPLIT_KERNEL__ - Ray ray_object = ss_isect->ray; - Ray *ray = &ray_object; + Ray ray_object = ss_isect->ray; + Ray *ray = &ray_object; #else - Ray *ray = &ss_isect->ray; + Ray *ray = &ss_isect->ray; #endif - /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */ + /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */ #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__) - kernel_split_params.dummy_sd_flag = sd->flag; + kernel_split_params.dummy_sd_flag = sd->flag; #endif - /* Setup new shading point. */ - shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray); + /* Setup new shading point. */ + shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray); - /* Optionally blur colors and bump mapping. */ - float3 weight = ss_isect->weight[hit]; - float3 N = sd->N; - subsurface_color_bump_blur(kg, sd, state, &weight, &N); + /* Optionally blur colors and bump mapping. */ + float3 weight = ss_isect->weight[hit]; + float3 N = sd->N; + subsurface_color_bump_blur(kg, sd, state, &weight, &N); - /* Setup diffuse BSDF. */ - subsurface_scatter_setup_diffuse_bsdf(kg, sd, type, roughness, weight, N); + /* Setup diffuse BSDF. 
*/ + subsurface_scatter_setup_diffuse_bsdf(kg, sd, type, roughness, weight, N); } /* Random walk subsurface scattering. @@ -340,196 +318,178 @@ ccl_device_noinline void subsurface_scatter_multi_setup( * "Practical and Controllable Subsurface Scattering for Production Path * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ -ccl_device void subsurface_random_walk_remap( - const float A, - const float d, - float *sigma_t, - float *sigma_s) +ccl_device void subsurface_random_walk_remap(const float A, + const float d, + float *sigma_t, + float *sigma_s) { - /* Compute attenuation and scattering coefficients from albedo. */ - const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f))); - const float s = 1.9f - A + 3.5f * sqr(A - 0.8f); + /* Compute attenuation and scattering coefficients from albedo. */ + const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f))); + const float s = 1.9f - A + 3.5f * sqr(A - 0.8f); - *sigma_t = 1.0f / fmaxf(d * s, 1e-16f); - *sigma_s = *sigma_t * a; + *sigma_t = 1.0f / fmaxf(d * s, 1e-16f); + *sigma_s = *sigma_t * a; } -ccl_device void subsurface_random_walk_coefficients( - const ShaderClosure *sc, - float3 *sigma_t, - float3 *sigma_s, - float3 *weight) +ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc, + float3 *sigma_t, + float3 *sigma_s, + float3 *weight) { - const Bssrdf *bssrdf = (const Bssrdf*)sc; - const float3 A = bssrdf->albedo; - const float3 d = bssrdf->radius; - float sigma_t_x, sigma_t_y, sigma_t_z; - float sigma_s_x, sigma_s_y, sigma_s_z; + const Bssrdf *bssrdf = (const Bssrdf *)sc; + const float3 A = bssrdf->albedo; + const float3 d = bssrdf->radius; + float sigma_t_x, sigma_t_y, sigma_t_z; + float sigma_s_x, sigma_s_y, sigma_s_z; - subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x); - subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y); - subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z); + 
subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x); + subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y); + subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z); - *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); - *sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z); + *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z); + *sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z); - /* Closure mixing and Fresnel weights separate from albedo. */ - *weight = safe_divide_color(bssrdf->weight, A); + /* Closure mixing and Fresnel weights separate from albedo. */ + *weight = safe_divide_color(bssrdf->weight, A); } -ccl_device_noinline bool subsurface_random_walk( - KernelGlobals *kg, - LocalIntersection *ss_isect, - ShaderData *sd, - ccl_addr_space PathState *state, - const ShaderClosure *sc, - const float bssrdf_u, - const float bssrdf_v) +ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg, + LocalIntersection *ss_isect, + ShaderData *sd, + ccl_addr_space PathState *state, + const ShaderClosure *sc, + const float bssrdf_u, + const float bssrdf_v) { - /* Sample diffuse surface scatter into the object. */ - float3 D; - float pdf; - sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf); - if(dot(-sd->Ng, D) <= 0.0f) { - return 0; - } - - /* Convert subsurface to volume coefficients. */ - float3 sigma_t, sigma_s; - float3 throughput = make_float3(1.0f, 1.0f, 1.0f); - subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput); - - /* Setup ray. */ + /* Sample diffuse surface scatter into the object. */ + float3 D; + float pdf; + sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf); + if (dot(-sd->Ng, D) <= 0.0f) { + return 0; + } + + /* Convert subsurface to volume coefficients. */ + float3 sigma_t, sigma_s; + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput); + + /* Setup ray. 
*/ #ifdef __SPLIT_KERNEL__ - Ray ray_object = ss_isect->ray; - Ray *ray = &ray_object; + Ray ray_object = ss_isect->ray; + Ray *ray = &ray_object; #else - Ray *ray = &ss_isect->ray; + Ray *ray = &ss_isect->ray; #endif - ray->P = ray_offset(sd->P, -sd->Ng); - ray->D = D; - ray->t = FLT_MAX; - ray->time = sd->time; - - /* Modify state for RNGs, decorrelated from other paths. */ - uint prev_rng_offset = state->rng_offset; - uint prev_rng_hash = state->rng_hash; - state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef); - - /* Random walk until we hit the surface again. */ - bool hit = false; - - for(int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) { - /* Advance random number offset. */ - state->rng_offset += PRNG_BOUNCE_NUM; - - if(bounce > 0) { - /* Sample scattering direction. */ - const float anisotropy = 0.0f; - float scatter_u, scatter_v; - path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v); - ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL); - } - - /* Sample color channel, use MIS with balance heuristic. */ - float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - float3 albedo = safe_divide_color(sigma_s, sigma_t); - float3 channel_pdf; - int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf); - - /* Distance sampling. */ - float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); - float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); - float t = -logf(1.0f - rdist)/sample_sigma_t; - - ray->t = t; - scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1); - hit = (ss_isect->num_hits > 0); - - if(hit) { - /* Compute world space distance to surface hit. */ - float3 D = ray->D; - object_inverse_dir_transform(kg, sd, &D); - D = normalize(D) * ss_isect->hits[0].t; - object_dir_transform(kg, sd, &D); - t = len(D); - } - - /* Advance to new scatter location. */ - ray->P += t * ray->D; - - /* Update throughput. 
*/ - float3 transmittance = volume_color_transmittance(sigma_t, t); - float pdf = dot(channel_pdf, (hit)? transmittance: sigma_t * transmittance); - throughput *= ((hit)? transmittance: sigma_s * transmittance) / pdf; - - if(hit) { - /* If we hit the surface, we are done. */ - break; - } - - /* Russian roulette. */ - float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); - float probability = min(max3(fabs(throughput)), 1.0f); - if(terminate >= probability) { - break; - } - throughput /= probability; - } - - kernel_assert(isfinite_safe(throughput.x) && - isfinite_safe(throughput.y) && - isfinite_safe(throughput.z)); - - state->rng_offset = prev_rng_offset; - state->rng_hash = prev_rng_hash; - - /* Return number of hits in ss_isect. */ - if(!hit) { - return 0; - } - - /* TODO: gain back performance lost from merging with disk BSSRDF. We - * only need to return on hit so this indirect ray push/pop overhead - * is not actually needed, but it does keep the code simpler. */ - ss_isect->weight[0] = throughput; + ray->P = ray_offset(sd->P, -sd->Ng); + ray->D = D; + ray->t = FLT_MAX; + ray->time = sd->time; + + /* Modify state for RNGs, decorrelated from other paths. */ + uint prev_rng_offset = state->rng_offset; + uint prev_rng_hash = state->rng_hash; + state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef); + + /* Random walk until we hit the surface again. */ + bool hit = false; + + for (int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) { + /* Advance random number offset. */ + state->rng_offset += PRNG_BOUNCE_NUM; + + if (bounce > 0) { + /* Sample scattering direction. */ + const float anisotropy = 0.0f; + float scatter_u, scatter_v; + path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v); + ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL); + } + + /* Sample color channel, use MIS with balance heuristic. 
*/ + float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); + float3 albedo = safe_divide_color(sigma_s, sigma_t); + float3 channel_pdf; + int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf); + + /* Distance sampling. */ + float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); + float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); + float t = -logf(1.0f - rdist) / sample_sigma_t; + + ray->t = t; + scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1); + hit = (ss_isect->num_hits > 0); + + if (hit) { + /* Compute world space distance to surface hit. */ + float3 D = ray->D; + object_inverse_dir_transform(kg, sd, &D); + D = normalize(D) * ss_isect->hits[0].t; + object_dir_transform(kg, sd, &D); + t = len(D); + } + + /* Advance to new scatter location. */ + ray->P += t * ray->D; + + /* Update throughput. */ + float3 transmittance = volume_color_transmittance(sigma_t, t); + float pdf = dot(channel_pdf, (hit) ? transmittance : sigma_t * transmittance); + throughput *= ((hit) ? transmittance : sigma_s * transmittance) / pdf; + + if (hit) { + /* If we hit the surface, we are done. */ + break; + } + + /* Russian roulette. */ + float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); + float probability = min(max3(fabs(throughput)), 1.0f); + if (terminate >= probability) { + break; + } + throughput /= probability; + } + + kernel_assert(isfinite_safe(throughput.x) && isfinite_safe(throughput.y) && + isfinite_safe(throughput.z)); + + state->rng_offset = prev_rng_offset; + state->rng_hash = prev_rng_hash; + + /* Return number of hits in ss_isect. */ + if (!hit) { + return 0; + } + + /* TODO: gain back performance lost from merging with disk BSSRDF. We + * only need to return on hit so this indirect ray push/pop overhead + * is not actually needed, but it does keep the code simpler. 
*/ + ss_isect->weight[0] = throughput; #ifdef __SPLIT_KERNEL__ - ss_isect->ray = *ray; + ss_isect->ray = *ray; #endif - return 1; + return 1; } -ccl_device_inline int subsurface_scatter_multi_intersect( - KernelGlobals *kg, - LocalIntersection *ss_isect, - ShaderData *sd, - ccl_addr_space PathState *state, - const ShaderClosure *sc, - uint *lcg_state, - float bssrdf_u, - float bssrdf_v, - bool all) +ccl_device_inline int subsurface_scatter_multi_intersect(KernelGlobals *kg, + LocalIntersection *ss_isect, + ShaderData *sd, + ccl_addr_space PathState *state, + const ShaderClosure *sc, + uint *lcg_state, + float bssrdf_u, + float bssrdf_v, + bool all) { - if(CLOSURE_IS_DISK_BSSRDF(sc->type)) { - return subsurface_scatter_disk(kg, - ss_isect, - sd, - sc, - lcg_state, - bssrdf_u, - bssrdf_v, - all); - } - else { - return subsurface_random_walk(kg, - ss_isect, - sd, - state, - sc, - bssrdf_u, - bssrdf_v); - } + if (CLOSURE_IS_DISK_BSSRDF(sc->type)) { + return subsurface_scatter_disk(kg, ss_isect, sd, sc, lcg_state, bssrdf_u, bssrdf_v, all); + } + else { + return subsurface_random_walk(kg, ss_isect, sd, state, sc, bssrdf_u, bssrdf_v); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 4b1c8e82dfa..3f62b726b6a 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -42,26 +42,26 @@ CCL_NAMESPACE_BEGIN /* Constants */ #define OBJECT_MOTION_PASS_SIZE 2 -#define FILTER_TABLE_SIZE 1024 -#define RAMP_TABLE_SIZE 256 -#define SHUTTER_TABLE_SIZE 256 +#define FILTER_TABLE_SIZE 1024 +#define RAMP_TABLE_SIZE 256 +#define SHUTTER_TABLE_SIZE 256 -#define BSSRDF_MIN_RADIUS 1e-8f -#define BSSRDF_MAX_HITS 4 -#define BSSRDF_MAX_BOUNCES 256 -#define LOCAL_MAX_HITS 4 +#define BSSRDF_MIN_RADIUS 1e-8f +#define BSSRDF_MAX_HITS 4 +#define BSSRDF_MAX_BOUNCES 256 +#define LOCAL_MAX_HITS 4 -#define VOLUME_BOUNDS_MAX 1024 +#define VOLUME_BOUNDS_MAX 1024 -#define BECKMANN_TABLE_SIZE 256 +#define 
BECKMANN_TABLE_SIZE 256 -#define SHADER_NONE (~0) -#define OBJECT_NONE (~0) -#define PRIM_NONE (~0) -#define LAMP_NONE (~0) -#define ID_NONE (0.0f) +#define SHADER_NONE (~0) +#define OBJECT_NONE (~0) +#define PRIM_NONE (~0) +#define LAMP_NONE (~0) +#define ID_NONE (0.0f) -#define VOLUME_STACK_SIZE 32 +#define VOLUME_STACK_SIZE 32 /* Split kernel constants */ #define WORK_POOL_SIZE_GPU 64 @@ -72,7 +72,6 @@ CCL_NAMESPACE_BEGIN # define WORK_POOL_SIZE WORK_POOL_SIZE_CPU #endif - #define SHADER_SORT_BLOCK_SIZE 2048 #ifdef __KERNEL_OPENCL__ @@ -137,16 +136,16 @@ CCL_NAMESPACE_BEGIN # endif # define __VOLUME_DECOUPLED__ # define __VOLUME_RECORD_ALL__ -#endif /* __KERNEL_CPU__ */ +#endif /* __KERNEL_CPU__ */ #ifdef __KERNEL_CUDA__ # ifdef __SPLIT_KERNEL__ # undef __BRANCHED_PATH__ # endif -#endif /* __KERNEL_CUDA__ */ +#endif /* __KERNEL_CUDA__ */ #ifdef __KERNEL_OPENCL__ -#endif /* __KERNEL_OPENCL__ */ +#endif /* __KERNEL_OPENCL__ */ /* Scene-based selective features compilation. */ #ifdef __NO_CAMERA_MOTION__ @@ -202,273 +201,269 @@ CCL_NAMESPACE_BEGIN /* Shader Evaluation */ typedef enum ShaderEvalType { - SHADER_EVAL_DISPLACE, - SHADER_EVAL_BACKGROUND, - /* bake types */ - SHADER_EVAL_BAKE, /* no real shade, it's used in the code to - * differentiate the type of shader eval from the above - */ - /* data passes */ - SHADER_EVAL_NORMAL, - SHADER_EVAL_UV, - SHADER_EVAL_ROUGHNESS, - SHADER_EVAL_DIFFUSE_COLOR, - SHADER_EVAL_GLOSSY_COLOR, - SHADER_EVAL_TRANSMISSION_COLOR, - SHADER_EVAL_SUBSURFACE_COLOR, - SHADER_EVAL_EMISSION, - - /* light passes */ - SHADER_EVAL_AO, - SHADER_EVAL_COMBINED, - SHADER_EVAL_SHADOW, - SHADER_EVAL_DIFFUSE, - SHADER_EVAL_GLOSSY, - SHADER_EVAL_TRANSMISSION, - SHADER_EVAL_SUBSURFACE, - - /* extra */ - SHADER_EVAL_ENVIRONMENT, + SHADER_EVAL_DISPLACE, + SHADER_EVAL_BACKGROUND, + /* bake types */ + SHADER_EVAL_BAKE, /* no real shade, it's used in the code to + * differentiate the type of shader eval from the above + */ + /* data passes */ + 
SHADER_EVAL_NORMAL, + SHADER_EVAL_UV, + SHADER_EVAL_ROUGHNESS, + SHADER_EVAL_DIFFUSE_COLOR, + SHADER_EVAL_GLOSSY_COLOR, + SHADER_EVAL_TRANSMISSION_COLOR, + SHADER_EVAL_SUBSURFACE_COLOR, + SHADER_EVAL_EMISSION, + + /* light passes */ + SHADER_EVAL_AO, + SHADER_EVAL_COMBINED, + SHADER_EVAL_SHADOW, + SHADER_EVAL_DIFFUSE, + SHADER_EVAL_GLOSSY, + SHADER_EVAL_TRANSMISSION, + SHADER_EVAL_SUBSURFACE, + + /* extra */ + SHADER_EVAL_ENVIRONMENT, } ShaderEvalType; /* Path Tracing * note we need to keep the u/v pairs at even values */ enum PathTraceDimension { - PRNG_FILTER_U = 0, - PRNG_FILTER_V = 1, - PRNG_LENS_U = 2, - PRNG_LENS_V = 3, - PRNG_TIME = 4, - PRNG_UNUSED_0 = 5, - PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */ - PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */ - PRNG_BASE_NUM = 10, - - PRNG_BSDF_U = 0, - PRNG_BSDF_V = 1, - PRNG_LIGHT_U = 2, - PRNG_LIGHT_V = 3, - PRNG_LIGHT_TERMINATE = 4, - PRNG_TERMINATE = 5, - PRNG_PHASE_CHANNEL = 6, - PRNG_SCATTER_DISTANCE = 7, - PRNG_BOUNCE_NUM = 8, - - PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */ - PRNG_BEVEL_V = 7, + PRNG_FILTER_U = 0, + PRNG_FILTER_V = 1, + PRNG_LENS_U = 2, + PRNG_LENS_V = 3, + PRNG_TIME = 4, + PRNG_UNUSED_0 = 5, + PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */ + PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */ + PRNG_BASE_NUM = 10, + + PRNG_BSDF_U = 0, + PRNG_BSDF_V = 1, + PRNG_LIGHT_U = 2, + PRNG_LIGHT_V = 3, + PRNG_LIGHT_TERMINATE = 4, + PRNG_TERMINATE = 5, + PRNG_PHASE_CHANNEL = 6, + PRNG_SCATTER_DISTANCE = 7, + PRNG_BOUNCE_NUM = 8, + + PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */ + PRNG_BEVEL_V = 7, }; enum SamplingPattern { - SAMPLING_PATTERN_SOBOL = 0, - SAMPLING_PATTERN_CMJ = 1, + SAMPLING_PATTERN_SOBOL = 0, + SAMPLING_PATTERN_CMJ = 1, - SAMPLING_NUM_PATTERNS, + SAMPLING_NUM_PATTERNS, }; /* these flags values correspond to raytypes in osl.cpp, so keep them in sync! 
*/ enum PathRayFlag { - PATH_RAY_CAMERA = (1 << 0), - PATH_RAY_REFLECT = (1 << 1), - PATH_RAY_TRANSMIT = (1 << 2), - PATH_RAY_DIFFUSE = (1 << 3), - PATH_RAY_GLOSSY = (1 << 4), - PATH_RAY_SINGULAR = (1 << 5), - PATH_RAY_TRANSPARENT = (1 << 6), - - PATH_RAY_SHADOW_OPAQUE_NON_CATCHER = (1 << 7), - PATH_RAY_SHADOW_OPAQUE_CATCHER = (1 << 8), - PATH_RAY_SHADOW_OPAQUE = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER|PATH_RAY_SHADOW_OPAQUE_CATCHER), - PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER = (1 << 9), - PATH_RAY_SHADOW_TRANSPARENT_CATCHER = (1 << 10), - PATH_RAY_SHADOW_TRANSPARENT = (PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER|PATH_RAY_SHADOW_TRANSPARENT_CATCHER), - PATH_RAY_SHADOW_NON_CATCHER = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER|PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER), - PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE|PATH_RAY_SHADOW_TRANSPARENT), - - PATH_RAY_CURVE = (1 << 11), /* visibility flag to define curve segments */ - PATH_RAY_VOLUME_SCATTER = (1 << 12), /* volume scattering */ - - /* Special flag to tag unaligned BVH nodes. */ - PATH_RAY_NODE_UNALIGNED = (1 << 13), - - PATH_RAY_ALL_VISIBILITY = ((1 << 14)-1), - - /* Don't apply multiple importance sampling weights to emission from - * lamp or surface hits, because they were not direct light sampled. */ - PATH_RAY_MIS_SKIP = (1 << 14), - /* Diffuse bounce earlier in the path, skip SSS to improve performance - * and avoid branching twice with disk sampling SSS. */ - PATH_RAY_DIFFUSE_ANCESTOR = (1 << 15), - /* Single pass has been written. */ - PATH_RAY_SINGLE_PASS_DONE = (1 << 16), - /* Ray is behind a shadow catcher .*/ - PATH_RAY_SHADOW_CATCHER = (1 << 17), - /* Store shadow data for shadow catcher or denoising. */ - PATH_RAY_STORE_SHADOW_INFO = (1 << 18), - /* Zero background alpha, for camera or transparent glass rays. */ - PATH_RAY_TRANSPARENT_BACKGROUND = (1 << 19), - /* Terminate ray immediately at next bounce. 
*/ - PATH_RAY_TERMINATE_IMMEDIATE = (1 << 20), - /* Ray is to be terminated, but continue with transparent bounces and - * emission as long as we encounter them. This is required to make the - * MIS between direct and indirect light rays match, as shadow rays go - * through transparent surfaces to reach emisison too. */ - PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1 << 21), - /* Ray is to be terminated. */ - PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_IMMEDIATE|PATH_RAY_TERMINATE_AFTER_TRANSPARENT), - /* Path and shader is being evaluated for direct lighting emission. */ - PATH_RAY_EMISSION = (1 << 22) + PATH_RAY_CAMERA = (1 << 0), + PATH_RAY_REFLECT = (1 << 1), + PATH_RAY_TRANSMIT = (1 << 2), + PATH_RAY_DIFFUSE = (1 << 3), + PATH_RAY_GLOSSY = (1 << 4), + PATH_RAY_SINGULAR = (1 << 5), + PATH_RAY_TRANSPARENT = (1 << 6), + + PATH_RAY_SHADOW_OPAQUE_NON_CATCHER = (1 << 7), + PATH_RAY_SHADOW_OPAQUE_CATCHER = (1 << 8), + PATH_RAY_SHADOW_OPAQUE = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER | PATH_RAY_SHADOW_OPAQUE_CATCHER), + PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER = (1 << 9), + PATH_RAY_SHADOW_TRANSPARENT_CATCHER = (1 << 10), + PATH_RAY_SHADOW_TRANSPARENT = (PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER | + PATH_RAY_SHADOW_TRANSPARENT_CATCHER), + PATH_RAY_SHADOW_NON_CATCHER = (PATH_RAY_SHADOW_OPAQUE_NON_CATCHER | + PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER), + PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT), + + PATH_RAY_CURVE = (1 << 11), /* visibility flag to define curve segments */ + PATH_RAY_VOLUME_SCATTER = (1 << 12), /* volume scattering */ + + /* Special flag to tag unaligned BVH nodes. */ + PATH_RAY_NODE_UNALIGNED = (1 << 13), + + PATH_RAY_ALL_VISIBILITY = ((1 << 14) - 1), + + /* Don't apply multiple importance sampling weights to emission from + * lamp or surface hits, because they were not direct light sampled. 
*/ + PATH_RAY_MIS_SKIP = (1 << 14), + /* Diffuse bounce earlier in the path, skip SSS to improve performance + * and avoid branching twice with disk sampling SSS. */ + PATH_RAY_DIFFUSE_ANCESTOR = (1 << 15), + /* Single pass has been written. */ + PATH_RAY_SINGLE_PASS_DONE = (1 << 16), + /* Ray is behind a shadow catcher .*/ + PATH_RAY_SHADOW_CATCHER = (1 << 17), + /* Store shadow data for shadow catcher or denoising. */ + PATH_RAY_STORE_SHADOW_INFO = (1 << 18), + /* Zero background alpha, for camera or transparent glass rays. */ + PATH_RAY_TRANSPARENT_BACKGROUND = (1 << 19), + /* Terminate ray immediately at next bounce. */ + PATH_RAY_TERMINATE_IMMEDIATE = (1 << 20), + /* Ray is to be terminated, but continue with transparent bounces and + * emission as long as we encounter them. This is required to make the + * MIS between direct and indirect light rays match, as shadow rays go + * through transparent surfaces to reach emisison too. */ + PATH_RAY_TERMINATE_AFTER_TRANSPARENT = (1 << 21), + /* Ray is to be terminated. */ + PATH_RAY_TERMINATE = (PATH_RAY_TERMINATE_IMMEDIATE | PATH_RAY_TERMINATE_AFTER_TRANSPARENT), + /* Path and shader is being evaluated for direct lighting emission. 
*/ + PATH_RAY_EMISSION = (1 << 22) }; /* Closure Label */ typedef enum ClosureLabel { - LABEL_NONE = 0, - LABEL_TRANSMIT = 1, - LABEL_REFLECT = 2, - LABEL_DIFFUSE = 4, - LABEL_GLOSSY = 8, - LABEL_SINGULAR = 16, - LABEL_TRANSPARENT = 32, - LABEL_VOLUME_SCATTER = 64, - LABEL_TRANSMIT_TRANSPARENT = 128, + LABEL_NONE = 0, + LABEL_TRANSMIT = 1, + LABEL_REFLECT = 2, + LABEL_DIFFUSE = 4, + LABEL_GLOSSY = 8, + LABEL_SINGULAR = 16, + LABEL_TRANSPARENT = 32, + LABEL_VOLUME_SCATTER = 64, + LABEL_TRANSMIT_TRANSPARENT = 128, } ClosureLabel; /* Render Passes */ -#define PASS_NAME_JOIN(a, b) a ## _ ## b +#define PASS_NAME_JOIN(a, b) a##_##b #define PASSMASK(pass) (1 << ((PASS_NAME_JOIN(PASS, pass)) % 32)) -#define PASSMASK_COMPONENT(comp) (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) | \ - PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \ - PASSMASK(PASS_NAME_JOIN(comp, COLOR))) +#define PASSMASK_COMPONENT(comp) \ + (PASSMASK(PASS_NAME_JOIN(comp, DIRECT)) | PASSMASK(PASS_NAME_JOIN(comp, INDIRECT)) | \ + PASSMASK(PASS_NAME_JOIN(comp, COLOR))) typedef enum PassType { - PASS_NONE = 0, - - /* Main passes */ - PASS_COMBINED = 1, - PASS_DEPTH, - PASS_NORMAL, - PASS_UV, - PASS_OBJECT_ID, - PASS_MATERIAL_ID, - PASS_MOTION, - PASS_MOTION_WEIGHT, + PASS_NONE = 0, + + /* Main passes */ + PASS_COMBINED = 1, + PASS_DEPTH, + PASS_NORMAL, + PASS_UV, + PASS_OBJECT_ID, + PASS_MATERIAL_ID, + PASS_MOTION, + PASS_MOTION_WEIGHT, #ifdef __KERNEL_DEBUG__ - PASS_BVH_TRAVERSED_NODES, - PASS_BVH_TRAVERSED_INSTANCES, - PASS_BVH_INTERSECTIONS, - PASS_RAY_BOUNCES, + PASS_BVH_TRAVERSED_NODES, + PASS_BVH_TRAVERSED_INSTANCES, + PASS_BVH_INTERSECTIONS, + PASS_RAY_BOUNCES, #endif - PASS_RENDER_TIME, - PASS_CRYPTOMATTE, - PASS_CATEGORY_MAIN_END = 31, - - PASS_MIST = 32, - PASS_EMISSION, - PASS_BACKGROUND, - PASS_AO, - PASS_SHADOW, - PASS_LIGHT, /* no real pass, used to force use_light_pass */ - PASS_DIFFUSE_DIRECT, - PASS_DIFFUSE_INDIRECT, - PASS_DIFFUSE_COLOR, - PASS_GLOSSY_DIRECT, - PASS_GLOSSY_INDIRECT, - 
PASS_GLOSSY_COLOR, - PASS_TRANSMISSION_DIRECT, - PASS_TRANSMISSION_INDIRECT, - PASS_TRANSMISSION_COLOR, - PASS_SUBSURFACE_DIRECT, - PASS_SUBSURFACE_INDIRECT, - PASS_SUBSURFACE_COLOR, - PASS_VOLUME_DIRECT, - PASS_VOLUME_INDIRECT, - /* No Scatter color since it's tricky to define what it would even mean. */ - PASS_CATEGORY_LIGHT_END = 63, + PASS_RENDER_TIME, + PASS_CRYPTOMATTE, + PASS_CATEGORY_MAIN_END = 31, + + PASS_MIST = 32, + PASS_EMISSION, + PASS_BACKGROUND, + PASS_AO, + PASS_SHADOW, + PASS_LIGHT, /* no real pass, used to force use_light_pass */ + PASS_DIFFUSE_DIRECT, + PASS_DIFFUSE_INDIRECT, + PASS_DIFFUSE_COLOR, + PASS_GLOSSY_DIRECT, + PASS_GLOSSY_INDIRECT, + PASS_GLOSSY_COLOR, + PASS_TRANSMISSION_DIRECT, + PASS_TRANSMISSION_INDIRECT, + PASS_TRANSMISSION_COLOR, + PASS_SUBSURFACE_DIRECT, + PASS_SUBSURFACE_INDIRECT, + PASS_SUBSURFACE_COLOR, + PASS_VOLUME_DIRECT, + PASS_VOLUME_INDIRECT, + /* No Scatter color since it's tricky to define what it would even mean. */ + PASS_CATEGORY_LIGHT_END = 63, } PassType; #define PASS_ANY (~0) typedef enum CryptomatteType { - CRYPT_NONE = 0, - CRYPT_OBJECT = (1 << 0), - CRYPT_MATERIAL = (1 << 1), - CRYPT_ASSET = (1 << 2), - CRYPT_ACCURATE = (1 << 3), + CRYPT_NONE = 0, + CRYPT_OBJECT = (1 << 0), + CRYPT_MATERIAL = (1 << 1), + CRYPT_ASSET = (1 << 2), + CRYPT_ACCURATE = (1 << 3), } CryptomatteType; typedef enum DenoisingPassOffsets { - DENOISING_PASS_NORMAL = 0, - DENOISING_PASS_NORMAL_VAR = 3, - DENOISING_PASS_ALBEDO = 6, - DENOISING_PASS_ALBEDO_VAR = 9, - DENOISING_PASS_DEPTH = 12, - DENOISING_PASS_DEPTH_VAR = 13, - DENOISING_PASS_SHADOW_A = 14, - DENOISING_PASS_SHADOW_B = 17, - DENOISING_PASS_COLOR = 20, - DENOISING_PASS_COLOR_VAR = 23, - DENOISING_PASS_CLEAN = 26, - - DENOISING_PASS_PREFILTERED_DEPTH = 0, - DENOISING_PASS_PREFILTERED_NORMAL = 1, - DENOISING_PASS_PREFILTERED_SHADOWING = 4, - DENOISING_PASS_PREFILTERED_ALBEDO = 5, - DENOISING_PASS_PREFILTERED_COLOR = 8, - DENOISING_PASS_PREFILTERED_VARIANCE = 11, - 
DENOISING_PASS_PREFILTERED_INTENSITY = 14, - - DENOISING_PASS_SIZE_BASE = 26, - DENOISING_PASS_SIZE_CLEAN = 3, - DENOISING_PASS_SIZE_PREFILTERED = 15, + DENOISING_PASS_NORMAL = 0, + DENOISING_PASS_NORMAL_VAR = 3, + DENOISING_PASS_ALBEDO = 6, + DENOISING_PASS_ALBEDO_VAR = 9, + DENOISING_PASS_DEPTH = 12, + DENOISING_PASS_DEPTH_VAR = 13, + DENOISING_PASS_SHADOW_A = 14, + DENOISING_PASS_SHADOW_B = 17, + DENOISING_PASS_COLOR = 20, + DENOISING_PASS_COLOR_VAR = 23, + DENOISING_PASS_CLEAN = 26, + + DENOISING_PASS_PREFILTERED_DEPTH = 0, + DENOISING_PASS_PREFILTERED_NORMAL = 1, + DENOISING_PASS_PREFILTERED_SHADOWING = 4, + DENOISING_PASS_PREFILTERED_ALBEDO = 5, + DENOISING_PASS_PREFILTERED_COLOR = 8, + DENOISING_PASS_PREFILTERED_VARIANCE = 11, + DENOISING_PASS_PREFILTERED_INTENSITY = 14, + + DENOISING_PASS_SIZE_BASE = 26, + DENOISING_PASS_SIZE_CLEAN = 3, + DENOISING_PASS_SIZE_PREFILTERED = 15, } DenoisingPassOffsets; typedef enum eBakePassFilter { - BAKE_FILTER_NONE = 0, - BAKE_FILTER_DIRECT = (1 << 0), - BAKE_FILTER_INDIRECT = (1 << 1), - BAKE_FILTER_COLOR = (1 << 2), - BAKE_FILTER_DIFFUSE = (1 << 3), - BAKE_FILTER_GLOSSY = (1 << 4), - BAKE_FILTER_TRANSMISSION = (1 << 5), - BAKE_FILTER_SUBSURFACE = (1 << 6), - BAKE_FILTER_EMISSION = (1 << 7), - BAKE_FILTER_AO = (1 << 8), + BAKE_FILTER_NONE = 0, + BAKE_FILTER_DIRECT = (1 << 0), + BAKE_FILTER_INDIRECT = (1 << 1), + BAKE_FILTER_COLOR = (1 << 2), + BAKE_FILTER_DIFFUSE = (1 << 3), + BAKE_FILTER_GLOSSY = (1 << 4), + BAKE_FILTER_TRANSMISSION = (1 << 5), + BAKE_FILTER_SUBSURFACE = (1 << 6), + BAKE_FILTER_EMISSION = (1 << 7), + BAKE_FILTER_AO = (1 << 8), } eBakePassFilter; typedef enum BakePassFilterCombos { - BAKE_FILTER_COMBINED = ( - BAKE_FILTER_DIRECT | - BAKE_FILTER_INDIRECT | - BAKE_FILTER_DIFFUSE | - BAKE_FILTER_GLOSSY | - BAKE_FILTER_TRANSMISSION | - BAKE_FILTER_SUBSURFACE | - BAKE_FILTER_EMISSION | - BAKE_FILTER_AO), - BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE), - BAKE_FILTER_GLOSSY_DIRECT = 
(BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY), - BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION), - BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE), - BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE), - BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY), - BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION), - BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE), + BAKE_FILTER_COMBINED = (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE | + BAKE_FILTER_GLOSSY | BAKE_FILTER_TRANSMISSION | BAKE_FILTER_SUBSURFACE | + BAKE_FILTER_EMISSION | BAKE_FILTER_AO), + BAKE_FILTER_DIFFUSE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_DIFFUSE), + BAKE_FILTER_GLOSSY_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_GLOSSY), + BAKE_FILTER_TRANSMISSION_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_TRANSMISSION), + BAKE_FILTER_SUBSURFACE_DIRECT = (BAKE_FILTER_DIRECT | BAKE_FILTER_SUBSURFACE), + BAKE_FILTER_DIFFUSE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE), + BAKE_FILTER_GLOSSY_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_GLOSSY), + BAKE_FILTER_TRANSMISSION_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_TRANSMISSION), + BAKE_FILTER_SUBSURFACE_INDIRECT = (BAKE_FILTER_INDIRECT | BAKE_FILTER_SUBSURFACE), } BakePassFilterCombos; typedef enum DenoiseFlag { - DENOISING_CLEAN_DIFFUSE_DIR = (1 << 0), - DENOISING_CLEAN_DIFFUSE_IND = (1 << 1), - DENOISING_CLEAN_GLOSSY_DIR = (1 << 2), - DENOISING_CLEAN_GLOSSY_IND = (1 << 3), - DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4), - DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5), - DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6), - DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7), - DENOISING_CLEAN_ALL_PASSES = (1 << 8)-1, + DENOISING_CLEAN_DIFFUSE_DIR = (1 << 0), + DENOISING_CLEAN_DIFFUSE_IND = (1 << 1), + DENOISING_CLEAN_GLOSSY_DIR = (1 << 2), + DENOISING_CLEAN_GLOSSY_IND = (1 << 
3), + DENOISING_CLEAN_TRANSMISSION_DIR = (1 << 4), + DENOISING_CLEAN_TRANSMISSION_IND = (1 << 5), + DENOISING_CLEAN_SUBSURFACE_DIR = (1 << 6), + DENOISING_CLEAN_SUBSURFACE_IND = (1 << 7), + DENOISING_CLEAN_ALL_PASSES = (1 << 8) - 1, } DenoiseFlag; #ifdef __KERNEL_DEBUG__ @@ -476,173 +471,171 @@ typedef enum DenoiseFlag { * really important here. */ typedef struct DebugData { - int num_bvh_traversed_nodes; - int num_bvh_traversed_instances; - int num_bvh_intersections; - int num_ray_bounces; + int num_bvh_traversed_nodes; + int num_bvh_traversed_instances; + int num_bvh_intersections; + int num_ray_bounces; } DebugData; #endif typedef ccl_addr_space struct PathRadianceState { #ifdef __PASSES__ - float3 diffuse; - float3 glossy; - float3 transmission; - float3 subsurface; - float3 scatter; + float3 diffuse; + float3 glossy; + float3 transmission; + float3 subsurface; + float3 scatter; - float3 direct; + float3 direct; #endif } PathRadianceState; typedef ccl_addr_space struct PathRadiance { #ifdef __PASSES__ - int use_light_pass; + int use_light_pass; #endif - float transparent; - float3 emission; + float transparent; + float3 emission; #ifdef __PASSES__ - float3 background; - float3 ao; - - float3 indirect; - float3 direct_emission; - - float3 color_diffuse; - float3 color_glossy; - float3 color_transmission; - float3 color_subsurface; - - float3 direct_diffuse; - float3 direct_glossy; - float3 direct_transmission; - float3 direct_subsurface; - float3 direct_scatter; - - float3 indirect_diffuse; - float3 indirect_glossy; - float3 indirect_transmission; - float3 indirect_subsurface; - float3 indirect_scatter; - - float4 shadow; - float mist; + float3 background; + float3 ao; + + float3 indirect; + float3 direct_emission; + + float3 color_diffuse; + float3 color_glossy; + float3 color_transmission; + float3 color_subsurface; + + float3 direct_diffuse; + float3 direct_glossy; + float3 direct_transmission; + float3 direct_subsurface; + float3 direct_scatter; + + float3 
indirect_diffuse; + float3 indirect_glossy; + float3 indirect_transmission; + float3 indirect_subsurface; + float3 indirect_scatter; + + float4 shadow; + float mist; #endif - struct PathRadianceState state; + struct PathRadianceState state; #ifdef __SHADOW_TRICKS__ - /* Total light reachable across the path, ignoring shadow blocked queries. */ - float3 path_total; - /* Total light reachable across the path with shadow blocked queries - * applied here. - * - * Dividing this figure by path_total will give estimate of shadow pass. - */ - float3 path_total_shaded; - - /* Color of the background on which shadow is alpha-overed. */ - float3 shadow_background_color; - - /* Path radiance sum and throughput at the moment when ray hits shadow - * catcher object. - */ - float shadow_throughput; - - /* Accumulated transparency along the path after shadow catcher bounce. */ - float shadow_transparency; - - /* Indicate if any shadow catcher data is set. */ - int has_shadow_catcher; + /* Total light reachable across the path, ignoring shadow blocked queries. */ + float3 path_total; + /* Total light reachable across the path with shadow blocked queries + * applied here. + * + * Dividing this figure by path_total will give estimate of shadow pass. + */ + float3 path_total_shaded; + + /* Color of the background on which shadow is alpha-overed. */ + float3 shadow_background_color; + + /* Path radiance sum and throughput at the moment when ray hits shadow + * catcher object. + */ + float shadow_throughput; + + /* Accumulated transparency along the path after shadow catcher bounce. */ + float shadow_transparency; + + /* Indicate if any shadow catcher data is set. 
*/ + int has_shadow_catcher; #endif #ifdef __DENOISING_FEATURES__ - float3 denoising_normal; - float3 denoising_albedo; - float denoising_depth; -#endif /* __DENOISING_FEATURES__ */ + float3 denoising_normal; + float3 denoising_albedo; + float denoising_depth; +#endif /* __DENOISING_FEATURES__ */ #ifdef __KERNEL_DEBUG__ - DebugData debug_data; -#endif /* __KERNEL_DEBUG__ */ + DebugData debug_data; +#endif /* __KERNEL_DEBUG__ */ } PathRadiance; typedef struct BsdfEval { #ifdef __PASSES__ - int use_light_pass; + int use_light_pass; #endif - float3 diffuse; + float3 diffuse; #ifdef __PASSES__ - float3 glossy; - float3 transmission; - float3 transparent; - float3 subsurface; - float3 scatter; + float3 glossy; + float3 transmission; + float3 transparent; + float3 subsurface; + float3 scatter; #endif #ifdef __SHADOW_TRICKS__ - float3 sum_no_mis; + float3 sum_no_mis; #endif } BsdfEval; /* Shader Flag */ typedef enum ShaderFlag { - SHADER_SMOOTH_NORMAL = (1 << 31), - SHADER_CAST_SHADOW = (1 << 30), - SHADER_AREA_LIGHT = (1 << 29), - SHADER_USE_MIS = (1 << 28), - SHADER_EXCLUDE_DIFFUSE = (1 << 27), - SHADER_EXCLUDE_GLOSSY = (1 << 26), - SHADER_EXCLUDE_TRANSMIT = (1 << 25), - SHADER_EXCLUDE_CAMERA = (1 << 24), - SHADER_EXCLUDE_SCATTER = (1 << 23), - SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE|SHADER_EXCLUDE_GLOSSY|SHADER_EXCLUDE_TRANSMIT|SHADER_EXCLUDE_CAMERA|SHADER_EXCLUDE_SCATTER), - - SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS|SHADER_EXCLUDE_ANY) + SHADER_SMOOTH_NORMAL = (1 << 31), + SHADER_CAST_SHADOW = (1 << 30), + SHADER_AREA_LIGHT = (1 << 29), + SHADER_USE_MIS = (1 << 28), + SHADER_EXCLUDE_DIFFUSE = (1 << 27), + SHADER_EXCLUDE_GLOSSY = (1 << 26), + SHADER_EXCLUDE_TRANSMIT = (1 << 25), + SHADER_EXCLUDE_CAMERA = (1 << 24), + SHADER_EXCLUDE_SCATTER = (1 << 23), + SHADER_EXCLUDE_ANY = (SHADER_EXCLUDE_DIFFUSE | SHADER_EXCLUDE_GLOSSY | SHADER_EXCLUDE_TRANSMIT | + SHADER_EXCLUDE_CAMERA | SHADER_EXCLUDE_SCATTER), + + SHADER_MASK 
= ~(SHADER_SMOOTH_NORMAL | SHADER_CAST_SHADOW | SHADER_AREA_LIGHT | SHADER_USE_MIS | + SHADER_EXCLUDE_ANY) } ShaderFlag; /* Light Type */ typedef enum LightType { - LIGHT_POINT, - LIGHT_DISTANT, - LIGHT_BACKGROUND, - LIGHT_AREA, - LIGHT_SPOT, - LIGHT_TRIANGLE + LIGHT_POINT, + LIGHT_DISTANT, + LIGHT_BACKGROUND, + LIGHT_AREA, + LIGHT_SPOT, + LIGHT_TRIANGLE } LightType; /* Camera Type */ -enum CameraType { - CAMERA_PERSPECTIVE, - CAMERA_ORTHOGRAPHIC, - CAMERA_PANORAMA -}; +enum CameraType { CAMERA_PERSPECTIVE, CAMERA_ORTHOGRAPHIC, CAMERA_PANORAMA }; /* Panorama Type */ enum PanoramaType { - PANORAMA_EQUIRECTANGULAR = 0, - PANORAMA_FISHEYE_EQUIDISTANT = 1, - PANORAMA_FISHEYE_EQUISOLID = 2, - PANORAMA_MIRRORBALL = 3, + PANORAMA_EQUIRECTANGULAR = 0, + PANORAMA_FISHEYE_EQUIDISTANT = 1, + PANORAMA_FISHEYE_EQUISOLID = 2, + PANORAMA_MIRRORBALL = 3, - PANORAMA_NUM_TYPES, + PANORAMA_NUM_TYPES, }; /* Differential */ typedef struct differential3 { - float3 dx; - float3 dy; + float3 dx; + float3 dy; } differential3; typedef struct differential { - float dx; - float dy; + float dx; + float dy; } differential; /* Ray */ @@ -657,21 +650,21 @@ typedef struct Ray { * is fixed. 
*/ #ifndef __KERNEL_OPENCL_AMD__ - float3 P; /* origin */ - float3 D; /* direction */ + float3 P; /* origin */ + float3 D; /* direction */ - float t; /* length of the ray */ - float time; /* time (for motion blur) */ + float t; /* length of the ray */ + float time; /* time (for motion blur) */ #else - float t; /* length of the ray */ - float time; /* time (for motion blur) */ - float3 P; /* origin */ - float3 D; /* direction */ + float t; /* length of the ray */ + float time; /* time (for motion blur) */ + float3 P; /* origin */ + float3 D; /* direction */ #endif #ifdef __RAY_DIFFERENTIALS__ - differential3 dP; - differential3 dD; + differential3 dP; + differential3 dD; #endif } Ray; @@ -679,42 +672,42 @@ typedef struct Ray { typedef struct Intersection { #ifdef __EMBREE__ - float3 Ng; + float3 Ng; #endif - float t, u, v; - int prim; - int object; - int type; + float t, u, v; + int prim; + int object; + int type; #ifdef __KERNEL_DEBUG__ - int num_traversed_nodes; - int num_traversed_instances; - int num_intersections; + int num_traversed_nodes; + int num_traversed_instances; + int num_intersections; #endif } Intersection; /* Primitives */ typedef enum PrimitiveType { - PRIMITIVE_NONE = 0, - PRIMITIVE_TRIANGLE = (1 << 0), - PRIMITIVE_MOTION_TRIANGLE = (1 << 1), - PRIMITIVE_CURVE = (1 << 2), - PRIMITIVE_MOTION_CURVE = (1 << 3), - /* Lamp primitive is not included below on purpose, - * since it is no real traceable primitive. - */ - PRIMITIVE_LAMP = (1 << 4), - - PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE), - PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE), - PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE), - PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE), - - /* Total number of different traceable primitives. - * NOTE: This is an actual value, not a bitflag. 
- */ - PRIMITIVE_NUM_TOTAL = 4, + PRIMITIVE_NONE = 0, + PRIMITIVE_TRIANGLE = (1 << 0), + PRIMITIVE_MOTION_TRIANGLE = (1 << 1), + PRIMITIVE_CURVE = (1 << 2), + PRIMITIVE_MOTION_CURVE = (1 << 3), + /* Lamp primitive is not included below on purpose, + * since it is no real traceable primitive. + */ + PRIMITIVE_LAMP = (1 << 4), + + PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE | PRIMITIVE_MOTION_TRIANGLE), + PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE | PRIMITIVE_MOTION_CURVE), + PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE | PRIMITIVE_MOTION_CURVE), + PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE | PRIMITIVE_ALL_CURVE), + + /* Total number of different traceable primitives. + * NOTE: This is an actual value, not a bitflag. + */ + PRIMITIVE_NUM_TOTAL = 4, } PrimitiveType; #define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << PRIMITIVE_NUM_TOTAL) | (type)) @@ -723,68 +716,68 @@ typedef enum PrimitiveType { /* Attributes */ typedef enum AttributePrimitive { - ATTR_PRIM_TRIANGLE = 0, - ATTR_PRIM_CURVE, - ATTR_PRIM_SUBD, + ATTR_PRIM_TRIANGLE = 0, + ATTR_PRIM_CURVE, + ATTR_PRIM_SUBD, - ATTR_PRIM_TYPES + ATTR_PRIM_TYPES } AttributePrimitive; typedef enum AttributeElement { - ATTR_ELEMENT_NONE, - ATTR_ELEMENT_OBJECT, - ATTR_ELEMENT_MESH, - ATTR_ELEMENT_FACE, - ATTR_ELEMENT_VERTEX, - ATTR_ELEMENT_VERTEX_MOTION, - ATTR_ELEMENT_CORNER, - ATTR_ELEMENT_CORNER_BYTE, - ATTR_ELEMENT_CURVE, - ATTR_ELEMENT_CURVE_KEY, - ATTR_ELEMENT_CURVE_KEY_MOTION, - ATTR_ELEMENT_VOXEL + ATTR_ELEMENT_NONE, + ATTR_ELEMENT_OBJECT, + ATTR_ELEMENT_MESH, + ATTR_ELEMENT_FACE, + ATTR_ELEMENT_VERTEX, + ATTR_ELEMENT_VERTEX_MOTION, + ATTR_ELEMENT_CORNER, + ATTR_ELEMENT_CORNER_BYTE, + ATTR_ELEMENT_CURVE, + ATTR_ELEMENT_CURVE_KEY, + ATTR_ELEMENT_CURVE_KEY_MOTION, + ATTR_ELEMENT_VOXEL } AttributeElement; typedef enum AttributeStandard { - ATTR_STD_NONE = 0, - ATTR_STD_VERTEX_NORMAL, - ATTR_STD_FACE_NORMAL, - ATTR_STD_UV, - ATTR_STD_UV_TANGENT, - ATTR_STD_UV_TANGENT_SIGN, - ATTR_STD_GENERATED, - 
ATTR_STD_GENERATED_TRANSFORM, - ATTR_STD_POSITION_UNDEFORMED, - ATTR_STD_POSITION_UNDISPLACED, - ATTR_STD_MOTION_VERTEX_POSITION, - ATTR_STD_MOTION_VERTEX_NORMAL, - ATTR_STD_PARTICLE, - ATTR_STD_CURVE_INTERCEPT, - ATTR_STD_CURVE_RANDOM, - ATTR_STD_PTEX_FACE_ID, - ATTR_STD_PTEX_UV, - ATTR_STD_VOLUME_DENSITY, - ATTR_STD_VOLUME_COLOR, - ATTR_STD_VOLUME_FLAME, - ATTR_STD_VOLUME_HEAT, - ATTR_STD_VOLUME_TEMPERATURE, - ATTR_STD_VOLUME_VELOCITY, - ATTR_STD_POINTINESS, - ATTR_STD_NUM, - - ATTR_STD_NOT_FOUND = ~0 + ATTR_STD_NONE = 0, + ATTR_STD_VERTEX_NORMAL, + ATTR_STD_FACE_NORMAL, + ATTR_STD_UV, + ATTR_STD_UV_TANGENT, + ATTR_STD_UV_TANGENT_SIGN, + ATTR_STD_GENERATED, + ATTR_STD_GENERATED_TRANSFORM, + ATTR_STD_POSITION_UNDEFORMED, + ATTR_STD_POSITION_UNDISPLACED, + ATTR_STD_MOTION_VERTEX_POSITION, + ATTR_STD_MOTION_VERTEX_NORMAL, + ATTR_STD_PARTICLE, + ATTR_STD_CURVE_INTERCEPT, + ATTR_STD_CURVE_RANDOM, + ATTR_STD_PTEX_FACE_ID, + ATTR_STD_PTEX_UV, + ATTR_STD_VOLUME_DENSITY, + ATTR_STD_VOLUME_COLOR, + ATTR_STD_VOLUME_FLAME, + ATTR_STD_VOLUME_HEAT, + ATTR_STD_VOLUME_TEMPERATURE, + ATTR_STD_VOLUME_VELOCITY, + ATTR_STD_POINTINESS, + ATTR_STD_NUM, + + ATTR_STD_NOT_FOUND = ~0 } AttributeStandard; typedef enum AttributeFlag { - ATTR_FINAL_SIZE = (1 << 0), - ATTR_SUBDIVIDED = (1 << 1), + ATTR_FINAL_SIZE = (1 << 0), + ATTR_SUBDIVIDED = (1 << 1), } AttributeFlag; typedef struct AttributeDescriptor { - AttributeElement element; - NodeAttributeType type; - uint flags; /* see enum AttributeFlag */ - int offset; + AttributeElement element; + NodeAttributeType type; + uint flags; /* see enum AttributeFlag */ + int offset; } AttributeDescriptor; /* Closure data */ @@ -794,7 +787,7 @@ typedef struct AttributeDescriptor { # define MAX_CLOSURE 1 # else # ifndef __MAX_CLOSURE__ -# define MAX_CLOSURE 64 +# define MAX_CLOSURE 64 # else # define MAX_CLOSURE __MAX_CLOSURE__ # endif @@ -815,16 +808,18 @@ typedef struct AttributeDescriptor { * we assume to be the maximum required alignment for any 
struct. */ #define SHADER_CLOSURE_BASE \ - float3 weight; \ - ClosureType type; \ - float sample_weight; \ - float3 N + float3 weight; \ + ClosureType type; \ + float sample_weight; \ + float3 N -typedef ccl_addr_space struct ccl_align(16) ShaderClosure { - SHADER_CLOSURE_BASE; +typedef ccl_addr_space struct ccl_align(16) ShaderClosure +{ + SHADER_CLOSURE_BASE; - float data[10]; /* pad to 80 bytes */ -} ShaderClosure; + float data[10]; /* pad to 80 bytes */ +} +ShaderClosure; /* Shader Data * @@ -833,272 +828,253 @@ typedef ccl_addr_space struct ccl_align(16) ShaderClosure { */ enum ShaderDataFlag { - /* Runtime flags. */ - - /* Set when ray hits backside of surface. */ - SD_BACKFACING = (1 << 0), - /* Shader has non-zero emission. */ - SD_EMISSION = (1 << 1), - /* Shader has BSDF closure. */ - SD_BSDF = (1 << 2), - /* Shader has non-singular BSDF closure. */ - SD_BSDF_HAS_EVAL = (1 << 3), - /* Shader has BSSRDF closure. */ - SD_BSSRDF = (1 << 4), - /* Shader has holdout closure. */ - SD_HOLDOUT = (1 << 5), - /* Shader has non-zero volume extinction. */ - SD_EXTINCTION = (1 << 6), - /* Shader has have volume phase (scatter) closure. */ - SD_SCATTER = (1 << 7), - /* Shader has transparent closure. */ - SD_TRANSPARENT = (1 << 9), - /* BSDF requires LCG for evaluation. */ - SD_BSDF_NEEDS_LCG = (1 << 10), - - SD_CLOSURE_FLAGS = (SD_EMISSION | - SD_BSDF | - SD_BSDF_HAS_EVAL | - SD_BSSRDF | - SD_HOLDOUT | - SD_EXTINCTION | - SD_SCATTER | - SD_BSDF_NEEDS_LCG), - - /* Shader flags. */ - - /* direct light sample */ - SD_USE_MIS = (1 << 16), - /* Has transparent shadow. */ - SD_HAS_TRANSPARENT_SHADOW = (1 << 17), - /* Has volume shader. */ - SD_HAS_VOLUME = (1 << 18), - /* Has only volume shader, no surface. */ - SD_HAS_ONLY_VOLUME = (1 << 19), - /* Has heterogeneous volume. */ - SD_HETEROGENEOUS_VOLUME = (1 << 20), - /* BSSRDF normal uses bump. 
*/ - SD_HAS_BSSRDF_BUMP = (1 << 21), - /* Use equiangular volume sampling */ - SD_VOLUME_EQUIANGULAR = (1 << 22), - /* Use multiple importance volume sampling. */ - SD_VOLUME_MIS = (1 << 23), - /* Use cubic interpolation for voxels. */ - SD_VOLUME_CUBIC = (1 << 24), - /* Has data connected to the displacement input or uses bump map. */ - SD_HAS_BUMP = (1 << 25), - /* Has true displacement. */ - SD_HAS_DISPLACEMENT = (1 << 26), - /* Has constant emission (value stored in __shaders) */ - SD_HAS_CONSTANT_EMISSION = (1 << 27), - /* Needs to access attributes */ - SD_NEED_ATTRIBUTES = (1 << 28), - - SD_SHADER_FLAGS = (SD_USE_MIS | - SD_HAS_TRANSPARENT_SHADOW | - SD_HAS_VOLUME | - SD_HAS_ONLY_VOLUME | - SD_HETEROGENEOUS_VOLUME | - SD_HAS_BSSRDF_BUMP | - SD_VOLUME_EQUIANGULAR | - SD_VOLUME_MIS | - SD_VOLUME_CUBIC | - SD_HAS_BUMP | - SD_HAS_DISPLACEMENT | - SD_HAS_CONSTANT_EMISSION | - SD_NEED_ATTRIBUTES) + /* Runtime flags. */ + + /* Set when ray hits backside of surface. */ + SD_BACKFACING = (1 << 0), + /* Shader has non-zero emission. */ + SD_EMISSION = (1 << 1), + /* Shader has BSDF closure. */ + SD_BSDF = (1 << 2), + /* Shader has non-singular BSDF closure. */ + SD_BSDF_HAS_EVAL = (1 << 3), + /* Shader has BSSRDF closure. */ + SD_BSSRDF = (1 << 4), + /* Shader has holdout closure. */ + SD_HOLDOUT = (1 << 5), + /* Shader has non-zero volume extinction. */ + SD_EXTINCTION = (1 << 6), + /* Shader has have volume phase (scatter) closure. */ + SD_SCATTER = (1 << 7), + /* Shader has transparent closure. */ + SD_TRANSPARENT = (1 << 9), + /* BSDF requires LCG for evaluation. */ + SD_BSDF_NEEDS_LCG = (1 << 10), + + SD_CLOSURE_FLAGS = (SD_EMISSION | SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSSRDF | SD_HOLDOUT | + SD_EXTINCTION | SD_SCATTER | SD_BSDF_NEEDS_LCG), + + /* Shader flags. */ + + /* direct light sample */ + SD_USE_MIS = (1 << 16), + /* Has transparent shadow. */ + SD_HAS_TRANSPARENT_SHADOW = (1 << 17), + /* Has volume shader. 
*/ + SD_HAS_VOLUME = (1 << 18), + /* Has only volume shader, no surface. */ + SD_HAS_ONLY_VOLUME = (1 << 19), + /* Has heterogeneous volume. */ + SD_HETEROGENEOUS_VOLUME = (1 << 20), + /* BSSRDF normal uses bump. */ + SD_HAS_BSSRDF_BUMP = (1 << 21), + /* Use equiangular volume sampling */ + SD_VOLUME_EQUIANGULAR = (1 << 22), + /* Use multiple importance volume sampling. */ + SD_VOLUME_MIS = (1 << 23), + /* Use cubic interpolation for voxels. */ + SD_VOLUME_CUBIC = (1 << 24), + /* Has data connected to the displacement input or uses bump map. */ + SD_HAS_BUMP = (1 << 25), + /* Has true displacement. */ + SD_HAS_DISPLACEMENT = (1 << 26), + /* Has constant emission (value stored in __shaders) */ + SD_HAS_CONSTANT_EMISSION = (1 << 27), + /* Needs to access attributes */ + SD_NEED_ATTRIBUTES = (1 << 28), + + SD_SHADER_FLAGS = (SD_USE_MIS | SD_HAS_TRANSPARENT_SHADOW | SD_HAS_VOLUME | SD_HAS_ONLY_VOLUME | + SD_HETEROGENEOUS_VOLUME | SD_HAS_BSSRDF_BUMP | SD_VOLUME_EQUIANGULAR | + SD_VOLUME_MIS | SD_VOLUME_CUBIC | SD_HAS_BUMP | SD_HAS_DISPLACEMENT | + SD_HAS_CONSTANT_EMISSION | SD_NEED_ATTRIBUTES) }; - /* Object flags. */ +/* Object flags. */ enum ShaderDataObjectFlag { - /* Holdout for camera rays. */ - SD_OBJECT_HOLDOUT_MASK = (1 << 0), - /* Has object motion blur. */ - SD_OBJECT_MOTION = (1 << 1), - /* Vertices have transform applied. */ - SD_OBJECT_TRANSFORM_APPLIED = (1 << 2), - /* Vertices have negative scale applied. */ - SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3), - /* Object has a volume shader. */ - SD_OBJECT_HAS_VOLUME = (1 << 4), - /* Object intersects AABB of an object with volume shader. */ - SD_OBJECT_INTERSECTS_VOLUME = (1 << 5), - /* Has position for motion vertices. 
*/ - SD_OBJECT_HAS_VERTEX_MOTION = (1 << 6), - /* object is used to catch shadows */ - SD_OBJECT_SHADOW_CATCHER = (1 << 7), - /* object has volume attributes */ - SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8), - - SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK | - SD_OBJECT_MOTION | - SD_OBJECT_TRANSFORM_APPLIED | - SD_OBJECT_NEGATIVE_SCALE_APPLIED | - SD_OBJECT_HAS_VOLUME | - SD_OBJECT_INTERSECTS_VOLUME | - SD_OBJECT_SHADOW_CATCHER | - SD_OBJECT_HAS_VOLUME_ATTRIBUTES) + /* Holdout for camera rays. */ + SD_OBJECT_HOLDOUT_MASK = (1 << 0), + /* Has object motion blur. */ + SD_OBJECT_MOTION = (1 << 1), + /* Vertices have transform applied. */ + SD_OBJECT_TRANSFORM_APPLIED = (1 << 2), + /* Vertices have negative scale applied. */ + SD_OBJECT_NEGATIVE_SCALE_APPLIED = (1 << 3), + /* Object has a volume shader. */ + SD_OBJECT_HAS_VOLUME = (1 << 4), + /* Object intersects AABB of an object with volume shader. */ + SD_OBJECT_INTERSECTS_VOLUME = (1 << 5), + /* Has position for motion vertices. */ + SD_OBJECT_HAS_VERTEX_MOTION = (1 << 6), + /* object is used to catch shadows */ + SD_OBJECT_SHADOW_CATCHER = (1 << 7), + /* object has volume attributes */ + SD_OBJECT_HAS_VOLUME_ATTRIBUTES = (1 << 8), + + SD_OBJECT_FLAGS = (SD_OBJECT_HOLDOUT_MASK | SD_OBJECT_MOTION | SD_OBJECT_TRANSFORM_APPLIED | + SD_OBJECT_NEGATIVE_SCALE_APPLIED | SD_OBJECT_HAS_VOLUME | + SD_OBJECT_INTERSECTS_VOLUME | SD_OBJECT_SHADOW_CATCHER | + SD_OBJECT_HAS_VOLUME_ATTRIBUTES) }; typedef ccl_addr_space struct ShaderData { - /* position */ - float3 P; - /* smooth normal for shading */ - float3 N; - /* true geometric normal */ - float3 Ng; - /* view/incoming direction */ - float3 I; - /* shader id */ - int shader; - /* booleans describing shader, see ShaderDataFlag */ - int flag; - /* booleans describing object of the shader, see ShaderDataObjectFlag */ - int object_flag; - - /* primitive id if there is one, ~0 otherwise */ - int prim; - - /* combined type and curve segment for hair */ - int type; - - /* parametric 
coordinates - * - barycentric weights for triangles */ - float u; - float v; - /* object id if there is one, ~0 otherwise */ - int object; - /* lamp id if there is one, ~0 otherwise */ - int lamp; - - /* motion blur sample time */ - float time; - - /* length of the ray being shaded */ - float ray_length; + /* position */ + float3 P; + /* smooth normal for shading */ + float3 N; + /* true geometric normal */ + float3 Ng; + /* view/incoming direction */ + float3 I; + /* shader id */ + int shader; + /* booleans describing shader, see ShaderDataFlag */ + int flag; + /* booleans describing object of the shader, see ShaderDataObjectFlag */ + int object_flag; + + /* primitive id if there is one, ~0 otherwise */ + int prim; + + /* combined type and curve segment for hair */ + int type; + + /* parametric coordinates + * - barycentric weights for triangles */ + float u; + float v; + /* object id if there is one, ~0 otherwise */ + int object; + /* lamp id if there is one, ~0 otherwise */ + int lamp; + + /* motion blur sample time */ + float time; + + /* length of the ray being shaded */ + float ray_length; #ifdef __RAY_DIFFERENTIALS__ - /* differential of P. these are orthogonal to Ng, not N */ - differential3 dP; - /* differential of I */ - differential3 dI; - /* differential of u, v */ - differential du; - differential dv; + /* differential of P. these are orthogonal to Ng, not N */ + differential3 dP; + /* differential of I */ + differential3 dI; + /* differential of u, v */ + differential du; + differential dv; #endif #ifdef __DPDU__ - /* differential of P w.r.t. parametric coordinates. note that dPdu is - * not readily suitable as a tangent for shading on triangles. */ - float3 dPdu; - float3 dPdv; + /* differential of P w.r.t. parametric coordinates. note that dPdu is + * not readily suitable as a tangent for shading on triangles. 
*/ + float3 dPdu; + float3 dPdv; #endif #ifdef __OBJECT_MOTION__ - /* object <-> world space transformations, cached to avoid - * re-interpolating them constantly for shading */ - Transform ob_tfm; - Transform ob_itfm; + /* object <-> world space transformations, cached to avoid + * re-interpolating them constantly for shading */ + Transform ob_tfm; + Transform ob_itfm; #endif - /* ray start position, only set for backgrounds */ - float3 ray_P; - differential3 ray_dP; + /* ray start position, only set for backgrounds */ + float3 ray_P; + differential3 ray_dP; #ifdef __OSL__ - struct KernelGlobals *osl_globals; - struct PathState *osl_path_state; + struct KernelGlobals *osl_globals; + struct PathState *osl_path_state; #endif - /* LCG state for closures that require additional random numbers. */ - uint lcg_state; + /* LCG state for closures that require additional random numbers. */ + uint lcg_state; - /* Closure data, we store a fixed array of closures */ - int num_closure; - int num_closure_left; - float randb_closure; - float3 svm_closure_weight; + /* Closure data, we store a fixed array of closures */ + int num_closure; + int num_closure_left; + float randb_closure; + float3 svm_closure_weight; - /* Closure weights summed directly, so we can evaluate - * emission and shadow transparency with MAX_CLOSURE 0. */ - float3 closure_emission_background; - float3 closure_transparent_extinction; + /* Closure weights summed directly, so we can evaluate + * emission and shadow transparency with MAX_CLOSURE 0. */ + float3 closure_emission_background; + float3 closure_transparent_extinction; - /* At the end so we can adjust size in ShaderDataTinyStorage. */ - struct ShaderClosure closure[MAX_CLOSURE]; + /* At the end so we can adjust size in ShaderDataTinyStorage. 
*/ + struct ShaderClosure closure[MAX_CLOSURE]; } ShaderData; typedef ccl_addr_space struct ShaderDataTinyStorage { - char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE]; + char pad[sizeof(ShaderData) - sizeof(ShaderClosure) * MAX_CLOSURE]; } ShaderDataTinyStorage; -#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData*)shader_data_tiny_storage) +#define AS_SHADER_DATA(shader_data_tiny_storage) ((ShaderData *)shader_data_tiny_storage) /* Path State */ #ifdef __VOLUME__ typedef struct VolumeStack { - int object; - int shader; + int object; + int shader; } VolumeStack; #endif typedef struct PathState { - /* see enum PathRayFlag */ - int flag; - - /* random number generator state */ - uint rng_hash; /* per pixel hash */ - int rng_offset; /* dimension offset */ - int sample; /* path sample number */ - int num_samples; /* total number of times this path will be sampled */ - float branch_factor; /* number of branches in indirect paths */ - - /* bounce counting */ - int bounce; - int diffuse_bounce; - int glossy_bounce; - int transmission_bounce; - int transparent_bounce; + /* see enum PathRayFlag */ + int flag; + + /* random number generator state */ + uint rng_hash; /* per pixel hash */ + int rng_offset; /* dimension offset */ + int sample; /* path sample number */ + int num_samples; /* total number of times this path will be sampled */ + float branch_factor; /* number of branches in indirect paths */ + + /* bounce counting */ + int bounce; + int diffuse_bounce; + int glossy_bounce; + int transmission_bounce; + int transparent_bounce; #ifdef __DENOISING_FEATURES__ - float denoising_feature_weight; -#endif /* __DENOISING_FEATURES__ */ + float denoising_feature_weight; +#endif /* __DENOISING_FEATURES__ */ - /* multiple importance sampling */ - float min_ray_pdf; /* smallest bounce pdf over entire path up to now */ - float ray_pdf; /* last bounce pdf */ + /* multiple importance sampling */ + float min_ray_pdf; /* smallest bounce pdf over entire path 
up to now */ + float ray_pdf; /* last bounce pdf */ #ifdef __LAMP_MIS__ - float ray_t; /* accumulated distance through transparent surfaces */ + float ray_t; /* accumulated distance through transparent surfaces */ #endif - /* volume rendering */ + /* volume rendering */ #ifdef __VOLUME__ - int volume_bounce; - int volume_bounds_bounce; - VolumeStack volume_stack[VOLUME_STACK_SIZE]; + int volume_bounce; + int volume_bounds_bounce; + VolumeStack volume_stack[VOLUME_STACK_SIZE]; #endif } PathState; /* Struct to gather multiple nearby intersections. */ typedef struct LocalIntersection { - Ray ray; - float3 weight[LOCAL_MAX_HITS]; + Ray ray; + float3 weight[LOCAL_MAX_HITS]; - int num_hits; - struct Intersection hits[LOCAL_MAX_HITS]; - float3 Ng[LOCAL_MAX_HITS]; + int num_hits; + struct Intersection hits[LOCAL_MAX_HITS]; + float3 Ng[LOCAL_MAX_HITS]; } LocalIntersection; /* Subsurface */ /* Struct to gather SSS indirect rays and delay tracing them. */ typedef struct SubsurfaceIndirectRays { - PathState state[BSSRDF_MAX_HITS]; + PathState state[BSSRDF_MAX_HITS]; - int num_rays; + int num_rays; - struct Ray rays[BSSRDF_MAX_HITS]; - float3 throughputs[BSSRDF_MAX_HITS]; - struct PathRadianceState L_state[BSSRDF_MAX_HITS]; + struct Ray rays[BSSRDF_MAX_HITS]; + float3 throughputs[BSSRDF_MAX_HITS]; + struct PathRadianceState L_state[BSSRDF_MAX_HITS]; } SubsurfaceIndirectRays; static_assert(BSSRDF_MAX_HITS <= LOCAL_MAX_HITS, "BSSRDF hits too high."); @@ -1109,424 +1085,424 @@ static_assert(BSSRDF_MAX_HITS <= LOCAL_MAX_HITS, "BSSRDF hits too high."); * do not use float3 because its size may not be the same on all devices. 
*/ typedef struct KernelCamera { - /* type */ - int type; - - /* panorama */ - int panorama_type; - float fisheye_fov; - float fisheye_lens; - float4 equirectangular_range; - - /* stereo */ - float interocular_offset; - float convergence_distance; - float pole_merge_angle_from; - float pole_merge_angle_to; - - /* matrices */ - Transform cameratoworld; - ProjectionTransform rastertocamera; - - /* differentials */ - float4 dx; - float4 dy; - - /* depth of field */ - float aperturesize; - float blades; - float bladesrotation; - float focaldistance; - - /* motion blur */ - float shuttertime; - int num_motion_steps, have_perspective_motion; - - /* clipping */ - float nearclip; - float cliplength; - - /* sensor size */ - float sensorwidth; - float sensorheight; - - /* render size */ - float width, height; - int resolution; - - /* anamorphic lens bokeh */ - float inv_aperture_ratio; - - int is_inside_volume; - - /* more matrices */ - ProjectionTransform screentoworld; - ProjectionTransform rastertoworld; - ProjectionTransform ndctoworld; - ProjectionTransform worldtoscreen; - ProjectionTransform worldtoraster; - ProjectionTransform worldtondc; - Transform worldtocamera; - - /* Stores changes in the projeciton matrix. Use for camera zoom motion - * blur and motion pass output for perspective camera. */ - ProjectionTransform perspective_pre; - ProjectionTransform perspective_post; - - /* Transforms for motion pass. 
*/ - Transform motion_pass_pre; - Transform motion_pass_post; - - int shutter_table_offset; - - /* Rolling shutter */ - int rolling_shutter_type; - float rolling_shutter_duration; - - int pad; + /* type */ + int type; + + /* panorama */ + int panorama_type; + float fisheye_fov; + float fisheye_lens; + float4 equirectangular_range; + + /* stereo */ + float interocular_offset; + float convergence_distance; + float pole_merge_angle_from; + float pole_merge_angle_to; + + /* matrices */ + Transform cameratoworld; + ProjectionTransform rastertocamera; + + /* differentials */ + float4 dx; + float4 dy; + + /* depth of field */ + float aperturesize; + float blades; + float bladesrotation; + float focaldistance; + + /* motion blur */ + float shuttertime; + int num_motion_steps, have_perspective_motion; + + /* clipping */ + float nearclip; + float cliplength; + + /* sensor size */ + float sensorwidth; + float sensorheight; + + /* render size */ + float width, height; + int resolution; + + /* anamorphic lens bokeh */ + float inv_aperture_ratio; + + int is_inside_volume; + + /* more matrices */ + ProjectionTransform screentoworld; + ProjectionTransform rastertoworld; + ProjectionTransform ndctoworld; + ProjectionTransform worldtoscreen; + ProjectionTransform worldtoraster; + ProjectionTransform worldtondc; + Transform worldtocamera; + + /* Stores changes in the projeciton matrix. Use for camera zoom motion + * blur and motion pass output for perspective camera. */ + ProjectionTransform perspective_pre; + ProjectionTransform perspective_post; + + /* Transforms for motion pass. 
*/ + Transform motion_pass_pre; + Transform motion_pass_post; + + int shutter_table_offset; + + /* Rolling shutter */ + int rolling_shutter_type; + float rolling_shutter_duration; + + int pad; } KernelCamera; static_assert_align(KernelCamera, 16); typedef struct KernelFilm { - float exposure; - int pass_flag; - int light_pass_flag; - int pass_stride; - int use_light_pass; - - int pass_combined; - int pass_depth; - int pass_normal; - int pass_motion; - - int pass_motion_weight; - int pass_uv; - int pass_object_id; - int pass_material_id; - - int pass_diffuse_color; - int pass_glossy_color; - int pass_transmission_color; - int pass_subsurface_color; - - int pass_diffuse_indirect; - int pass_glossy_indirect; - int pass_transmission_indirect; - int pass_subsurface_indirect; - int pass_volume_indirect; - - int pass_diffuse_direct; - int pass_glossy_direct; - int pass_transmission_direct; - int pass_subsurface_direct; - int pass_volume_direct; - - int pass_emission; - int pass_background; - int pass_ao; - float pass_alpha_threshold; - - int pass_shadow; - float pass_shadow_scale; - int filter_table_offset; - int cryptomatte_passes; - int cryptomatte_depth; - int pass_cryptomatte; - - int pass_mist; - float mist_start; - float mist_inv_depth; - float mist_falloff; - - int pass_denoising_data; - int pass_denoising_clean; - int denoising_flags; - - /* XYZ to rendering color space transform. float4 instead of float3 to - * ensure consistent padding/alignment across devices. 
*/ - float4 xyz_to_r; - float4 xyz_to_g; - float4 xyz_to_b; - float4 rgb_to_y; + float exposure; + int pass_flag; + int light_pass_flag; + int pass_stride; + int use_light_pass; + + int pass_combined; + int pass_depth; + int pass_normal; + int pass_motion; + + int pass_motion_weight; + int pass_uv; + int pass_object_id; + int pass_material_id; + + int pass_diffuse_color; + int pass_glossy_color; + int pass_transmission_color; + int pass_subsurface_color; + + int pass_diffuse_indirect; + int pass_glossy_indirect; + int pass_transmission_indirect; + int pass_subsurface_indirect; + int pass_volume_indirect; + + int pass_diffuse_direct; + int pass_glossy_direct; + int pass_transmission_direct; + int pass_subsurface_direct; + int pass_volume_direct; + + int pass_emission; + int pass_background; + int pass_ao; + float pass_alpha_threshold; + + int pass_shadow; + float pass_shadow_scale; + int filter_table_offset; + int cryptomatte_passes; + int cryptomatte_depth; + int pass_cryptomatte; + + int pass_mist; + float mist_start; + float mist_inv_depth; + float mist_falloff; + + int pass_denoising_data; + int pass_denoising_clean; + int denoising_flags; + + /* XYZ to rendering color space transform. float4 instead of float3 to + * ensure consistent padding/alignment across devices. 
*/ + float4 xyz_to_r; + float4 xyz_to_g; + float4 xyz_to_b; + float4 rgb_to_y; #ifdef __KERNEL_DEBUG__ - int pass_bvh_traversed_nodes; - int pass_bvh_traversed_instances; - int pass_bvh_intersections; - int pass_ray_bounces; + int pass_bvh_traversed_nodes; + int pass_bvh_traversed_instances; + int pass_bvh_intersections; + int pass_ray_bounces; #endif } KernelFilm; static_assert_align(KernelFilm, 16); typedef struct KernelBackground { - /* only shader index */ - int surface_shader; - int volume_shader; - int transparent; - float transparent_roughness_squared_threshold; - - /* ambient occlusion */ - float ao_factor; - float ao_distance; - float ao_bounces_factor; - float ao_pad; + /* only shader index */ + int surface_shader; + int volume_shader; + int transparent; + float transparent_roughness_squared_threshold; + + /* ambient occlusion */ + float ao_factor; + float ao_distance; + float ao_bounces_factor; + float ao_pad; } KernelBackground; static_assert_align(KernelBackground, 16); typedef struct KernelIntegrator { - /* emission */ - int use_direct_light; - int use_ambient_occlusion; - int num_distribution; - int num_all_lights; - float pdf_triangles; - float pdf_lights; - int pdf_background_res_x; - int pdf_background_res_y; - float light_inv_rr_threshold; - - /* light portals */ - float portal_pdf; - int num_portals; - int portal_offset; - - /* bounces */ - int max_bounce; - - int max_diffuse_bounce; - int max_glossy_bounce; - int max_transmission_bounce; - int max_volume_bounce; - - int ao_bounces; - - /* transparent */ - int transparent_max_bounce; - int transparent_shadows; - - /* caustics */ - int caustics_reflective; - int caustics_refractive; - float filter_glossy; - - /* seed */ - int seed; - - /* clamp */ - float sample_clamp_direct; - float sample_clamp_indirect; - - /* branched path */ - int branched; - int volume_decoupled; - int diffuse_samples; - int glossy_samples; - int transmission_samples; - int ao_samples; - int mesh_light_samples; - int 
subsurface_samples; - int sample_all_lights_direct; - int sample_all_lights_indirect; - - /* mis */ - int use_lamp_mis; - - /* sampler */ - int sampling_pattern; - int aa_samples; - - /* volume render */ - int use_volumes; - int volume_max_steps; - float volume_step_size; - int volume_samples; - - int start_sample; - - int max_closures; - - int pad1, pad2, pad3; + /* emission */ + int use_direct_light; + int use_ambient_occlusion; + int num_distribution; + int num_all_lights; + float pdf_triangles; + float pdf_lights; + int pdf_background_res_x; + int pdf_background_res_y; + float light_inv_rr_threshold; + + /* light portals */ + float portal_pdf; + int num_portals; + int portal_offset; + + /* bounces */ + int max_bounce; + + int max_diffuse_bounce; + int max_glossy_bounce; + int max_transmission_bounce; + int max_volume_bounce; + + int ao_bounces; + + /* transparent */ + int transparent_max_bounce; + int transparent_shadows; + + /* caustics */ + int caustics_reflective; + int caustics_refractive; + float filter_glossy; + + /* seed */ + int seed; + + /* clamp */ + float sample_clamp_direct; + float sample_clamp_indirect; + + /* branched path */ + int branched; + int volume_decoupled; + int diffuse_samples; + int glossy_samples; + int transmission_samples; + int ao_samples; + int mesh_light_samples; + int subsurface_samples; + int sample_all_lights_direct; + int sample_all_lights_indirect; + + /* mis */ + int use_lamp_mis; + + /* sampler */ + int sampling_pattern; + int aa_samples; + + /* volume render */ + int use_volumes; + int volume_max_steps; + float volume_step_size; + int volume_samples; + + int start_sample; + + int max_closures; + + int pad1, pad2, pad3; } KernelIntegrator; static_assert_align(KernelIntegrator, 16); typedef enum KernelBVHLayout { - BVH_LAYOUT_NONE = 0, - - BVH_LAYOUT_BVH2 = (1 << 0), - BVH_LAYOUT_BVH4 = (1 << 1), - BVH_LAYOUT_BVH8 = (1 << 2), - BVH_LAYOUT_EMBREE = (1 << 3), - BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8, - BVH_LAYOUT_ALL = 
(unsigned int)(-1), + BVH_LAYOUT_NONE = 0, + + BVH_LAYOUT_BVH2 = (1 << 0), + BVH_LAYOUT_BVH4 = (1 << 1), + BVH_LAYOUT_BVH8 = (1 << 2), + BVH_LAYOUT_EMBREE = (1 << 3), + BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8, + BVH_LAYOUT_ALL = (unsigned int)(-1), } KernelBVHLayout; typedef struct KernelBVH { - /* Own BVH */ - int root; - int have_motion; - int have_curves; - int have_instancing; - int bvh_layout; - int use_bvh_steps; - - /* Embree */ + /* Own BVH */ + int root; + int have_motion; + int have_curves; + int have_instancing; + int bvh_layout; + int use_bvh_steps; + + /* Embree */ #ifdef __EMBREE__ - RTCScene scene; + RTCScene scene; # ifndef __KERNEL_64_BIT__ - int pad1; + int pad1; # endif #else - int pad1, pad2; + int pad1, pad2; #endif } KernelBVH; static_assert_align(KernelBVH, 16); typedef enum CurveFlag { - /* runtime flags */ - CURVE_KN_BACKFACING = 1, /* backside of cylinder? */ - CURVE_KN_ENCLOSEFILTER = 2, /* don't consider strands surrounding start point? */ - CURVE_KN_INTERPOLATE = 4, /* render as a curve? */ - CURVE_KN_ACCURATE = 8, /* use accurate intersections test? */ - CURVE_KN_INTERSECTCORRECTION = 16, /* correct for width after determing closest midpoint? */ - CURVE_KN_TRUETANGENTGNORMAL = 32, /* use tangent normal for geometry? */ - CURVE_KN_RIBBONS = 64, /* use flat curve ribbons */ + /* runtime flags */ + CURVE_KN_BACKFACING = 1, /* backside of cylinder? */ + CURVE_KN_ENCLOSEFILTER = 2, /* don't consider strands surrounding start point? */ + CURVE_KN_INTERPOLATE = 4, /* render as a curve? */ + CURVE_KN_ACCURATE = 8, /* use accurate intersections test? */ + CURVE_KN_INTERSECTCORRECTION = 16, /* correct for width after determing closest midpoint? */ + CURVE_KN_TRUETANGENTGNORMAL = 32, /* use tangent normal for geometry? 
*/ + CURVE_KN_RIBBONS = 64, /* use flat curve ribbons */ } CurveFlag; typedef struct KernelCurves { - int curveflags; - int subdivisions; + int curveflags; + int subdivisions; - float minimum_width; - float maximum_width; + float minimum_width; + float maximum_width; } KernelCurves; static_assert_align(KernelCurves, 16); typedef struct KernelTables { - int beckmann_offset; - int pad1, pad2, pad3; + int beckmann_offset; + int pad1, pad2, pad3; } KernelTables; static_assert_align(KernelTables, 16); typedef struct KernelData { - KernelCamera cam; - KernelFilm film; - KernelBackground background; - KernelIntegrator integrator; - KernelBVH bvh; - KernelCurves curve; - KernelTables tables; + KernelCamera cam; + KernelFilm film; + KernelBackground background; + KernelIntegrator integrator; + KernelBVH bvh; + KernelCurves curve; + KernelTables tables; } KernelData; static_assert_align(KernelData, 16); /* Kernel data structures. */ typedef struct KernelObject { - Transform tfm; - Transform itfm; + Transform tfm; + Transform itfm; - float surface_area; - float pass_id; - float random_number; - int particle_index; + float surface_area; + float pass_id; + float random_number; + int particle_index; - float dupli_generated[3]; - float dupli_uv[2]; + float dupli_generated[3]; + float dupli_uv[2]; - int numkeys; - int numsteps; - int numverts; + int numkeys; + int numsteps; + int numverts; - uint patch_map_offset; - uint attribute_map_offset; - uint motion_offset; - uint pad1; + uint patch_map_offset; + uint attribute_map_offset; + uint motion_offset; + uint pad1; - float cryptomatte_object; - float cryptomatte_asset; - float pad2, pad3; + float cryptomatte_object; + float cryptomatte_asset; + float pad2, pad3; } KernelObject; static_assert_align(KernelObject, 16); typedef struct KernelSpotLight { - float radius; - float invarea; - float spot_angle; - float spot_smooth; - float dir[3]; - float pad; + float radius; + float invarea; + float spot_angle; + float spot_smooth; + float 
dir[3]; + float pad; } KernelSpotLight; /* PointLight is SpotLight with only radius and invarea being used. */ typedef struct KernelAreaLight { - float axisu[3]; - float invarea; - float axisv[3]; - float pad1; - float dir[3]; - float pad2; + float axisu[3]; + float invarea; + float axisv[3]; + float pad1; + float dir[3]; + float pad2; } KernelAreaLight; typedef struct KernelDistantLight { - float radius; - float cosangle; - float invarea; - float pad; + float radius; + float cosangle; + float invarea; + float pad; } KernelDistantLight; typedef struct KernelLight { - int type; - float co[3]; - int shader_id; - int samples; - float max_bounces; - float random; - Transform tfm; - Transform itfm; - union { - KernelSpotLight spot; - KernelAreaLight area; - KernelDistantLight distant; - }; + int type; + float co[3]; + int shader_id; + int samples; + float max_bounces; + float random; + Transform tfm; + Transform itfm; + union { + KernelSpotLight spot; + KernelAreaLight area; + KernelDistantLight distant; + }; } KernelLight; static_assert_align(KernelLight, 16); typedef struct KernelLightDistribution { - float totarea; - int prim; - union { - struct { - int shader_flag; - int object_id; - } mesh_light; - struct { - float pad; - float size; - } lamp; - }; + float totarea; + int prim; + union { + struct { + int shader_flag; + int object_id; + } mesh_light; + struct { + float pad; + float size; + } lamp; + }; } KernelLightDistribution; static_assert_align(KernelLightDistribution, 16); typedef struct KernelParticle { - int index; - float age; - float lifetime; - float size; - float4 rotation; - /* Only xyz are used of the following. float4 instead of float3 are used - * to ensure consistent padding/alignment across devices. */ - float4 location; - float4 velocity; - float4 angular_velocity; + int index; + float age; + float lifetime; + float size; + float4 rotation; + /* Only xyz are used of the following. 
float4 instead of float3 are used + * to ensure consistent padding/alignment across devices. */ + float4 location; + float4 velocity; + float4 angular_velocity; } KernelParticle; static_assert_align(KernelParticle, 16); typedef struct KernelShader { - float constant_emission[3]; - float cryptomatte_id; - int flags; - int pass_id; - int pad2, pad3; + float constant_emission[3]; + float cryptomatte_id; + int flags; + int pass_id; + int pad2, pad3; } KernelShader; static_assert_align(KernelShader, 16); @@ -1545,88 +1521,93 @@ static_assert_align(KernelShader, 16); /* Queue names */ enum QueueNumber { - /* All active rays and regenerated rays are enqueued here. */ - QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0, - - /* All - * 1. Background-hit rays, - * 2. Rays that has exited path-iteration but needs to update output buffer - * 3. Rays to be regenerated - * are enqueued here. - */ - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - - /* All rays for which a shadow ray should be cast to determine radiance - * contribution for AO are enqueued here. - */ - QUEUE_SHADOW_RAY_CAST_AO_RAYS, - - /* All rays for which a shadow ray should be cast to determine radiance - * contributing for direct lighting are enqueued here. - */ - QUEUE_SHADOW_RAY_CAST_DL_RAYS, - - /* Rays sorted according to shader->id */ - QUEUE_SHADER_SORTED_RAYS, + /* All active rays and regenerated rays are enqueued here. */ + QUEUE_ACTIVE_AND_REGENERATED_RAYS = 0, + + /* All + * 1. Background-hit rays, + * 2. Rays that has exited path-iteration but needs to update output buffer + * 3. Rays to be regenerated + * are enqueued here. + */ + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + + /* All rays for which a shadow ray should be cast to determine radiance + * contribution for AO are enqueued here. + */ + QUEUE_SHADOW_RAY_CAST_AO_RAYS, + + /* All rays for which a shadow ray should be cast to determine radiance + * contributing for direct lighting are enqueued here. 
+ */ + QUEUE_SHADOW_RAY_CAST_DL_RAYS, + + /* Rays sorted according to shader->id */ + QUEUE_SHADER_SORTED_RAYS, #ifdef __BRANCHED_PATH__ - /* All rays moving to next iteration of the indirect loop for light */ - QUEUE_LIGHT_INDIRECT_ITER, - /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */ - QUEUE_INACTIVE_RAYS, + /* All rays moving to next iteration of the indirect loop for light */ + QUEUE_LIGHT_INDIRECT_ITER, + /* Queue of all inactive rays. These are candidates for sharing work of indirect loops */ + QUEUE_INACTIVE_RAYS, # ifdef __VOLUME__ - /* All rays moving to next iteration of the indirect loop for volumes */ - QUEUE_VOLUME_INDIRECT_ITER, + /* All rays moving to next iteration of the indirect loop for volumes */ + QUEUE_VOLUME_INDIRECT_ITER, # endif # ifdef __SUBSURFACE__ - /* All rays moving to next iteration of the indirect loop for subsurface */ - QUEUE_SUBSURFACE_INDIRECT_ITER, + /* All rays moving to next iteration of the indirect loop for subsurface */ + QUEUE_SUBSURFACE_INDIRECT_ITER, # endif -#endif /* __BRANCHED_PATH__ */ +#endif /* __BRANCHED_PATH__ */ - NUM_QUEUES + NUM_QUEUES }; /* We use RAY_STATE_MASK to get ray_state */ #define RAY_STATE_MASK 0x0F #define RAY_FLAG_MASK 0xF0 enum RayState { - RAY_INVALID = 0, - /* Denotes ray is actively involved in path-iteration. */ - RAY_ACTIVE, - /* Denotes ray has completed processing all samples and is inactive. */ - RAY_INACTIVE, - /* Denotes ray has exited path-iteration and needs to update output buffer. */ - RAY_UPDATE_BUFFER, - /* Denotes ray needs to skip most surface shader work. 
*/ - RAY_HAS_ONLY_VOLUME, - /* Donotes ray has hit background */ - RAY_HIT_BACKGROUND, - /* Denotes ray has to be regenerated */ - RAY_TO_REGENERATE, - /* Denotes ray has been regenerated */ - RAY_REGENERATED, - /* Denotes ray is moving to next iteration of the branched indirect loop */ - RAY_LIGHT_INDIRECT_NEXT_ITER, - RAY_VOLUME_INDIRECT_NEXT_ITER, - RAY_SUBSURFACE_INDIRECT_NEXT_ITER, - - /* Ray flags */ - - /* Flags to denote that the ray is currently evaluating the branched indirect loop */ - RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4), - RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5), - RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6), - RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT), - - /* Ray is evaluating an iteration of an indirect loop for another thread */ - RAY_BRANCHED_INDIRECT_SHARED = (1 << 7), + RAY_INVALID = 0, + /* Denotes ray is actively involved in path-iteration. */ + RAY_ACTIVE, + /* Denotes ray has completed processing all samples and is inactive. */ + RAY_INACTIVE, + /* Denotes ray has exited path-iteration and needs to update output buffer. */ + RAY_UPDATE_BUFFER, + /* Denotes ray needs to skip most surface shader work. 
*/ + RAY_HAS_ONLY_VOLUME, + /* Donotes ray has hit background */ + RAY_HIT_BACKGROUND, + /* Denotes ray has to be regenerated */ + RAY_TO_REGENERATE, + /* Denotes ray has been regenerated */ + RAY_REGENERATED, + /* Denotes ray is moving to next iteration of the branched indirect loop */ + RAY_LIGHT_INDIRECT_NEXT_ITER, + RAY_VOLUME_INDIRECT_NEXT_ITER, + RAY_SUBSURFACE_INDIRECT_NEXT_ITER, + + /* Ray flags */ + + /* Flags to denote that the ray is currently evaluating the branched indirect loop */ + RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4), + RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5), + RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6), + RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | + RAY_BRANCHED_SUBSURFACE_INDIRECT), + + /* Ray is evaluating an iteration of an indirect loop for another thread */ + RAY_BRANCHED_INDIRECT_SHARED = (1 << 7), }; -#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state)) -#define IS_STATE(ray_state, ray_index, state) ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state)) -#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag)) -#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag))) +#define ASSIGN_RAY_STATE(ray_state, ray_index, state) \ + (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state)) +#define IS_STATE(ray_state, ray_index, state) \ + ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state)) +#define ADD_RAY_FLAG(ray_state, ray_index, flag) \ + (ray_state[ray_index] = (ray_state[ray_index] | flag)) +#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) \ + (ray_state[ray_index] = (ray_state[ray_index] & (~flag))) #define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag) /* Patches */ @@ -1642,17 +1623,17 @@ enum RayState { /* 
Work Tiles */ typedef struct WorkTile { - uint x, y, w, h; + uint x, y, w, h; - uint start_sample; - uint num_samples; + uint start_sample; + uint num_samples; - uint offset; - uint stride; + uint offset; + uint stride; - ccl_global float *buffer; + ccl_global float *buffer; } WorkTile; CCL_NAMESPACE_END -#endif /* __KERNEL_TYPES_H__ */ +#endif /* __KERNEL_TYPES_H__ */ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 44c8f795d2c..e024003252f 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -19,9 +19,9 @@ CCL_NAMESPACE_BEGIN /* Events for probalistic scattering */ typedef enum VolumeIntegrateResult { - VOLUME_PATH_SCATTERED = 0, - VOLUME_PATH_ATTENUATED = 1, - VOLUME_PATH_MISSED = 2 + VOLUME_PATH_SCATTERED = 0, + VOLUME_PATH_ATTENUATED = 1, + VOLUME_PATH_MISSED = 2 } VolumeIntegrateResult; /* Volume shader properties @@ -30,9 +30,9 @@ typedef enum VolumeIntegrateResult { * sigma_t = sigma_a + sigma_s */ typedef struct VolumeShaderCoefficients { - float3 sigma_t; - float3 sigma_s; - float3 emission; + float3 sigma_t; + float3 sigma_s; + float3 emission; } VolumeShaderCoefficients; #ifdef __VOLUME__ @@ -44,16 +44,16 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg, float3 P, float3 *extinction) { - sd->P = P; - shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW); - - if(sd->flag & SD_EXTINCTION) { - *extinction = sd->closure_transparent_extinction; - return true; - } - else { - return false; - } + sd->P = P; + shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW); + + if (sd->flag & SD_EXTINCTION) { + *extinction = sd->closure_transparent_extinction; + return true; + } + else { + return false; + } } /* evaluate shader to get absorption, scattering and emission at P */ @@ -63,97 +63,97 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg, float3 P, VolumeShaderCoefficients *coeff) { - sd->P = P; - 
shader_eval_volume(kg, sd, state, state->volume_stack, state->flag); + sd->P = P; + shader_eval_volume(kg, sd, state, state->volume_stack, state->flag); - if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION))) - return false; + if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) + return false; - coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f); - coeff->sigma_t = (sd->flag & SD_EXTINCTION)? sd->closure_transparent_extinction: - make_float3(0.0f, 0.0f, 0.0f); - coeff->emission = (sd->flag & SD_EMISSION)? sd->closure_emission_background: - make_float3(0.0f, 0.0f, 0.0f); + coeff->sigma_s = make_float3(0.0f, 0.0f, 0.0f); + coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : + make_float3(0.0f, 0.0f, 0.0f); + coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : + make_float3(0.0f, 0.0f, 0.0f); - if(sd->flag & SD_SCATTER) { - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; + if (sd->flag & SD_SCATTER) { + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_VOLUME(sc->type)) - coeff->sigma_s += sc->weight; - } - } + if (CLOSURE_IS_VOLUME(sc->type)) + coeff->sigma_s += sc->weight; + } + } - return true; + return true; } -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ ccl_device float3 volume_color_transmittance(float3 sigma, float t) { - return exp3(-sigma * t); + return exp3(-sigma * t); } ccl_device float kernel_volume_channel_get(float3 value, int channel) { - return (channel == 0)? value.x: ((channel == 1)? value.y: value.z); + return (channel == 0) ? value.x : ((channel == 1) ? 
value.y : value.z); } #ifdef __VOLUME__ ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack) { - for(int i = 0; stack[i].shader != SHADER_NONE; i++) { - int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags; - - if(shader_flag & SD_HETEROGENEOUS_VOLUME) { - return true; - } - else if(shader_flag & SD_NEED_ATTRIBUTES) { - /* We want to render world or objects without any volume grids - * as homogenous, but can only verify this at runtime since other - * heterogenous volume objects may be using the same shader. */ - int object = stack[i].object; - if(object != OBJECT_NONE) { - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) { - return true; - } - } - } - } - - return false; + for (int i = 0; stack[i].shader != SHADER_NONE; i++) { + int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags; + + if (shader_flag & SD_HETEROGENEOUS_VOLUME) { + return true; + } + else if (shader_flag & SD_NEED_ATTRIBUTES) { + /* We want to render world or objects without any volume grids + * as homogenous, but can only verify this at runtime since other + * heterogenous volume objects may be using the same shader. 
*/ + int object = stack[i].object; + if (object != OBJECT_NONE) { + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_HAS_VOLUME_ATTRIBUTES) { + return true; + } + } + } + } + + return false; } ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stack) { - if(kernel_data.integrator.num_all_lights == 0) - return 0; + if (kernel_data.integrator.num_all_lights == 0) + return 0; - int method = -1; + int method = -1; - for(int i = 0; stack[i].shader != SHADER_NONE; i++) { - int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags; + for (int i = 0; stack[i].shader != SHADER_NONE; i++) { + int shader_flag = kernel_tex_fetch(__shaders, (stack[i].shader & SHADER_MASK)).flags; - if(shader_flag & SD_VOLUME_MIS) { - return SD_VOLUME_MIS; - } - else if(shader_flag & SD_VOLUME_EQUIANGULAR) { - if(method == 0) - return SD_VOLUME_MIS; + if (shader_flag & SD_VOLUME_MIS) { + return SD_VOLUME_MIS; + } + else if (shader_flag & SD_VOLUME_EQUIANGULAR) { + if (method == 0) + return SD_VOLUME_MIS; - method = SD_VOLUME_EQUIANGULAR; - } - else { - if(method == SD_VOLUME_EQUIANGULAR) - return SD_VOLUME_MIS; + method = SD_VOLUME_EQUIANGULAR; + } + else { + if (method == SD_VOLUME_EQUIANGULAR) + return SD_VOLUME_MIS; - method = 0; - } - } + method = 0; + } + } - return method; + return method; } ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg, @@ -162,16 +162,16 @@ ccl_device_inline void kernel_volume_step_init(KernelGlobals *kg, float *step_size, float *step_offset) { - const int max_steps = kernel_data.integrator.volume_max_steps; - float step = min(kernel_data.integrator.volume_step_size, t); + const int max_steps = kernel_data.integrator.volume_max_steps; + float step = min(kernel_data.integrator.volume_step_size, t); - /* compute exact steps in advance for malloc */ - if(t > max_steps * step) { - step = t / (float)max_steps; - } + /* compute exact steps in advance for malloc */ + 
if (t > max_steps * step) { + step = t / (float)max_steps; + } - *step_size = step; - *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step; + *step_size = step; + *step_offset = path_state_rng_1D_hash(kg, state, 0x1e31d8a4) * step; } /* Volume Shadows @@ -187,10 +187,10 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, ShaderData *sd, float3 *throughput) { - float3 sigma_t; + float3 sigma_t; - if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t)) - *throughput *= volume_color_transmittance(sigma_t, ray->t); + if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t)) + *throughput *= volume_color_transmittance(sigma_t, ray->t); } /* heterogeneous volume: integrate stepping through the volume until we @@ -201,57 +201,57 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, ShaderData *sd, float3 *throughput) { - float3 tp = *throughput; - const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ - - /* prepare for stepping */ - int max_steps = kernel_data.integrator.volume_max_steps; - float step_offset, step_size; - kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); - - /* compute extinction at the start */ - float t = 0.0f; - - float3 sum = make_float3(0.0f, 0.0f, 0.0f); - - for(int i = 0; i < max_steps; i++) { - /* advance to new position */ - float new_t = min(ray->t, (i+1) * step_size); - - /* use random position inside this segment to sample shader, adjust - * for last step that is shorter than other steps. */ - if(new_t == ray->t) { - step_offset *= (new_t - t) / step_size; - } - - float3 new_P = ray->P + ray->D * (t + step_offset); - float3 sigma_t; - - /* compute attenuation over segment */ - if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) { - /* Compute expf() only for every Nth step, to save some calculations - * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. 
*/ - - sum += (-sigma_t * (new_t - t)); - if((i & 0x07) == 0) { /* ToDo: Other interval? */ - tp = *throughput * exp3(sum); - - /* stop if nearly all light is blocked */ - if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) - break; - } - } - - /* stop if at the end of the volume */ - t = new_t; - if(t == ray->t) { - /* Update throughput in case we haven't done it above */ - tp = *throughput * exp3(sum); - break; - } - } - - *throughput = tp; + float3 tp = *throughput; + const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ + + /* prepare for stepping */ + int max_steps = kernel_data.integrator.volume_max_steps; + float step_offset, step_size; + kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); + + /* compute extinction at the start */ + float t = 0.0f; + + float3 sum = make_float3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < max_steps; i++) { + /* advance to new position */ + float new_t = min(ray->t, (i + 1) * step_size); + + /* use random position inside this segment to sample shader, adjust + * for last step that is shorter than other steps. */ + if (new_t == ray->t) { + step_offset *= (new_t - t) / step_size; + } + + float3 new_P = ray->P + ray->D * (t + step_offset); + float3 sigma_t; + + /* compute attenuation over segment */ + if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) { + /* Compute expf() only for every Nth step, to save some calculations + * because exp(a)*exp(b) = exp(a+b), also do a quick tp_eps check then. */ + + sum += (-sigma_t * (new_t - t)); + if ((i & 0x07) == 0) { /* ToDo: Other interval? 
*/ + tp = *throughput * exp3(sum); + + /* stop if nearly all light is blocked */ + if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) + break; + } + } + + /* stop if at the end of the volume */ + t = new_t; + if (t == ray->t) { + /* Update throughput in case we haven't done it above */ + tp = *throughput * exp3(sum); + break; + } + } + + *throughput = tp; } /* get the volume attenuation over line segment defined by ray, with the @@ -262,422 +262,433 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, Ray *ray, float3 *throughput) { - shader_setup_from_volume(kg, shadow_sd, ray); + shader_setup_from_volume(kg, shadow_sd, ray); - if(volume_stack_is_heterogeneous(kg, state->volume_stack)) - kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput); - else - kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput); + if (volume_stack_is_heterogeneous(kg, state->volume_stack)) + kernel_volume_shadow_heterogeneous(kg, state, ray, shadow_sd, throughput); + else + kernel_volume_shadow_homogeneous(kg, state, ray, shadow_sd, throughput); } -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ /* Equi-angular sampling as in: * "Importance Sampling Techniques for Path Tracing in Participating Media" */ ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf) { - float t = ray->t; - - float delta = dot((light_P - ray->P) , ray->D); - float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); - if(UNLIKELY(D == 0.0f)) { - *pdf = 0.0f; - return 0.0f; - } - float theta_a = -atan2f(delta, D); - float theta_b = atan2f(t - delta, D); - float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); - if(UNLIKELY(theta_b == theta_a)) { - *pdf = 0.0f; - return 0.0f; - } - *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); - - return min(t, delta + t_); /* min is only for float precision errors */ + float t = ray->t; + + float delta = dot((light_P - ray->P), ray->D); + float D = 
safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + if (UNLIKELY(D == 0.0f)) { + *pdf = 0.0f; + return 0.0f; + } + float theta_a = -atan2f(delta, D); + float theta_b = atan2f(t - delta, D); + float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); + if (UNLIKELY(theta_b == theta_a)) { + *pdf = 0.0f; + return 0.0f; + } + *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); + + return min(t, delta + t_); /* min is only for float precision errors */ } ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t) { - float delta = dot((light_P - ray->P) , ray->D); - float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); - if(UNLIKELY(D == 0.0f)) { - return 0.0f; - } + float delta = dot((light_P - ray->P), ray->D); + float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + if (UNLIKELY(D == 0.0f)) { + return 0.0f; + } - float t = ray->t; - float t_ = sample_t - delta; + float t = ray->t; + float t_ = sample_t - delta; - float theta_a = -atan2f(delta, D); - float theta_b = atan2f(t - delta, D); - if(UNLIKELY(theta_b == theta_a)) { - return 0.0f; - } + float theta_a = -atan2f(delta, D); + float theta_b = atan2f(t - delta, D); + if (UNLIKELY(theta_b == theta_a)) { + return 0.0f; + } - float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); + float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); - return pdf; + return pdf; } /* Distance sampling */ -ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf) +ccl_device float kernel_volume_distance_sample( + float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf) { - /* xi is [0, 1[ so log(0) should never happen, division by zero is - * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ - float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); - float 
sample_transmittance = kernel_volume_channel_get(full_transmittance, channel); + /* xi is [0, 1[ so log(0) should never happen, division by zero is + * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ + float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); + float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel); - float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t); + float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t); - *transmittance = volume_color_transmittance(sigma_t, sample_t); - *pdf = safe_divide_color(sigma_t * *transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance); + *transmittance = volume_color_transmittance(sigma_t, sample_t); + *pdf = safe_divide_color(sigma_t * *transmittance, + make_float3(1.0f, 1.0f, 1.0f) - full_transmittance); - /* todo: optimization: when taken together with hit/miss decision, - * the full_transmittance cancels out drops out and xi does not - * need to be remapped */ + /* todo: optimization: when taken together with hit/miss decision, + * the full_transmittance cancels out drops out and xi does not + * need to be remapped */ - return sample_t; + return sample_t; } ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t) { - float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); - float3 transmittance = volume_color_transmittance(sigma_t, sample_t); + float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + float3 transmittance = volume_color_transmittance(sigma_t, sample_t); - return safe_divide_color(sigma_t * transmittance, make_float3(1.0f, 1.0f, 1.0f) - full_transmittance); + return safe_divide_color(sigma_t * transmittance, + make_float3(1.0f, 1.0f, 1.0f) - full_transmittance); } /* Emission */ -ccl_device float3 
kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t) +ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, + int closure_flag, + float3 transmittance, + float t) { - /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t - * this goes to E * t as sigma_t goes to zero - * - * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ - float3 emission = coeff->emission; - - if(closure_flag & SD_EXTINCTION) { - float3 sigma_t = coeff->sigma_t; - - emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t; - emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t; - emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t; - } - else - emission *= t; - - return emission; + /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t + * this goes to E * t as sigma_t goes to zero + * + * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ + float3 emission = coeff->emission; + + if (closure_flag & SD_EXTINCTION) { + float3 sigma_t = coeff->sigma_t; + + emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t; + emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t; + emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t; + } + else + emission *= t; + + return emission; } /* Volume Path */ -ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf) +ccl_device int kernel_volume_sample_channel(float3 albedo, + float3 throughput, + float rand, + float3 *pdf) { - /* Sample color channel proportional to throughput and single scattering - * albedo, to significantly reduce noise with many bounce, following: - * - * "Practical and Controllable Subsurface Scattering for Production Path - * Tracing". 
Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ - float3 weights = fabs(throughput * albedo); - float sum_weights = weights.x + weights.y + weights.z; - float3 weights_pdf; - - if(sum_weights > 0.0f) { - weights_pdf = weights/sum_weights; - } - else { - weights_pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f); - } - - *pdf = weights_pdf; - - /* OpenCL does not support -> on float3, so don't use pdf->x. */ - if(rand < weights_pdf.x) { - return 0; - } - else if(rand < weights_pdf.x + weights_pdf.y) { - return 1; - } - else { - return 2; - } + /* Sample color channel proportional to throughput and single scattering + * albedo, to significantly reduce noise with many bounce, following: + * + * "Practical and Controllable Subsurface Scattering for Production Path + * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */ + float3 weights = fabs(throughput * albedo); + float sum_weights = weights.x + weights.y + weights.z; + float3 weights_pdf; + + if (sum_weights > 0.0f) { + weights_pdf = weights / sum_weights; + } + else { + weights_pdf = make_float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f); + } + + *pdf = weights_pdf; + + /* OpenCL does not support -> on float3, so don't use pdf->x. 
*/ + if (rand < weights_pdf.x) { + return 0; + } + else if (rand < weights_pdf.x + weights_pdf.y) { + return 1; + } + else { + return 2; + } } #ifdef __VOLUME__ /* homogeneous volume: assume shader evaluation at the start gives * the volume shading coefficient for the entire line segment */ -ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous( - KernelGlobals *kg, - ccl_addr_space PathState *state, - Ray *ray, - ShaderData *sd, - PathRadiance *L, - ccl_addr_space float3 *throughput, - bool probalistic_scatter) +ccl_device VolumeIntegrateResult +kernel_volume_integrate_homogeneous(KernelGlobals *kg, + ccl_addr_space PathState *state, + Ray *ray, + ShaderData *sd, + PathRadiance *L, + ccl_addr_space float3 *throughput, + bool probalistic_scatter) { - VolumeShaderCoefficients coeff; - - if(!volume_shader_sample(kg, sd, state, ray->P, &coeff)) - return VOLUME_PATH_MISSED; - - int closure_flag = sd->flag; - float t = ray->t; - float3 new_tp; - -#ifdef __VOLUME_SCATTER__ - /* randomly scatter, and if we do t is shortened */ - if(closure_flag & SD_SCATTER) { - /* Sample channel, use MIS with balance heuristic. 
*/ - float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - float3 channel_pdf; - int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf); - - /* decide if we will hit or miss */ - bool scatter = true; - float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); - - if(probalistic_scatter) { - float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel); - float sample_transmittance = expf(-sample_sigma_t * t); - - if(1.0f - xi >= sample_transmittance) { - scatter = true; - - /* rescale random number so we can reuse it */ - xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance); - - } - else - scatter = false; - } - - if(scatter) { - /* scattering */ - float3 pdf; - float3 transmittance; - float sample_t; - - /* distance sampling */ - sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf); - - /* modify pdf for hit/miss decision */ - if(probalistic_scatter) - pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t); - - new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf); - t = sample_t; - } - else { - /* no scattering */ - float3 transmittance = volume_color_transmittance(coeff.sigma_t, t); - float pdf = dot(channel_pdf, transmittance); - new_tp = *throughput * transmittance / pdf; - } - } - else -#endif - if(closure_flag & SD_EXTINCTION) { - /* absorption only, no sampling needed */ - float3 transmittance = volume_color_transmittance(coeff.sigma_t, t); - new_tp = *throughput * transmittance; - } - else { - new_tp = *throughput; - } - - /* integrate emission attenuated by extinction */ - if(L && (closure_flag & SD_EMISSION)) { - float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t); - float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t); - 
path_radiance_accum_emission(L, state, *throughput, emission); - } - - /* modify throughput */ - if(closure_flag & SD_EXTINCTION) { - *throughput = new_tp; - - /* prepare to scatter to new direction */ - if(t < ray->t) { - /* adjust throughput and move to new location */ - sd->P = ray->P + t*ray->D; - - return VOLUME_PATH_SCATTERED; - } - } - - return VOLUME_PATH_ATTENUATED; + VolumeShaderCoefficients coeff; + + if (!volume_shader_sample(kg, sd, state, ray->P, &coeff)) + return VOLUME_PATH_MISSED; + + int closure_flag = sd->flag; + float t = ray->t; + float3 new_tp; + +# ifdef __VOLUME_SCATTER__ + /* randomly scatter, and if we do t is shortened */ + if (closure_flag & SD_SCATTER) { + /* Sample channel, use MIS with balance heuristic. */ + float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); + float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + float3 channel_pdf; + int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf); + + /* decide if we will hit or miss */ + bool scatter = true; + float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); + + if (probalistic_scatter) { + float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel); + float sample_transmittance = expf(-sample_sigma_t * t); + + if (1.0f - xi >= sample_transmittance) { + scatter = true; + + /* rescale random number so we can reuse it */ + xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance); + } + else + scatter = false; + } + + if (scatter) { + /* scattering */ + float3 pdf; + float3 transmittance; + float sample_t; + + /* distance sampling */ + sample_t = kernel_volume_distance_sample( + ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf); + + /* modify pdf for hit/miss decision */ + if (probalistic_scatter) + pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t); + + new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf); + t = sample_t; 
+ } + else { + /* no scattering */ + float3 transmittance = volume_color_transmittance(coeff.sigma_t, t); + float pdf = dot(channel_pdf, transmittance); + new_tp = *throughput * transmittance / pdf; + } + } + else +# endif + if (closure_flag & SD_EXTINCTION) { + /* absorption only, no sampling needed */ + float3 transmittance = volume_color_transmittance(coeff.sigma_t, t); + new_tp = *throughput * transmittance; + } + else { + new_tp = *throughput; + } + + /* integrate emission attenuated by extinction */ + if (L && (closure_flag & SD_EMISSION)) { + float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t); + float3 emission = kernel_volume_emission_integrate( + &coeff, closure_flag, transmittance, ray->t); + path_radiance_accum_emission(L, state, *throughput, emission); + } + + /* modify throughput */ + if (closure_flag & SD_EXTINCTION) { + *throughput = new_tp; + + /* prepare to scatter to new direction */ + if (t < ray->t) { + /* adjust throughput and move to new location */ + sd->P = ray->P + t * ray->D; + + return VOLUME_PATH_SCATTERED; + } + } + + return VOLUME_PATH_ATTENUATED; } /* heterogeneous volume distance sampling: integrate stepping through the * volume until we reach the end, get absorbed entirely, or run out of * iterations. this does probabilistically scatter or get transmitted through * for path tracing where we don't want to branch. 
*/ -ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( - KernelGlobals *kg, - ccl_addr_space PathState *state, - Ray *ray, - ShaderData *sd, - PathRadiance *L, - ccl_addr_space float3 *throughput) +ccl_device VolumeIntegrateResult +kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg, + ccl_addr_space PathState *state, + Ray *ray, + ShaderData *sd, + PathRadiance *L, + ccl_addr_space float3 *throughput) { - float3 tp = *throughput; - const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ - - /* prepare for stepping */ - int max_steps = kernel_data.integrator.volume_max_steps; - float step_offset, step_size; - kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); - - /* compute coefficients at the start */ - float t = 0.0f; - float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f); - - /* pick random color channel, we use the Veach one-sample - * model with balance heuristic for the channels */ - float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); - float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - bool has_scatter = false; - - for(int i = 0; i < max_steps; i++) { - /* advance to new position */ - float new_t = min(ray->t, (i+1) * step_size); - float dt = new_t - t; - - /* use random position inside this segment to sample shader, - * for last shorter step we remap it to fit within the segment. */ - if(new_t == ray->t) { - step_offset *= (new_t - t) / step_size; - } - - float3 new_P = ray->P + ray->D * (t + step_offset); - VolumeShaderCoefficients coeff; - - /* compute segment */ - if(volume_shader_sample(kg, sd, state, new_P, &coeff)) { - int closure_flag = sd->flag; - float3 new_tp; - float3 transmittance; - bool scatter = false; - - /* distance sampling */ -#ifdef __VOLUME_SCATTER__ - if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) { - has_scatter = true; - - /* Sample channel, use MIS with balance heuristic. 
*/ - float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); - float3 channel_pdf; - int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf); - - /* compute transmittance over full step */ - transmittance = volume_color_transmittance(coeff.sigma_t, dt); - - /* decide if we will scatter or continue */ - float sample_transmittance = kernel_volume_channel_get(transmittance, channel); - - if(1.0f - xi >= sample_transmittance) { - /* compute sampling distance */ - float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel); - float new_dt = -logf(1.0f - xi)/sample_sigma_t; - new_t = t + new_dt; - - /* transmittance and pdf */ - float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); - float3 pdf = coeff.sigma_t * new_transmittance; - - /* throughput */ - new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf); - scatter = true; - } - else { - /* throughput */ - float pdf = dot(channel_pdf, transmittance); - new_tp = tp * transmittance / pdf; - - /* remap xi so we can reuse it and keep thing stratified */ - xi = 1.0f - (1.0f - xi)/sample_transmittance; - } - } - else -#endif - if(closure_flag & SD_EXTINCTION) { - /* absorption only, no sampling needed */ - transmittance = volume_color_transmittance(coeff.sigma_t, dt); - new_tp = tp * transmittance; - } - else { - new_tp = tp; - } - - /* integrate emission attenuated by absorption */ - if(L && (closure_flag & SD_EMISSION)) { - float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt); - path_radiance_accum_emission(L, state, tp, emission); - } - - /* modify throughput */ - if(closure_flag & SD_EXTINCTION) { - tp = new_tp; - - /* stop if nearly all light blocked */ - if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) { - tp = make_float3(0.0f, 0.0f, 0.0f); - break; - } - } - - /* prepare to scatter to new direction */ - if(scatter) { - /* adjust throughput and move to new location */ - sd->P = ray->P + 
new_t*ray->D; - *throughput = tp; - - return VOLUME_PATH_SCATTERED; - } - else { - /* accumulate transmittance */ - accum_transmittance *= transmittance; - } - } - - /* stop if at the end of the volume */ - t = new_t; - if(t == ray->t) - break; - } - - *throughput = tp; - - return VOLUME_PATH_ATTENUATED; + float3 tp = *throughput; + const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ + + /* prepare for stepping */ + int max_steps = kernel_data.integrator.volume_max_steps; + float step_offset, step_size; + kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); + + /* compute coefficients at the start */ + float t = 0.0f; + float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f); + + /* pick random color channel, we use the Veach one-sample + * model with balance heuristic for the channels */ + float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); + float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); + bool has_scatter = false; + + for (int i = 0; i < max_steps; i++) { + /* advance to new position */ + float new_t = min(ray->t, (i + 1) * step_size); + float dt = new_t - t; + + /* use random position inside this segment to sample shader, + * for last shorter step we remap it to fit within the segment. */ + if (new_t == ray->t) { + step_offset *= (new_t - t) / step_size; + } + + float3 new_P = ray->P + ray->D * (t + step_offset); + VolumeShaderCoefficients coeff; + + /* compute segment */ + if (volume_shader_sample(kg, sd, state, new_P, &coeff)) { + int closure_flag = sd->flag; + float3 new_tp; + float3 transmittance; + bool scatter = false; + + /* distance sampling */ +# ifdef __VOLUME_SCATTER__ + if ((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) { + has_scatter = true; + + /* Sample channel, use MIS with balance heuristic. 
*/ + float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + float3 channel_pdf; + int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf); + + /* compute transmittance over full step */ + transmittance = volume_color_transmittance(coeff.sigma_t, dt); + + /* decide if we will scatter or continue */ + float sample_transmittance = kernel_volume_channel_get(transmittance, channel); + + if (1.0f - xi >= sample_transmittance) { + /* compute sampling distance */ + float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel); + float new_dt = -logf(1.0f - xi) / sample_sigma_t; + new_t = t + new_dt; + + /* transmittance and pdf */ + float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + float3 pdf = coeff.sigma_t * new_transmittance; + + /* throughput */ + new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf); + scatter = true; + } + else { + /* throughput */ + float pdf = dot(channel_pdf, transmittance); + new_tp = tp * transmittance / pdf; + + /* remap xi so we can reuse it and keep thing stratified */ + xi = 1.0f - (1.0f - xi) / sample_transmittance; + } + } + else +# endif + if (closure_flag & SD_EXTINCTION) { + /* absorption only, no sampling needed */ + transmittance = volume_color_transmittance(coeff.sigma_t, dt); + new_tp = tp * transmittance; + } + else { + new_tp = tp; + } + + /* integrate emission attenuated by absorption */ + if (L && (closure_flag & SD_EMISSION)) { + float3 emission = kernel_volume_emission_integrate( + &coeff, closure_flag, transmittance, dt); + path_radiance_accum_emission(L, state, tp, emission); + } + + /* modify throughput */ + if (closure_flag & SD_EXTINCTION) { + tp = new_tp; + + /* stop if nearly all light blocked */ + if (tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) { + tp = make_float3(0.0f, 0.0f, 0.0f); + break; + } + } + + /* prepare to scatter to new direction */ + if (scatter) { + /* adjust throughput and move to new location */ + sd->P = 
ray->P + new_t * ray->D; + *throughput = tp; + + return VOLUME_PATH_SCATTERED; + } + else { + /* accumulate transmittance */ + accum_transmittance *= transmittance; + } + } + + /* stop if at the end of the volume */ + t = new_t; + if (t == ray->t) + break; + } + + *throughput = tp; + + return VOLUME_PATH_ATTENUATED; } /* get the volume attenuation and emission over line segment defined by * ray, with the assumption that there are no surfaces blocking light * between the endpoints. distance sampling is used to decide if we will * scatter or not. */ -ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate( - KernelGlobals *kg, - ccl_addr_space PathState *state, - ShaderData *sd, - Ray *ray, - PathRadiance *L, - ccl_addr_space float3 *throughput, - bool heterogeneous) +ccl_device_noinline VolumeIntegrateResult +kernel_volume_integrate(KernelGlobals *kg, + ccl_addr_space PathState *state, + ShaderData *sd, + Ray *ray, + PathRadiance *L, + ccl_addr_space float3 *throughput, + bool heterogeneous) { - shader_setup_from_volume(kg, sd, ray); + shader_setup_from_volume(kg, sd, ray); - if(heterogeneous) - return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput); - else - return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true); + if (heterogeneous) + return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput); + else + return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, true); } -#ifndef __SPLIT_KERNEL__ +# ifndef __SPLIT_KERNEL__ /* Decoupled Volume Sampling * * VolumeSegment is list of coefficients and transmittance stored at all steps @@ -689,26 +700,26 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate( * no support for malloc/free and too much stack usage with a fix size array. 
*/ typedef struct VolumeStep { - float3 sigma_s; /* scatter coefficient */ - float3 sigma_t; /* extinction coefficient */ - float3 accum_transmittance; /* accumulated transmittance including this step */ - float3 cdf_distance; /* cumulative density function for distance sampling */ - float t; /* distance at end of this step */ - float shade_t; /* jittered distance where shading was done in step */ - int closure_flag; /* shader evaluation closure flags */ + float3 sigma_s; /* scatter coefficient */ + float3 sigma_t; /* extinction coefficient */ + float3 accum_transmittance; /* accumulated transmittance including this step */ + float3 cdf_distance; /* cumulative density function for distance sampling */ + float t; /* distance at end of this step */ + float shade_t; /* jittered distance where shading was done in step */ + int closure_flag; /* shader evaluation closure flags */ } VolumeStep; typedef struct VolumeSegment { - VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */ - VolumeStep *steps; /* recorded steps */ - int numsteps; /* number of steps */ - int closure_flag; /* accumulated closure flags from all steps */ + VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */ + VolumeStep *steps; /* recorded steps */ + int numsteps; /* number of steps */ + int closure_flag; /* accumulated closure flags from all steps */ - float3 accum_emission; /* accumulated emission at end of segment */ - float3 accum_transmittance; /* accumulated transmittance at end of segment */ - float3 accum_albedo; /* accumulated average albedo over segment */ + float3 accum_emission; /* accumulated emission at end of segment */ + float3 accum_transmittance; /* accumulated transmittance at end of segment */ + float3 accum_albedo; /* accumulated average albedo over segment */ - int sampling_method; /* volume sampling method */ + int sampling_method; /* volume sampling method */ } VolumeSegment; /* record volume steps to the end of the volume. 
@@ -717,400 +728,412 @@ typedef struct VolumeSegment { * but the entire segment is needed to do always scattering, rather than probabilistically * hitting or missing the volume. if we don't know the transmittance at the end of the * volume we can't generate stratified distance samples up to that transmittance */ -#ifdef __VOLUME_DECOUPLED__ -ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state, - Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous) +# ifdef __VOLUME_DECOUPLED__ +ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, + PathState *state, + Ray *ray, + ShaderData *sd, + VolumeSegment *segment, + bool heterogeneous) { - const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ - - /* prepare for volume stepping */ - int max_steps; - float step_size, step_offset; - - if(heterogeneous) { - max_steps = kernel_data.integrator.volume_max_steps; - kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); - -#ifdef __KERNEL_CPU__ - /* NOTE: For the branched path tracing it's possible to have direct - * and indirect light integration both having volume segments allocated. - * We detect this using index in the pre-allocated memory. Currently we - * only support two segments allocated at a time, if more needed some - * modifications to the KernelGlobals will be needed. - * - * This gives us restrictions that decoupled record should only happen - * in the stack manner, meaning if there's subsequent call of decoupled - * record it'll need to free memory before it's caller frees memory. 
- */ - const int index = kg->decoupled_volume_steps_index; - assert(index < sizeof(kg->decoupled_volume_steps) / - sizeof(*kg->decoupled_volume_steps)); - if(kg->decoupled_volume_steps[index] == NULL) { - kg->decoupled_volume_steps[index] = - (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps); - } - segment->steps = kg->decoupled_volume_steps[index]; - ++kg->decoupled_volume_steps_index; -#else - segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps); -#endif - } - else { - max_steps = 1; - step_size = ray->t; - step_offset = 0.0f; - segment->steps = &segment->stack_step; - } - - /* init accumulation variables */ - float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f); - float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f); - float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f); - float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f); - float t = 0.0f; - - segment->numsteps = 0; - segment->closure_flag = 0; - bool is_last_step_empty = false; - - VolumeStep *step = segment->steps; - - for(int i = 0; i < max_steps; i++, step++) { - /* advance to new position */ - float new_t = min(ray->t, (i+1) * step_size); - float dt = new_t - t; - - /* use random position inside this segment to sample shader, - * for last shorter step we remap it to fit within the segment. 
*/ - if(new_t == ray->t) { - step_offset *= (new_t - t) / step_size; - } - - float3 new_P = ray->P + ray->D * (t + step_offset); - VolumeShaderCoefficients coeff; - - /* compute segment */ - if(volume_shader_sample(kg, sd, state, new_P, &coeff)) { - int closure_flag = sd->flag; - float3 sigma_t = coeff.sigma_t; - - /* compute average albedo for channel sampling */ - if(closure_flag & SD_SCATTER) { - accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t); - } - - /* compute accumulated transmittance */ - float3 transmittance = volume_color_transmittance(sigma_t, dt); - - /* compute emission attenuated by absorption */ - if(closure_flag & SD_EMISSION) { - float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt); - accum_emission += accum_transmittance * emission; - } - - accum_transmittance *= transmittance; - - /* compute pdf for distance sampling */ - float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s; - cdf_distance = cdf_distance + pdf_distance; - - /* write step data */ - step->sigma_t = sigma_t; - step->sigma_s = coeff.sigma_s; - step->closure_flag = closure_flag; - - segment->closure_flag |= closure_flag; - - is_last_step_empty = false; - segment->numsteps++; - } - else { - if(is_last_step_empty) { - /* consecutive empty step, merge */ - step--; - } - else { - /* store empty step */ - step->sigma_t = make_float3(0.0f, 0.0f, 0.0f); - step->sigma_s = make_float3(0.0f, 0.0f, 0.0f); - step->closure_flag = 0; - - segment->numsteps++; - is_last_step_empty = true; - } - } - - step->accum_transmittance = accum_transmittance; - step->cdf_distance = cdf_distance; - step->t = new_t; - step->shade_t = t + step_offset; - - /* stop if at the end of the volume */ - t = new_t; - if(t == ray->t) - break; - - /* stop if nearly all light blocked */ - if(accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && accum_transmittance.z < tp_eps) - break; - } - - /* store total emission and transmittance */ - 
segment->accum_emission = accum_emission; - segment->accum_transmittance = accum_transmittance; - segment->accum_albedo = accum_albedo; - - /* normalize cumulative density function for distance sampling */ - VolumeStep *last_step = segment->steps + segment->numsteps - 1; - - if(!is_zero(last_step->cdf_distance)) { - VolumeStep *step = &segment->steps[0]; - int numsteps = segment->numsteps; - float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance); - - for(int i = 0; i < numsteps; i++, step++) - step->cdf_distance *= inv_cdf_distance_sum; - } + const float tp_eps = 1e-6f; /* todo: this is likely not the right value */ + + /* prepare for volume stepping */ + int max_steps; + float step_size, step_offset; + + if (heterogeneous) { + max_steps = kernel_data.integrator.volume_max_steps; + kernel_volume_step_init(kg, state, ray->t, &step_size, &step_offset); + +# ifdef __KERNEL_CPU__ + /* NOTE: For the branched path tracing it's possible to have direct + * and indirect light integration both having volume segments allocated. + * We detect this using index in the pre-allocated memory. Currently we + * only support two segments allocated at a time, if more needed some + * modifications to the KernelGlobals will be needed. + * + * This gives us restrictions that decoupled record should only happen + * in the stack manner, meaning if there's subsequent call of decoupled + * record it'll need to free memory before it's caller frees memory. 
+ */ + const int index = kg->decoupled_volume_steps_index; + assert(index < sizeof(kg->decoupled_volume_steps) / sizeof(*kg->decoupled_volume_steps)); + if (kg->decoupled_volume_steps[index] == NULL) { + kg->decoupled_volume_steps[index] = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps); + } + segment->steps = kg->decoupled_volume_steps[index]; + ++kg->decoupled_volume_steps_index; +# else + segment->steps = (VolumeStep *)malloc(sizeof(VolumeStep) * max_steps); +# endif + } + else { + max_steps = 1; + step_size = ray->t; + step_offset = 0.0f; + segment->steps = &segment->stack_step; + } + + /* init accumulation variables */ + float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f); + float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f); + float3 accum_albedo = make_float3(0.0f, 0.0f, 0.0f); + float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f); + float t = 0.0f; + + segment->numsteps = 0; + segment->closure_flag = 0; + bool is_last_step_empty = false; + + VolumeStep *step = segment->steps; + + for (int i = 0; i < max_steps; i++, step++) { + /* advance to new position */ + float new_t = min(ray->t, (i + 1) * step_size); + float dt = new_t - t; + + /* use random position inside this segment to sample shader, + * for last shorter step we remap it to fit within the segment. 
*/ + if (new_t == ray->t) { + step_offset *= (new_t - t) / step_size; + } + + float3 new_P = ray->P + ray->D * (t + step_offset); + VolumeShaderCoefficients coeff; + + /* compute segment */ + if (volume_shader_sample(kg, sd, state, new_P, &coeff)) { + int closure_flag = sd->flag; + float3 sigma_t = coeff.sigma_t; + + /* compute average albedo for channel sampling */ + if (closure_flag & SD_SCATTER) { + accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t); + } + + /* compute accumulated transmittance */ + float3 transmittance = volume_color_transmittance(sigma_t, dt); + + /* compute emission attenuated by absorption */ + if (closure_flag & SD_EMISSION) { + float3 emission = kernel_volume_emission_integrate( + &coeff, closure_flag, transmittance, dt); + accum_emission += accum_transmittance * emission; + } + + accum_transmittance *= transmittance; + + /* compute pdf for distance sampling */ + float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s; + cdf_distance = cdf_distance + pdf_distance; + + /* write step data */ + step->sigma_t = sigma_t; + step->sigma_s = coeff.sigma_s; + step->closure_flag = closure_flag; + + segment->closure_flag |= closure_flag; + + is_last_step_empty = false; + segment->numsteps++; + } + else { + if (is_last_step_empty) { + /* consecutive empty step, merge */ + step--; + } + else { + /* store empty step */ + step->sigma_t = make_float3(0.0f, 0.0f, 0.0f); + step->sigma_s = make_float3(0.0f, 0.0f, 0.0f); + step->closure_flag = 0; + + segment->numsteps++; + is_last_step_empty = true; + } + } + + step->accum_transmittance = accum_transmittance; + step->cdf_distance = cdf_distance; + step->t = new_t; + step->shade_t = t + step_offset; + + /* stop if at the end of the volume */ + t = new_t; + if (t == ray->t) + break; + + /* stop if nearly all light blocked */ + if (accum_transmittance.x < tp_eps && accum_transmittance.y < tp_eps && + accum_transmittance.z < tp_eps) + break; + } + + /* store total emission and transmittance */ 
+ segment->accum_emission = accum_emission; + segment->accum_transmittance = accum_transmittance; + segment->accum_albedo = accum_albedo; + + /* normalize cumulative density function for distance sampling */ + VolumeStep *last_step = segment->steps + segment->numsteps - 1; + + if (!is_zero(last_step->cdf_distance)) { + VolumeStep *step = &segment->steps[0]; + int numsteps = segment->numsteps; + float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance); + + for (int i = 0; i < numsteps; i++, step++) + step->cdf_distance *= inv_cdf_distance_sum; + } } ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment) { - if(segment->steps != &segment->stack_step) { -#ifdef __KERNEL_CPU__ - /* NOTE: We only allow free last allocated segment. - * No random order of alloc/free is supported. - */ - assert(kg->decoupled_volume_steps_index > 0); - assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]); - --kg->decoupled_volume_steps_index; -#else - free(segment->steps); -#endif - } + if (segment->steps != &segment->stack_step) { +# ifdef __KERNEL_CPU__ + /* NOTE: We only allow free last allocated segment. + * No random order of alloc/free is supported. + */ + assert(kg->decoupled_volume_steps_index > 0); + assert(segment->steps == kg->decoupled_volume_steps[kg->decoupled_volume_steps_index - 1]); + --kg->decoupled_volume_steps_index; +# else + free(segment->steps); +# endif + } } -#endif /* __VOLUME_DECOUPLED__ */ +# endif /* __VOLUME_DECOUPLED__ */ /* scattering for homogeneous and heterogeneous volumes, using decoupled ray * marching. 
* * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */ -ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( - KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, - float3 *throughput, float rphase, float rscatter, - const VolumeSegment *segment, const float3 *light_P, bool probalistic_scatter) +ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(KernelGlobals *kg, + PathState *state, + Ray *ray, + ShaderData *sd, + float3 *throughput, + float rphase, + float rscatter, + const VolumeSegment *segment, + const float3 *light_P, + bool probalistic_scatter) { - kernel_assert(segment->closure_flag & SD_SCATTER); - - /* Sample color channel, use MIS with balance heuristic. */ - float3 channel_pdf; - int channel = kernel_volume_sample_channel(segment->accum_albedo, - *throughput, - rphase, - &channel_pdf); - - float xi = rscatter; - - /* probabilistic scattering decision based on transmittance */ - if(probalistic_scatter) { - float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel); - - if(1.0f - xi >= sample_transmittance) { - /* rescale random number so we can reuse it */ - xi = 1.0f - (1.0f - xi - sample_transmittance)/(1.0f - sample_transmittance); - } - else { - *throughput /= sample_transmittance; - return VOLUME_PATH_MISSED; - } - } - - VolumeStep *step; - float3 transmittance; - float pdf, sample_t; - float mis_weight = 1.0f; - bool distance_sample = true; - bool use_mis = false; - - if(segment->sampling_method && light_P) { - if(segment->sampling_method == SD_VOLUME_MIS) { - /* multiple importance sample: randomly pick between - * equiangular and distance sampling strategy */ - if(xi < 0.5f) { - xi *= 2.0f; - } - else { - xi = (xi - 0.5f)*2.0f; - distance_sample = false; - } - - use_mis = true; - } - else { - /* only equiangular sampling */ - distance_sample = false; - } - } - - /* distance sampling */ - if(distance_sample) { - /* find step in cdf */ - 
step = segment->steps; - - float prev_t = 0.0f; - float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f); - - if(segment->numsteps > 1) { - float prev_cdf = 0.0f; - float step_cdf = 1.0f; - float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f); - - for(int i = 0; ; i++, step++) { - /* todo: optimize using binary search */ - step_cdf = kernel_volume_channel_get(step->cdf_distance, channel); - - if(xi < step_cdf || i == segment->numsteps-1) - break; - - prev_cdf = step_cdf; - prev_t = step->t; - prev_cdf_distance = step->cdf_distance; - } - - /* remap xi so we can reuse it */ - xi = (xi - prev_cdf)/(step_cdf - prev_cdf); - - /* pdf for picking step */ - step_pdf_distance = step->cdf_distance - prev_cdf_distance; - } - - /* determine range in which we will sample */ - float step_t = step->t - prev_t; - - /* sample distance and compute transmittance */ - float3 distance_pdf; - sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf); - - /* modify pdf for hit/miss decision */ - if(probalistic_scatter) - distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance; - - pdf = dot(channel_pdf, distance_pdf * step_pdf_distance); - - /* multiple importance sampling */ - if(use_mis) { - float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t); - mis_weight = 2.0f*power_heuristic(pdf, equi_pdf); - } - } - /* equi-angular sampling */ - else { - /* sample distance */ - sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf); - - /* find step in which sampled distance is located */ - step = segment->steps; - - float prev_t = 0.0f; - float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f); - - if(segment->numsteps > 1) { - float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f); - - int numsteps = segment->numsteps; - int high = numsteps - 1; - int low = 0; - int mid; - - while(low < high) { - mid = (low + high) >> 1; - - if(sample_t < step[mid].t) - high = mid; - else 
if(sample_t >= step[mid + 1].t) - low = mid + 1; - else { - /* found our interval in step[mid] .. step[mid+1] */ - prev_t = step[mid].t; - prev_cdf_distance = step[mid].cdf_distance; - step += mid+1; - break; - } - } - - if(low >= numsteps - 1) { - prev_t = step[numsteps - 1].t; - prev_cdf_distance = step[numsteps-1].cdf_distance; - step += numsteps - 1; - } - - /* pdf for picking step with distance sampling */ - step_pdf_distance = step->cdf_distance - prev_cdf_distance; - } - - /* determine range in which we will sample */ - float step_t = step->t - prev_t; - float step_sample_t = sample_t - prev_t; - - /* compute transmittance */ - transmittance = volume_color_transmittance(step->sigma_t, step_sample_t); - - /* multiple importance sampling */ - if(use_mis) { - float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t); - float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance); - mis_weight = 2.0f*power_heuristic(pdf, distance_pdf); - } - } - if(sample_t < 0.0f || pdf == 0.0f) { - return VOLUME_PATH_MISSED; - } - - /* compute transmittance up to this step */ - if(step != segment->steps) - transmittance *= (step-1)->accum_transmittance; - - /* modify throughput */ - *throughput *= step->sigma_s * transmittance * (mis_weight / pdf); - - /* evaluate shader to create closures at shading point */ - if(segment->numsteps > 1) { - sd->P = ray->P + step->shade_t*ray->D; - - VolumeShaderCoefficients coeff; - volume_shader_sample(kg, sd, state, sd->P, &coeff); - } - - /* move to new position */ - sd->P = ray->P + sample_t*ray->D; - - return VOLUME_PATH_SCATTERED; + kernel_assert(segment->closure_flag & SD_SCATTER); + + /* Sample color channel, use MIS with balance heuristic. 
*/ + float3 channel_pdf; + int channel = kernel_volume_sample_channel( + segment->accum_albedo, *throughput, rphase, &channel_pdf); + + float xi = rscatter; + + /* probabilistic scattering decision based on transmittance */ + if (probalistic_scatter) { + float sample_transmittance = kernel_volume_channel_get(segment->accum_transmittance, channel); + + if (1.0f - xi >= sample_transmittance) { + /* rescale random number so we can reuse it */ + xi = 1.0f - (1.0f - xi - sample_transmittance) / (1.0f - sample_transmittance); + } + else { + *throughput /= sample_transmittance; + return VOLUME_PATH_MISSED; + } + } + + VolumeStep *step; + float3 transmittance; + float pdf, sample_t; + float mis_weight = 1.0f; + bool distance_sample = true; + bool use_mis = false; + + if (segment->sampling_method && light_P) { + if (segment->sampling_method == SD_VOLUME_MIS) { + /* multiple importance sample: randomly pick between + * equiangular and distance sampling strategy */ + if (xi < 0.5f) { + xi *= 2.0f; + } + else { + xi = (xi - 0.5f) * 2.0f; + distance_sample = false; + } + + use_mis = true; + } + else { + /* only equiangular sampling */ + distance_sample = false; + } + } + + /* distance sampling */ + if (distance_sample) { + /* find step in cdf */ + step = segment->steps; + + float prev_t = 0.0f; + float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f); + + if (segment->numsteps > 1) { + float prev_cdf = 0.0f; + float step_cdf = 1.0f; + float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f); + + for (int i = 0;; i++, step++) { + /* todo: optimize using binary search */ + step_cdf = kernel_volume_channel_get(step->cdf_distance, channel); + + if (xi < step_cdf || i == segment->numsteps - 1) + break; + + prev_cdf = step_cdf; + prev_t = step->t; + prev_cdf_distance = step->cdf_distance; + } + + /* remap xi so we can reuse it */ + xi = (xi - prev_cdf) / (step_cdf - prev_cdf); + + /* pdf for picking step */ + step_pdf_distance = step->cdf_distance - prev_cdf_distance; + } + + /* 
determine range in which we will sample */ + float step_t = step->t - prev_t; + + /* sample distance and compute transmittance */ + float3 distance_pdf; + sample_t = prev_t + kernel_volume_distance_sample( + step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf); + + /* modify pdf for hit/miss decision */ + if (probalistic_scatter) + distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance; + + pdf = dot(channel_pdf, distance_pdf * step_pdf_distance); + + /* multiple importance sampling */ + if (use_mis) { + float equi_pdf = kernel_volume_equiangular_pdf(ray, *light_P, sample_t); + mis_weight = 2.0f * power_heuristic(pdf, equi_pdf); + } + } + /* equi-angular sampling */ + else { + /* sample distance */ + sample_t = kernel_volume_equiangular_sample(ray, *light_P, xi, &pdf); + + /* find step in which sampled distance is located */ + step = segment->steps; + + float prev_t = 0.0f; + float3 step_pdf_distance = make_float3(1.0f, 1.0f, 1.0f); + + if (segment->numsteps > 1) { + float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f); + + int numsteps = segment->numsteps; + int high = numsteps - 1; + int low = 0; + int mid; + + while (low < high) { + mid = (low + high) >> 1; + + if (sample_t < step[mid].t) + high = mid; + else if (sample_t >= step[mid + 1].t) + low = mid + 1; + else { + /* found our interval in step[mid] .. 
step[mid+1] */ + prev_t = step[mid].t; + prev_cdf_distance = step[mid].cdf_distance; + step += mid + 1; + break; + } + } + + if (low >= numsteps - 1) { + prev_t = step[numsteps - 1].t; + prev_cdf_distance = step[numsteps - 1].cdf_distance; + step += numsteps - 1; + } + + /* pdf for picking step with distance sampling */ + step_pdf_distance = step->cdf_distance - prev_cdf_distance; + } + + /* determine range in which we will sample */ + float step_t = step->t - prev_t; + float step_sample_t = sample_t - prev_t; + + /* compute transmittance */ + transmittance = volume_color_transmittance(step->sigma_t, step_sample_t); + + /* multiple importance sampling */ + if (use_mis) { + float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t); + float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance); + mis_weight = 2.0f * power_heuristic(pdf, distance_pdf); + } + } + if (sample_t < 0.0f || pdf == 0.0f) { + return VOLUME_PATH_MISSED; + } + + /* compute transmittance up to this step */ + if (step != segment->steps) + transmittance *= (step - 1)->accum_transmittance; + + /* modify throughput */ + *throughput *= step->sigma_s * transmittance * (mis_weight / pdf); + + /* evaluate shader to create closures at shading point */ + if (segment->numsteps > 1) { + sd->P = ray->P + step->shade_t * ray->D; + + VolumeShaderCoefficients coeff; + volume_shader_sample(kg, sd, state, sd->P, &coeff); + } + + /* move to new position */ + sd->P = ray->P + sample_t * ray->D; + + return VOLUME_PATH_SCATTERED; } -#endif /* __SPLIT_KERNEL */ +# endif /* __SPLIT_KERNEL */ /* decide if we need to use decoupled or not */ -ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method) +ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, + bool heterogeneous, + bool direct, + int sampling_method) { - /* decoupled ray marching for heterogeneous volumes not supported on the GPU, - * which also means 
equiangular and multiple importance sampling is not - * support for that case */ - if(!kernel_data.integrator.volume_decoupled) - return false; - -#ifdef __KERNEL_GPU__ - if(heterogeneous) - return false; -#endif - - /* equiangular and multiple importance sampling only implemented for decoupled */ - if(sampling_method != 0) - return true; - - /* for all light sampling use decoupled, reusing shader evaluations is - * typically faster in that case */ - if(direct) - return kernel_data.integrator.sample_all_lights_direct; - else - return kernel_data.integrator.sample_all_lights_indirect; + /* decoupled ray marching for heterogeneous volumes not supported on the GPU, + * which also means equiangular and multiple importance sampling is not + * support for that case */ + if (!kernel_data.integrator.volume_decoupled) + return false; + +# ifdef __KERNEL_GPU__ + if (heterogeneous) + return false; +# endif + + /* equiangular and multiple importance sampling only implemented for decoupled */ + if (sampling_method != 0) + return true; + + /* for all light sampling use decoupled, reusing shader evaluations is + * typically faster in that case */ + if (direct) + return kernel_data.integrator.sample_all_lights_direct; + else + return kernel_data.integrator.sample_all_lights_indirect; } /* Volume Stack @@ -1124,242 +1147,231 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, ccl_addr_space const Ray *ray, ccl_addr_space VolumeStack *stack) { - /* NULL ray happens in the baker, does it need proper initialization of - * camera in volume? - */ - if(!kernel_data.cam.is_inside_volume || ray == NULL) { - /* Camera is guaranteed to be in the air, only take background volume - * into account in this case. 
- */ - if(kernel_data.background.volume_shader != SHADER_NONE) { - stack[0].shader = kernel_data.background.volume_shader; - stack[0].object = PRIM_NONE; - stack[1].shader = SHADER_NONE; - } - else { - stack[0].shader = SHADER_NONE; - } - return; - } - - kernel_assert(state->flag & PATH_RAY_CAMERA); - - Ray volume_ray = *ray; - volume_ray.t = FLT_MAX; - - const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY); - int stack_index = 0, enclosed_index = 0; - -#ifdef __VOLUME_RECORD_ALL__ - Intersection hits[2*VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all(kg, - &volume_ray, - hits, - 2*VOLUME_STACK_SIZE, - visibility); - if(num_hits > 0) { - int enclosed_volumes[VOLUME_STACK_SIZE]; - Intersection *isect = hits; - - qsort(hits, num_hits, sizeof(Intersection), intersections_compare); - - for(uint hit = 0; hit < num_hits; ++hit, ++isect) { - shader_setup_from_ray(kg, stack_sd, isect, &volume_ray); - if(stack_sd->flag & SD_BACKFACING) { - bool need_add = true; - for(int i = 0; i < enclosed_index && need_add; ++i) { - /* If ray exited the volume and never entered to that volume - * it means that camera is inside such a volume. - */ - if(enclosed_volumes[i] == stack_sd->object) { - need_add = false; - } - } - for(int i = 0; i < stack_index && need_add; ++i) { - /* Don't add intersections twice. */ - if(stack[i].object == stack_sd->object) { - need_add = false; - break; - } - } - if(need_add && stack_index < VOLUME_STACK_SIZE - 1) { - stack[stack_index].object = stack_sd->object; - stack[stack_index].shader = stack_sd->shader; - ++stack_index; - } - } - else { - /* If ray from camera enters the volume, this volume shouldn't - * be added to the stack on exit. 
- */ - enclosed_volumes[enclosed_index++] = stack_sd->object; - } - } - } -#else - int enclosed_volumes[VOLUME_STACK_SIZE]; - int step = 0; - - while(stack_index < VOLUME_STACK_SIZE - 1 && - enclosed_index < VOLUME_STACK_SIZE - 1 && - step < 2 * VOLUME_STACK_SIZE) - { - Intersection isect; - if(!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) { - break; - } - - shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray); - if(stack_sd->flag & SD_BACKFACING) { - /* If ray exited the volume and never entered to that volume - * it means that camera is inside such a volume. - */ - bool need_add = true; - for(int i = 0; i < enclosed_index && need_add; ++i) { - /* If ray exited the volume and never entered to that volume - * it means that camera is inside such a volume. - */ - if(enclosed_volumes[i] == stack_sd->object) { - need_add = false; - } - } - for(int i = 0; i < stack_index && need_add; ++i) { - /* Don't add intersections twice. */ - if(stack[i].object == stack_sd->object) { - need_add = false; - break; - } - } - if(need_add) { - stack[stack_index].object = stack_sd->object; - stack[stack_index].shader = stack_sd->shader; - ++stack_index; - } - } - else { - /* If ray from camera enters the volume, this volume shouldn't - * be added to the stack on exit. - */ - enclosed_volumes[enclosed_index++] = stack_sd->object; - } - - /* Move ray forward. */ - volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); - ++step; - } -#endif - /* stack_index of 0 means quick checks outside of the kernel gave false - * positive, nothing to worry about, just we've wasted quite a few of - * ticks just to come into conclusion that camera is in the air. - * - * In this case we're doing the same above -- check whether background has - * volume. 
- */ - if(stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) { - stack[0].shader = kernel_data.background.volume_shader; - stack[0].object = PRIM_NONE; - stack[1].shader = SHADER_NONE; - } - else { - stack[stack_index].shader = SHADER_NONE; - } + /* NULL ray happens in the baker, does it need proper initialization of + * camera in volume? + */ + if (!kernel_data.cam.is_inside_volume || ray == NULL) { + /* Camera is guaranteed to be in the air, only take background volume + * into account in this case. + */ + if (kernel_data.background.volume_shader != SHADER_NONE) { + stack[0].shader = kernel_data.background.volume_shader; + stack[0].object = PRIM_NONE; + stack[1].shader = SHADER_NONE; + } + else { + stack[0].shader = SHADER_NONE; + } + return; + } + + kernel_assert(state->flag & PATH_RAY_CAMERA); + + Ray volume_ray = *ray; + volume_ray.t = FLT_MAX; + + const uint visibility = (state->flag & PATH_RAY_ALL_VISIBILITY); + int stack_index = 0, enclosed_index = 0; + +# ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2 * VOLUME_STACK_SIZE + 1]; + uint num_hits = scene_intersect_volume_all( + kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, visibility); + if (num_hits > 0) { + int enclosed_volumes[VOLUME_STACK_SIZE]; + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for (uint hit = 0; hit < num_hits; ++hit, ++isect) { + shader_setup_from_ray(kg, stack_sd, isect, &volume_ray); + if (stack_sd->flag & SD_BACKFACING) { + bool need_add = true; + for (int i = 0; i < enclosed_index && need_add; ++i) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + if (enclosed_volumes[i] == stack_sd->object) { + need_add = false; + } + } + for (int i = 0; i < stack_index && need_add; ++i) { + /* Don't add intersections twice. 
*/ + if (stack[i].object == stack_sd->object) { + need_add = false; + break; + } + } + if (need_add && stack_index < VOLUME_STACK_SIZE - 1) { + stack[stack_index].object = stack_sd->object; + stack[stack_index].shader = stack_sd->shader; + ++stack_index; + } + } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = stack_sd->object; + } + } + } +# else + int enclosed_volumes[VOLUME_STACK_SIZE]; + int step = 0; + + while (stack_index < VOLUME_STACK_SIZE - 1 && enclosed_index < VOLUME_STACK_SIZE - 1 && + step < 2 * VOLUME_STACK_SIZE) { + Intersection isect; + if (!scene_intersect_volume(kg, &volume_ray, &isect, visibility)) { + break; + } + + shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray); + if (stack_sd->flag & SD_BACKFACING) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + bool need_add = true; + for (int i = 0; i < enclosed_index && need_add; ++i) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + if (enclosed_volumes[i] == stack_sd->object) { + need_add = false; + } + } + for (int i = 0; i < stack_index && need_add; ++i) { + /* Don't add intersections twice. */ + if (stack[i].object == stack_sd->object) { + need_add = false; + break; + } + } + if (need_add) { + stack[stack_index].object = stack_sd->object; + stack[stack_index].shader = stack_sd->shader; + ++stack_index; + } + } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = stack_sd->object; + } + + /* Move ray forward. 
*/ + volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + ++step; + } +# endif + /* stack_index of 0 means quick checks outside of the kernel gave false + * positive, nothing to worry about, just we've wasted quite a few of + * ticks just to come into conclusion that camera is in the air. + * + * In this case we're doing the same above -- check whether background has + * volume. + */ + if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) { + stack[0].shader = kernel_data.background.volume_shader; + stack[0].object = PRIM_NONE; + stack[1].shader = SHADER_NONE; + } + else { + stack[stack_index].shader = SHADER_NONE; + } } -ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack) +ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, + ShaderData *sd, + ccl_addr_space VolumeStack *stack) { - /* todo: we should have some way for objects to indicate if they want the - * world shader to work inside them. excluding it by default is problematic - * because non-volume objects can't be assumed to be closed manifolds */ - - if(!(sd->flag & SD_HAS_VOLUME)) - return; - - if(sd->flag & SD_BACKFACING) { - /* exit volume object: remove from stack */ - for(int i = 0; stack[i].shader != SHADER_NONE; i++) { - if(stack[i].object == sd->object) { - /* shift back next stack entries */ - do { - stack[i] = stack[i+1]; - i++; - } - while(stack[i].shader != SHADER_NONE); - - return; - } - } - } - else { - /* enter volume object: add to stack */ - int i; - - for(i = 0; stack[i].shader != SHADER_NONE; i++) { - /* already in the stack? 
then we have nothing to do */ - if(stack[i].object == sd->object) - return; - } - - /* if we exceed the stack limit, ignore */ - if(i >= VOLUME_STACK_SIZE-1) - return; - - /* add to the end of the stack */ - stack[i].shader = sd->shader; - stack[i].object = sd->object; - stack[i+1].shader = SHADER_NONE; - } + /* todo: we should have some way for objects to indicate if they want the + * world shader to work inside them. excluding it by default is problematic + * because non-volume objects can't be assumed to be closed manifolds */ + + if (!(sd->flag & SD_HAS_VOLUME)) + return; + + if (sd->flag & SD_BACKFACING) { + /* exit volume object: remove from stack */ + for (int i = 0; stack[i].shader != SHADER_NONE; i++) { + if (stack[i].object == sd->object) { + /* shift back next stack entries */ + do { + stack[i] = stack[i + 1]; + i++; + } while (stack[i].shader != SHADER_NONE); + + return; + } + } + } + else { + /* enter volume object: add to stack */ + int i; + + for (i = 0; stack[i].shader != SHADER_NONE; i++) { + /* already in the stack? 
then we have nothing to do */ + if (stack[i].object == sd->object) + return; + } + + /* if we exceed the stack limit, ignore */ + if (i >= VOLUME_STACK_SIZE - 1) + return; + + /* add to the end of the stack */ + stack[i].shader = sd->shader; + stack[i].object = sd->object; + stack[i + 1].shader = SHADER_NONE; + } } -#ifdef __SUBSURFACE__ +# ifdef __SUBSURFACE__ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, ShaderData *stack_sd, Ray *ray, ccl_addr_space VolumeStack *stack) { - kernel_assert(kernel_data.integrator.use_volumes); - - Ray volume_ray = *ray; - -# ifdef __VOLUME_RECORD_ALL__ - Intersection hits[2*VOLUME_STACK_SIZE + 1]; - uint num_hits = scene_intersect_volume_all(kg, - &volume_ray, - hits, - 2*VOLUME_STACK_SIZE, - PATH_RAY_ALL_VISIBILITY); - if(num_hits > 0) { - Intersection *isect = hits; - - qsort(hits, num_hits, sizeof(Intersection), intersections_compare); - - for(uint hit = 0; hit < num_hits; ++hit, ++isect) { - shader_setup_from_ray(kg, stack_sd, isect, &volume_ray); - kernel_volume_stack_enter_exit(kg, stack_sd, stack); - } - } -# else - Intersection isect; - int step = 0; - float3 Pend = ray->P + ray->D*ray->t; - while(step < 2 * VOLUME_STACK_SIZE && - scene_intersect_volume(kg, - &volume_ray, - &isect, - PATH_RAY_ALL_VISIBILITY)) - { - shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray); - kernel_volume_stack_enter_exit(kg, stack_sd, stack); - - /* Move ray forward. 
*/ - volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); - if(volume_ray.t != FLT_MAX) { - volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t); - } - ++step; - } -# endif + kernel_assert(kernel_data.integrator.use_volumes); + + Ray volume_ray = *ray; + +# ifdef __VOLUME_RECORD_ALL__ + Intersection hits[2 * VOLUME_STACK_SIZE + 1]; + uint num_hits = scene_intersect_volume_all( + kg, &volume_ray, hits, 2 * VOLUME_STACK_SIZE, PATH_RAY_ALL_VISIBILITY); + if (num_hits > 0) { + Intersection *isect = hits; + + qsort(hits, num_hits, sizeof(Intersection), intersections_compare); + + for (uint hit = 0; hit < num_hits; ++hit, ++isect) { + shader_setup_from_ray(kg, stack_sd, isect, &volume_ray); + kernel_volume_stack_enter_exit(kg, stack_sd, stack); + } + } +# else + Intersection isect; + int step = 0; + float3 Pend = ray->P + ray->D * ray->t; + while (step < 2 * VOLUME_STACK_SIZE && + scene_intersect_volume(kg, &volume_ray, &isect, PATH_RAY_ALL_VISIBILITY)) { + shader_setup_from_ray(kg, stack_sd, &isect, &volume_ray); + kernel_volume_stack_enter_exit(kg, stack_sd, stack); + + /* Move ray forward. */ + volume_ray.P = ray_offset(stack_sd->P, -stack_sd->Ng); + if (volume_ray.t != FLT_MAX) { + volume_ray.D = normalize_len(Pend - volume_ray.P, &volume_ray.t); + } + ++step; + } +# endif } -#endif +# endif /* Clean stack after the last bounce. * @@ -1378,15 +1390,15 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg, ccl_addr_space VolumeStack *volume_stack) { - if(kernel_data.background.volume_shader != SHADER_NONE) { - /* Keep the world's volume in stack. */ - volume_stack[1].shader = SHADER_NONE; - } - else { - volume_stack[0].shader = SHADER_NONE; - } + if (kernel_data.background.volume_shader != SHADER_NONE) { + /* Keep the world's volume in stack. 
*/ + volume_stack[1].shader = SHADER_NONE; + } + else { + volume_stack[0].shader = SHADER_NONE; + } } -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h index 9667156eaf5..799561a7466 100644 --- a/intern/cycles/kernel/kernel_work_stealing.h +++ b/intern/cycles/kernel/kernel_work_stealing.h @@ -35,27 +35,26 @@ ccl_device bool get_next_work(KernelGlobals *kg, uint ray_index, ccl_private uint *global_work_index) { - /* With a small amount of work there may be more threads than work due to - * rounding up of global size, stop such threads immediately. */ - if(ray_index >= total_work_size) { - return false; - } + /* With a small amount of work there may be more threads than work due to + * rounding up of global size, stop such threads immediately. */ + if (ray_index >= total_work_size) { + return false; + } - /* Increase atomic work index counter in pool. */ - uint pool = ray_index / WORK_POOL_SIZE; - uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]); + /* Increase atomic work index counter in pool. */ + uint pool = ray_index / WORK_POOL_SIZE; + uint work_index = atomic_fetch_and_inc_uint32(&work_pools[pool]); - /* Map per-pool work index to a global work index. */ - uint global_size = ccl_global_size(0) * ccl_global_size(1); - kernel_assert(global_size % WORK_POOL_SIZE == 0); - kernel_assert(ray_index < global_size); + /* Map per-pool work index to a global work index. */ + uint global_size = ccl_global_size(0) * ccl_global_size(1); + kernel_assert(global_size % WORK_POOL_SIZE == 0); + kernel_assert(ray_index < global_size); - *global_work_index = (work_index / WORK_POOL_SIZE) * global_size - + (pool * WORK_POOL_SIZE) - + (work_index % WORK_POOL_SIZE); + *global_work_index = (work_index / WORK_POOL_SIZE) * global_size + (pool * WORK_POOL_SIZE) + + (work_index % WORK_POOL_SIZE); - /* Test if all work for this pool is done. 
*/ - return (*global_work_index < total_work_size); + /* Test if all work for this pool is done. */ + return (*global_work_index < total_work_size); } #endif @@ -67,22 +66,22 @@ ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile, ccl_private uint *sample) { #ifdef __KERNEL_CUDA__ - /* Keeping threads for the same pixel together improves performance on CUDA. */ - uint sample_offset = global_work_index % tile->num_samples; - uint pixel_offset = global_work_index / tile->num_samples; -#else /* __KERNEL_CUDA__ */ - uint tile_pixels = tile->w * tile->h; - uint sample_offset = global_work_index / tile_pixels; - uint pixel_offset = global_work_index - sample_offset * tile_pixels; + /* Keeping threads for the same pixel together improves performance on CUDA. */ + uint sample_offset = global_work_index % tile->num_samples; + uint pixel_offset = global_work_index / tile->num_samples; +#else /* __KERNEL_CUDA__ */ + uint tile_pixels = tile->w * tile->h; + uint sample_offset = global_work_index / tile_pixels; + uint pixel_offset = global_work_index - sample_offset * tile_pixels; #endif /* __KERNEL_CUDA__ */ - uint y_offset = pixel_offset / tile->w; - uint x_offset = pixel_offset - y_offset * tile->w; + uint y_offset = pixel_offset / tile->w; + uint x_offset = pixel_offset - y_offset * tile->w; - *x = tile->x + x_offset; - *y = tile->y + y_offset; - *sample = tile->start_sample + sample_offset; + *x = tile->x + x_offset; + *y = tile->y + y_offset; + *sample = tile->start_sample + sample_offset; } CCL_NAMESPACE_END -#endif /* __KERNEL_WORK_STEALING_H__ */ +#endif /* __KERNEL_WORK_STEALING_H__ */ diff --git a/intern/cycles/kernel/kernels/cpu/filter.cpp b/intern/cycles/kernel/kernels/cpu/filter.cpp index 2ff1a392dc3..145a6b6ac40 100644 --- a/intern/cycles/kernel/kernels/cpu/filter.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter.cpp @@ -53,7 +53,7 @@ /* quiet unused define warnings */ #if defined(__KERNEL_SSE2__) - /* do nothing */ +/* do nothing */ #endif 
#include "kernel/filter/filter.h" diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp index 4a9e6047ecf..1d68214c8e7 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_avx.cpp @@ -32,7 +32,7 @@ # define __KERNEL_SSE41__ # define __KERNEL_AVX__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_avx diff --git a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp index c22ec576254..b6709fbc529 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_avx2.cpp @@ -33,7 +33,7 @@ # define __KERNEL_AVX__ # define __KERNEL_AVX2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_avx2 diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu.h b/intern/cycles/kernel/kernels/cpu/filter_cpu.h index 02c85562db8..1423b182ab8 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu.h @@ -25,7 +25,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, float *sampleV, float *sampleVV, float *bufferV, - int* prefilter_rect, + int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset); @@ -38,7 +38,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, float *mean, float *variance, float scale, - int* prefilter_rect, + int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset); @@ -49,9 +49,10 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample, float *from, float *buffer, int out_offset, - int* prefilter_rect); + int *prefilter_rect); -void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, +void 
KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, + int y, ccl_global float *image, ccl_global float *variance, ccl_global float *depth, @@ -59,22 +60,17 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, int *rect, int pass_stride); -void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, - float *mean, - float *variance, - float *a, - float *b, - int* prefilter_rect, - int r); +void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)( + int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r); -void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, +void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer, TileInfo *tiles, int x, int y, int storage_ofs, float *transform, int *rank, - int* rect, + int *rect, int pass_stride, int frame_stride, bool use_time, @@ -87,24 +83,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, float *variance_image, float *scale_image, float *difference_image, - int* rect, + int *rect, int stride, int channel_offset, int frame_offset, float a, float k_2); -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image, - float *out_image, - int* rect, - int stride, - int f); +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)( + float *difference_image, float *out_image, int *rect, int stride, int f); -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image, - float *out_image, - int* rect, - int stride, - int f); +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)( + float *difference_image, float *out_image, int *rect, int stride, int f); void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, int dy, @@ -113,7 +103,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, float *temp_image, float *out_image, float *accum_image, - int* rect, + int *rect, int channel_offset, int stride, int f); @@ -137,7 +127,7 @@ void 
KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image, float *accum_image, - int* rect, + int *rect, int stride); void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x, diff --git a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h index c29505880cb..3d4cb87e104 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/filter_cpu_impl.h @@ -25,12 +25,12 @@ #include "kernel/filter/filter_kernel.h" #ifdef KERNEL_STUB -# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!")) +# define STUB_ASSERT(arch, name) \ + assert(!(#name " kernel stub for architecture " #arch " was called!")) #endif CCL_NAMESPACE_BEGIN - /* Denoise filter */ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, @@ -42,23 +42,25 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(int sample, float *sampleVariance, float *sampleVarianceV, float *bufferVariance, - int* prefilter_rect, + int *prefilter_rect, int buffer_pass_stride, int buffer_denoising_offset) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow); + STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow); #else - kernel_filter_divide_shadow(sample, tile_info, - x, y, - unfilteredA, - unfilteredB, - sampleVariance, - sampleVarianceV, - bufferVariance, - load_int4(prefilter_rect), - buffer_pass_stride, - buffer_denoising_offset); + kernel_filter_divide_shadow(sample, + tile_info, + x, + y, + unfilteredA, + unfilteredB, + sampleVariance, + sampleVarianceV, + bufferVariance, + load_int4(prefilter_rect), + buffer_pass_stride, + buffer_denoising_offset); #endif } @@ -68,23 +70,28 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(int sample, int v_offset, int x, int y, - float *mean, float *variance, + float *mean, + float *variance, float scale, - int* prefilter_rect, + int *prefilter_rect, 
int buffer_pass_stride, int buffer_denoising_offset) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_get_feature); + STUB_ASSERT(KERNEL_ARCH, filter_get_feature); #else - kernel_filter_get_feature(sample, tile_info, - m_offset, v_offset, - x, y, - mean, variance, - scale, - load_int4(prefilter_rect), - buffer_pass_stride, - buffer_denoising_offset); + kernel_filter_get_feature(sample, + tile_info, + m_offset, + v_offset, + x, + y, + mean, + variance, + scale, + load_int4(prefilter_rect), + buffer_pass_stride, + buffer_denoising_offset); #endif } @@ -95,16 +102,18 @@ void KERNEL_FUNCTION_FULL_NAME(filter_write_feature)(int sample, float *from, float *buffer, int out_offset, - int* prefilter_rect) + int *prefilter_rect) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_write_feature); + STUB_ASSERT(KERNEL_ARCH, filter_write_feature); #else - kernel_filter_write_feature(sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect)); + kernel_filter_write_feature( + sample, x, y, load_int4(buffer_params), from, buffer, out_offset, load_int4(prefilter_rect)); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, +void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, + int y, ccl_global float *image, ccl_global float *variance, ccl_global float *depth, @@ -113,35 +122,31 @@ void KERNEL_FUNCTION_FULL_NAME(filter_detect_outliers)(int x, int y, int pass_stride) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers); + STUB_ASSERT(KERNEL_ARCH, filter_detect_outliers); #else - kernel_filter_detect_outliers(x, y, image, variance, depth, output, load_int4(rect), pass_stride); + kernel_filter_detect_outliers( + x, y, image, variance, depth, output, load_int4(rect), pass_stride); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y, - float *mean, - float *variance, - float *a, - float *b, - int* prefilter_rect, - int r) +void 
KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)( + int x, int y, float *mean, float *variance, float *a, float *b, int *prefilter_rect, int r) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_combine_halves); + STUB_ASSERT(KERNEL_ARCH, filter_combine_halves); #else - kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r); + kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, +void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float *buffer, TileInfo *tile_info, int x, int y, int storage_ofs, float *transform, int *rank, - int* prefilter_rect, + int *prefilter_rect, int pass_stride, int frame_stride, bool use_time, @@ -149,21 +154,22 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(float* buffer, float pca_threshold) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_construct_transform); + STUB_ASSERT(KERNEL_ARCH, filter_construct_transform); #else - rank += storage_ofs; - transform += storage_ofs*TRANSFORM_SIZE; - kernel_filter_construct_transform(buffer, - tile_info, - x, y, - load_int4(prefilter_rect), - pass_stride, - frame_stride, - use_time, - transform, - rank, - radius, - pca_threshold); + rank += storage_ofs; + transform += storage_ofs * TRANSFORM_SIZE; + kernel_filter_construct_transform(buffer, + tile_info, + x, + y, + load_int4(prefilter_rect), + pass_stride, + frame_stride, + use_time, + transform, + rank, + radius, + pca_threshold); #endif } @@ -181,44 +187,40 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_difference)(int dx, float k_2) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_difference); #else - kernel_filter_nlm_calc_difference(dx, dy, - weight_image, - variance_image, - scale_image, - difference_image, - load_int4(rect), - stride, - channel_offset, - frame_offset, - a, k_2); + 
kernel_filter_nlm_calc_difference(dx, + dy, + weight_image, + variance_image, + scale_image, + difference_image, + load_int4(rect), + stride, + channel_offset, + frame_offset, + a, + k_2); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)(float *difference_image, - float *out_image, - int *rect, - int stride, - int f) +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_blur)( + float *difference_image, float *out_image, int *rect, int stride, int f) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_blur); #else - kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f); + kernel_filter_nlm_blur(difference_image, out_image, load_int4(rect), stride, f); #endif } -void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)(float *difference_image, - float *out_image, - int *rect, - int stride, - int f) +void KERNEL_FUNCTION_FULL_NAME(filter_nlm_calc_weight)( + float *difference_image, float *out_image, int *rect, int stride, int f) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_calc_weight); #else - kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f); + kernel_filter_nlm_calc_weight(difference_image, out_image, load_int4(rect), stride, f); #endif } @@ -235,17 +237,19 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_update_output)(int dx, int f) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_update_output); #else - kernel_filter_nlm_update_output(dx, dy, - difference_image, - image, - temp_image, - out_image, - accum_image, - load_int4(rect), - channel_offset, - stride, f); + kernel_filter_nlm_update_output(dx, + dy, + difference_image, + image, + temp_image, + out_image, + accum_image, + load_int4(rect), + channel_offset, + stride, + f); #endif } @@ -267,19 +271,24 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_construct_gramian)(int dx, bool 
use_time) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_construct_gramian); #else - kernel_filter_nlm_construct_gramian(dx, dy, t, - difference_image, - buffer, - transform, rank, - XtWX, XtWY, - load_int4(rect), - load_int4(filter_window), - stride, f, - pass_stride, - frame_offset, - use_time); + kernel_filter_nlm_construct_gramian(dx, + dy, + t, + difference_image, + buffer, + transform, + rank, + XtWX, + XtWY, + load_int4(rect), + load_int4(filter_window), + stride, + f, + pass_stride, + frame_offset, + use_time); #endif } @@ -289,9 +298,9 @@ void KERNEL_FUNCTION_FULL_NAME(filter_nlm_normalize)(float *out_image, int stride) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize); + STUB_ASSERT(KERNEL_ARCH, filter_nlm_normalize); #else - kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride); + kernel_filter_nlm_normalize(out_image, accum_image, load_int4(rect), stride); #endif } @@ -306,12 +315,12 @@ void KERNEL_FUNCTION_FULL_NAME(filter_finalize)(int x, int sample) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, filter_finalize); + STUB_ASSERT(KERNEL_ARCH, filter_finalize); #else - XtWX += storage_ofs*XTWX_SIZE; - XtWY += storage_ofs*XTWY_SIZE; - rank += storage_ofs; - kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample); + XtWX += storage_ofs * XTWX_SIZE; + XtWY += storage_ofs * XTWY_SIZE; + rank += storage_ofs; + kernel_filter_finalize(x, y, buffer, rank, 1, XtWX, XtWY, load_int4(buffer_params), sample); #endif } diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp index f7c9935f1d0..6c6c3e78696 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_sse2.cpp @@ -27,7 +27,7 @@ # if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) # define __KERNEL_SSE2__ # endif -#endif /* 
WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_sse2 diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp index 070b95a3505..e2243000331 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_sse3.cpp @@ -29,7 +29,7 @@ # define __KERNEL_SSE3__ # define __KERNEL_SSSE3__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_sse3 diff --git a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp index 254025be4e2..068889365e3 100644 --- a/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/filter_sse41.cpp @@ -31,7 +31,7 @@ # define __KERNEL_SSSE3__ # define __KERNEL_SSE41__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ #include "kernel/filter/filter.h" #define KERNEL_ARCH cpu_sse41 diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index de487f6123f..f2146302a27 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -53,7 +53,7 @@ /* quiet unused define warnings */ #if defined(__KERNEL_SSE2__) - /* do nothing */ +/* do nothing */ #endif #include "kernel/kernel.h" @@ -66,29 +66,27 @@ CCL_NAMESPACE_BEGIN void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size) { - if(strcmp(name, "__data") == 0) - memcpy(&kg->__data, host, size); - else - assert(0); + if (strcmp(name, "__data") == 0) + memcpy(&kg->__data, host, size); + else + assert(0); } -void kernel_tex_copy(KernelGlobals *kg, - const char *name, - void *mem, - size_t size) +void kernel_tex_copy(KernelGlobals *kg, const char *name, 
void *mem, size_t size) { - if(0) { - } + if (0) { + } #define KERNEL_TEX(type, tname) \ - else if(strcmp(name, #tname) == 0) { \ - kg->tname.data = (type*)mem; \ - kg->tname.width = size; \ - } + else if (strcmp(name, #tname) == 0) \ + { \ + kg->tname.data = (type *)mem; \ + kg->tname.width = size; \ + } #include "kernel/kernel_textures.h" - else { - assert(0); - } + else { + assert(0); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp index a645fb4d8dd..0656fc9dd00 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx.cpp @@ -32,7 +32,7 @@ # define __KERNEL_SSE41__ # define __KERNEL_AVX__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx diff --git a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp index 6bbb87727b9..5baafdc699e 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp @@ -33,7 +33,7 @@ # define __KERNEL_AVX__ # define __KERNEL_AVX2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx2 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h index 6bdb8546a24..f5d981fb71a 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h @@ -16,25 +16,24 @@ /* Templated common declaration part of all CPU kernels. 
*/ -void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, - float *buffer, - int sample, - int x, int y, - int offset, - int stride); +void KERNEL_FUNCTION_FULL_NAME(path_trace)( + KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride); void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, - int x, int y, - int offset, int stride); + int x, + int y, + int offset, + int stride); void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, - int x, int y, + int x, + int y, int offset, int stride); @@ -49,24 +48,28 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, /* Split kernels */ -void KERNEL_FUNCTION_FULL_NAME(data_init)( - KernelGlobals *kg, - ccl_constant KernelData *data, - ccl_global void *split_data_buffer, - int num_elements, - ccl_global char *ray_state, - int start_sample, - int end_sample, - int sx, int sy, int sw, int sh, int offset, int stride, - ccl_global int *Queue_index, - int queuesize, - ccl_global char *use_queues_flag, - ccl_global unsigned int *work_pool_wgs, - unsigned int num_samples, - ccl_global float *buffer); +void KERNEL_FUNCTION_FULL_NAME(data_init)(KernelGlobals *kg, + ccl_constant KernelData *data, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, + int start_sample, + int end_sample, + int sx, + int sy, + int sw, + int sh, + int offset, + int stride, + ccl_global int *Queue_index, + int queuesize, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pool_wgs, + unsigned int num_samples, + ccl_global float *buffer); #define DECLARE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data); + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * data); DECLARE_SPLIT_KERNEL_FUNCTION(path_init) DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect) diff --git 
a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index ae4fd85780d..4289e2bbb85 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -19,523 +19,508 @@ CCL_NAMESPACE_BEGIN -template<typename T> struct TextureInterpolator { +template<typename T> struct TextureInterpolator { #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ - { \ - u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ - u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ - u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ - u[3] = (1.0f / 6.0f) * t * t * t; \ - } (void) 0 - - static ccl_always_inline float4 read(float4 r) - { - return r; - } - - static ccl_always_inline float4 read(uchar4 r) - { - float f = 1.0f / 255.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - - static ccl_always_inline float4 read(uchar r) - { - float f = r * (1.0f / 255.0f); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(float r) - { - /* TODO(dingto): Optimize this, so interpolation - * happens on float instead of float4 */ - return make_float4(r, r, r, 1.0f); - } - - static ccl_always_inline float4 read(half4 r) - { - return half4_to_float4(r); - } - - static ccl_always_inline float4 read(half r) - { - float f = half_to_float(r); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(uint16_t r) - { - float f = r*(1.0f/65535.0f); - return make_float4(f, f, f, 1.0f); - } - - static ccl_always_inline float4 read(ushort4 r) - { - float f = 1.0f/65535.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - - static ccl_always_inline float4 read(const T *data, - int x, int y, - int width, int height) - { - if(x < 0 || y < 0 || x >= width || y >= height) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return read(data[y * width + x]); - } - - static ccl_always_inline int wrap_periodic(int x, int width) 
- { - x %= width; - if(x < 0) - x += width; - return x; - } - - static ccl_always_inline int wrap_clamp(int x, int width) - { - return clamp(x, 0, width-1); - } - - static ccl_always_inline float frac(float x, int *ix) - { - int i = float_to_int(x) - ((x < 0.0f)? 1: 0); - *ix = i; - return x - (float)i; - } - - /* ******** 2D interpolation ******** */ - - static ccl_always_inline float4 interp_closest(const TextureInfo& info, - float x, float y) - { - const T *data = (const T*)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy; - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return read(data[ix + iy*width]); - } - - static ccl_always_inline float4 interp_linear(const TextureInfo& info, - float x, float y) - { - const T *data = (const T*)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy, nix, niy; - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - break; - case EXTENSION_CLIP: - nix = ix + 1; - niy = iy + 1; - break; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - return (1.0f - ty) * (1.0f - tx) 
* read(data, ix, iy, width, height) + - (1.0f - ty) * tx * read(data, nix, iy, width, height) + - ty * (1.0f - tx) * read(data, ix, niy, width, height) + - ty * tx * read(data, nix, niy, width, height); - } - - static ccl_always_inline float4 interp_cubic(const TextureInfo& info, - float x, float y) - { - const T *data = (const T*)info.data; - const int width = info.width; - const int height = info.height; - int ix, iy, nix, niy; - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - int pix, piy, nnix, nniy; - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - break; - case EXTENSION_CLIP: - pix = ix - 1; - piy = iy - 1; - nix = ix + 1; - niy = iy + 1; - nnix = ix + 2; - nniy = iy + 2; - break; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {piy, iy, niy, nniy}; - float u[4], v[4]; - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. 
- */ + { \ + u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ + u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \ + u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } \ + (void)0 + + static ccl_always_inline float4 read(float4 r) + { + return r; + } + + static ccl_always_inline float4 read(uchar4 r) + { + float f = 1.0f / 255.0f; + return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); + } + + static ccl_always_inline float4 read(uchar r) + { + float f = r * (1.0f / 255.0f); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline float4 read(float r) + { + /* TODO(dingto): Optimize this, so interpolation + * happens on float instead of float4 */ + return make_float4(r, r, r, 1.0f); + } + + static ccl_always_inline float4 read(half4 r) + { + return half4_to_float4(r); + } + + static ccl_always_inline float4 read(half r) + { + float f = half_to_float(r); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline float4 read(uint16_t r) + { + float f = r * (1.0f / 65535.0f); + return make_float4(f, f, f, 1.0f); + } + + static ccl_always_inline float4 read(ushort4 r) + { + float f = 1.0f / 65535.0f; + return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); + } + + static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height) + { + if (x < 0 || y < 0 || x >= width || y >= height) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return read(data[y * width + x]); + } + + static ccl_always_inline int wrap_periodic(int x, int width) + { + x %= width; + if (x < 0) + x += width; + return x; + } + + static ccl_always_inline int wrap_clamp(int x, int width) + { + return clamp(x, 0, width - 1); + } + + static ccl_always_inline float frac(float x, int *ix) + { + int i = float_to_int(x) - ((x < 0.0f) ? 
1 : 0); + *ix = i; + return x - (float)i; + } + + /* ******** 2D interpolation ******** */ + + static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y) + { + const T *data = (const T *)info.data; + const int width = info.width; + const int height = info.height; + int ix, iy; + frac(x * (float)width, &ix); + frac(y * (float)height, &iy); + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return read(data[ix + iy * width]); + } + + static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y) + { + const T *data = (const T *)info.data; + const int width = info.width; + const int height = info.height; + int ix, iy, nix, niy; + const float tx = frac(x * (float)width - 0.5f, &ix); + const float ty = frac(y * (float)height - 0.5f, &iy); + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + nix = wrap_periodic(ix + 1, width); + niy = wrap_periodic(iy + 1, height); + break; + case EXTENSION_CLIP: + nix = ix + 1; + niy = iy + 1; + break; + case EXTENSION_EXTEND: + nix = wrap_clamp(ix + 1, width); + niy = wrap_clamp(iy + 1, height); + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) + + (1.0f - ty) * tx * read(data, nix, iy, width, height) + + ty * (1.0f - tx) * read(data, ix, niy, width, height) + + ty * tx * read(data, nix, niy, width, height); + } + + static ccl_always_inline float4 
interp_cubic(const TextureInfo &info, float x, float y) + { + const T *data = (const T *)info.data; + const int width = info.width; + const int height = info.height; + int ix, iy, nix, niy; + const float tx = frac(x * (float)width - 0.5f, &ix); + const float ty = frac(y * (float)height - 0.5f, &iy); + int pix, piy, nnix, nniy; + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + pix = wrap_periodic(ix - 1, width); + piy = wrap_periodic(iy - 1, height); + nix = wrap_periodic(ix + 1, width); + niy = wrap_periodic(iy + 1, height); + nnix = wrap_periodic(ix + 2, width); + nniy = wrap_periodic(iy + 2, height); + break; + case EXTENSION_CLIP: + pix = ix - 1; + piy = iy - 1; + nix = ix + 1; + niy = iy + 1; + nnix = ix + 2; + nniy = iy + 2; + break; + case EXTENSION_EXTEND: + pix = wrap_clamp(ix - 1, width); + piy = wrap_clamp(iy - 1, height); + nix = wrap_clamp(ix + 1, width); + niy = wrap_clamp(iy + 1, height); + nnix = wrap_clamp(ix + 2, width); + nniy = wrap_clamp(iy + 2, height); + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {piy, iy, niy, nniy}; + float u[4], v[4]; + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ #define DATA(x, y) (read(data, xc[x], yc[y], width, height)) #define TERM(col) \ - (v[col] * (u[0] * DATA(0, col) + \ - u[1] * DATA(1, col) + \ - u[2] * DATA(2, col) + \ - u[3] * DATA(3, col))) + (v[col] * \ + (u[0] * DATA(0, col) + u[1] * DATA(1, col) + u[2] * DATA(2, col) + u[3] * DATA(3, col))) - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); - /* Actual interpolation. */ - return TERM(0) + TERM(1) + TERM(2) + TERM(3); + /* Actual interpolation. 
*/ + return TERM(0) + TERM(1) + TERM(2) + TERM(3); #undef TERM #undef DATA - } - - static ccl_always_inline float4 interp(const TextureInfo& info, - float x, float y) - { - if(UNLIKELY(!info.data)) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - switch(info.interpolation) { - case INTERPOLATION_CLOSEST: - return interp_closest(info, x, y); - case INTERPOLATION_LINEAR: - return interp_linear(info, x, y); - default: - return interp_cubic(info, x, y); - } - } - - /* ******** 3D interpolation ******** */ - - static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info, - float x, float y, float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - - frac(x*(float)width, &ix); - frac(y*(float)height, &iy); - frac(z*(float)depth, &iz); - - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const T *data = (const T*)info.data; - return read(data[ix + iy*width + iz*width*height]); - } - - static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info, - float x, float y, float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - int nix, niy, niz; - - float tx = frac(x*(float)width - 0.5f, &ix); - float ty = frac(y*(float)height - 0.5f, &iy); - float tz = frac(z*(float)depth - 0.5f, &iz); - - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - nix = 
wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const T *data = (const T*)info.data; - float4 r; - - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); - r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); - r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); - r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); - - r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); - r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); - r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); - r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); - - return r; - } - - /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are - * causing stack overflow issue in this function unless it is inlined. - * - * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization - * enabled. 
- */ + } + + static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y) + { + if (UNLIKELY(!info.data)) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + switch (info.interpolation) { + case INTERPOLATION_CLOSEST: + return interp_closest(info, x, y); + case INTERPOLATION_LINEAR: + return interp_linear(info, x, y); + default: + return interp_cubic(info, x, y); + } + } + + /* ******** 3D interpolation ******** */ + + static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info, + float x, + float y, + float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + + frac(x * (float)width, &ix); + frac(y * (float)height, &iy); + frac(z * (float)depth, &iz); + + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const T *data = (const T *)info.data; + return read(data[ix + iy * width + iz * width * height]); + } + + static ccl_always_inline float4 interp_3d_linear(const TextureInfo &info, + float x, + float y, + float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + int nix, niy, niz; + + float tx = frac(x * (float)width - 0.5f, &ix); + float ty = frac(y * (float)height - 0.5f, &iy); + float tz = frac(z * (float)depth - 0.5f, &iz); + + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + nix = wrap_periodic(ix + 1, width); + niy = 
wrap_periodic(iy + 1, height); + niz = wrap_periodic(iz + 1, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + nix = wrap_clamp(ix + 1, width); + niy = wrap_clamp(iy + 1, height); + niz = wrap_clamp(iz + 1, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const T *data = (const T *)info.data; + float4 r; + + r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * + read(data[ix + iy * width + iz * width * height]); + r += (1.0f - tz) * (1.0f - ty) * tx * read(data[nix + iy * width + iz * width * height]); + r += (1.0f - tz) * ty * (1.0f - tx) * read(data[ix + niy * width + iz * width * height]); + r += (1.0f - tz) * ty * tx * read(data[nix + niy * width + iz * width * height]); + + r += tz * (1.0f - ty) * (1.0f - tx) * read(data[ix + iy * width + niz * width * height]); + r += tz * (1.0f - ty) * tx * read(data[nix + iy * width + niz * width * height]); + r += tz * ty * (1.0f - tx) * read(data[ix + niy * width + niz * width * height]); + r += tz * ty * tx * read(data[nix + niy * width + niz * width * height]); + + return r; + } + + /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are + * causing stack overflow issue in this function unless it is inlined. + * + * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization + * enabled. + */ #if defined(__GNUC__) || defined(__clang__) - static ccl_always_inline + static ccl_always_inline #else - static ccl_never_inline + static ccl_never_inline #endif - float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z) - { - int width = info.width; - int height = info.height; - int depth = info.depth; - int ix, iy, iz; - int nix, niy, niz; - /* Tricubic b-spline interpolation. 
*/ - const float tx = frac(x*(float)width - 0.5f, &ix); - const float ty = frac(y*(float)height - 0.5f, &iy); - const float tz = frac(z*(float)depth - 0.5f, &iz); - int pix, piy, piz, nnix, nniy, nniz; - - switch(info.extension) { - case EXTENSION_REPEAT: - ix = wrap_periodic(ix, width); - iy = wrap_periodic(iy, height); - iz = wrap_periodic(iz, depth); - - pix = wrap_periodic(ix-1, width); - piy = wrap_periodic(iy-1, height); - piz = wrap_periodic(iz-1, depth); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); - niz = wrap_periodic(iz+1, depth); - - nnix = wrap_periodic(ix+2, width); - nniy = wrap_periodic(iy+2, height); - nniz = wrap_periodic(iz+2, depth); - break; - case EXTENSION_CLIP: - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - ATTR_FALLTHROUGH; - case EXTENSION_EXTEND: - pix = wrap_clamp(ix-1, width); - piy = wrap_clamp(iy-1, height); - piz = wrap_clamp(iz-1, depth); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); - niz = wrap_clamp(iz+1, depth); - - nnix = wrap_clamp(ix+2, width); - nniy = wrap_clamp(iy+2, height); - nniz = wrap_clamp(iz+2, depth); - - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - iz = wrap_clamp(iz, depth); - break; - default: - kernel_assert(0); - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {width * piy, - width * iy, - width * niy, - width * nniy}; - const int zc[4] = {width * height * piz, - width * height * iz, - width * height * niz, - width * height * nniz}; - float u[4], v[4], w[4]; - - /* Some helper macro to keep code reasonable size, - * let compiler to inline all the matrix multiplications. 
- */ + float4 + interp_3d_tricubic(const TextureInfo &info, float x, float y, float z) + { + int width = info.width; + int height = info.height; + int depth = info.depth; + int ix, iy, iz; + int nix, niy, niz; + /* Tricubic b-spline interpolation. */ + const float tx = frac(x * (float)width - 0.5f, &ix); + const float ty = frac(y * (float)height - 0.5f, &iy); + const float tz = frac(z * (float)depth - 0.5f, &iz); + int pix, piy, piz, nnix, nniy, nniz; + + switch (info.extension) { + case EXTENSION_REPEAT: + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + pix = wrap_periodic(ix - 1, width); + piy = wrap_periodic(iy - 1, height); + piz = wrap_periodic(iz - 1, depth); + + nix = wrap_periodic(ix + 1, width); + niy = wrap_periodic(iy + 1, height); + niz = wrap_periodic(iz + 1, depth); + + nnix = wrap_periodic(ix + 2, width); + nniy = wrap_periodic(iy + 2, height); + nniz = wrap_periodic(iz + 2, depth); + break; + case EXTENSION_CLIP: + if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + ATTR_FALLTHROUGH; + case EXTENSION_EXTEND: + pix = wrap_clamp(ix - 1, width); + piy = wrap_clamp(iy - 1, height); + piz = wrap_clamp(iz - 1, depth); + + nix = wrap_clamp(ix + 1, width); + niy = wrap_clamp(iy + 1, height); + niz = wrap_clamp(iz + 1, depth); + + nnix = wrap_clamp(ix + 2, width); + nniy = wrap_clamp(iy + 2, height); + nniz = wrap_clamp(iz + 2, depth); + + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + break; + default: + kernel_assert(0); + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, width * iy, width * niy, width * nniy}; + const int zc[4] = { + width * height * piz, width * height * iz, width * height * niz, width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let 
compiler to inline all the matrix multiplications. + */ #define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) #define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + \ - u[1] * DATA(1, col, row) + \ - u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) + (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) #define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + \ - COL_TERM(1, row) + \ - COL_TERM(2, row) + \ - COL_TERM(3, row))) + (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row))) - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); - /* Actual interpolation. */ - const T *data = (const T*)info.data; - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + /* Actual interpolation. */ + const T *data = (const T *)info.data; + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); #undef COL_TERM #undef ROW_TERM #undef DATA - } - - static ccl_always_inline float4 interp_3d(const TextureInfo& info, - float x, float y, float z, - InterpolationType interp) - { - if(UNLIKELY(!info.data)) - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) { - case INTERPOLATION_CLOSEST: - return interp_3d_closest(info, x, y, z); - case INTERPOLATION_LINEAR: - return interp_3d_linear(info, x, y, z); - default: - return interp_3d_tricubic(info, x, y, z); - } - } + } + + static ccl_always_inline float4 + interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp) + { + if (UNLIKELY(!info.data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + switch ((interp == INTERPOLATION_NONE) ? 
info.interpolation : interp) { + case INTERPOLATION_CLOSEST: + return interp_3d_closest(info, x, y, z); + case INTERPOLATION_LINEAR: + return interp_3d_linear(info, x, y, z); + default: + return interp_3d_tricubic(info, x, y, z); + } + } #undef SET_CUBIC_SPLINE_WEIGHTS }; ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { - const TextureInfo& info = kernel_tex_fetch(__texture_info, id); - - switch(kernel_tex_type(id)) { - case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator<half>::interp(info, x, y); - case IMAGE_DATA_TYPE_BYTE: - return TextureInterpolator<uchar>::interp(info, x, y); - case IMAGE_DATA_TYPE_USHORT: - return TextureInterpolator<uint16_t>::interp(info, x, y); - case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator<float>::interp(info, x, y); - case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator<half4>::interp(info, x, y); - case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator<uchar4>::interp(info, x, y); - case IMAGE_DATA_TYPE_USHORT4: - return TextureInterpolator<ushort4>::interp(info, x, y); - case IMAGE_DATA_TYPE_FLOAT4: - return TextureInterpolator<float4>::interp(info, x, y); - default: - assert(0); - return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); - } + const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + + switch (kernel_tex_type(id)) { + case IMAGE_DATA_TYPE_HALF: + return TextureInterpolator<half>::interp(info, x, y); + case IMAGE_DATA_TYPE_BYTE: + return TextureInterpolator<uchar>::interp(info, x, y); + case IMAGE_DATA_TYPE_USHORT: + return TextureInterpolator<uint16_t>::interp(info, x, y); + case IMAGE_DATA_TYPE_FLOAT: + return TextureInterpolator<float>::interp(info, x, y); + case IMAGE_DATA_TYPE_HALF4: + return TextureInterpolator<half4>::interp(info, x, y); + case IMAGE_DATA_TYPE_BYTE4: + return TextureInterpolator<uchar4>::interp(info, x, y); + case IMAGE_DATA_TYPE_USHORT4: + return 
TextureInterpolator<ushort4>::interp(info, x, y); + case IMAGE_DATA_TYPE_FLOAT4: + return TextureInterpolator<float4>::interp(info, x, y); + default: + assert(0); + return make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } } -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) +ccl_device float4 kernel_tex_image_interp_3d( + KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) { - const TextureInfo& info = kernel_tex_fetch(__texture_info, id); - - switch(kernel_tex_type(id)) { - case IMAGE_DATA_TYPE_HALF: - return TextureInterpolator<half>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_BYTE: - return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_USHORT: - return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_FLOAT: - return TextureInterpolator<float>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_HALF4: - return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_BYTE4: - return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_USHORT4: - return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp); - case IMAGE_DATA_TYPE_FLOAT4: - return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp); - default: - assert(0); - return make_float4(TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); - } + const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + + switch (kernel_tex_type(id)) { + case IMAGE_DATA_TYPE_HALF: + return TextureInterpolator<half>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_BYTE: + return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_USHORT: + return TextureInterpolator<uint16_t>::interp_3d(info, x, y, z, interp); + case 
IMAGE_DATA_TYPE_FLOAT: + return TextureInterpolator<float>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_HALF4: + return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_BYTE4: + return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_USHORT4: + return TextureInterpolator<ushort4>::interp_3d(info, x, y, z, interp); + case IMAGE_DATA_TYPE_FLOAT4: + return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp); + default: + assert(0); + return make_float4( + TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h index 759b7e4c20d..9ca3f46b5b6 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h @@ -58,14 +58,15 @@ # include "kernel/split/kernel_next_iteration_setup.h" # include "kernel/split/kernel_indirect_subsurface.h" # include "kernel/split/kernel_buffer_update.h" -# endif /* __SPLIT_KERNEL__ */ +# endif /* __SPLIT_KERNEL__ */ #else -# define STUB_ASSERT(arch, name) assert(!(#name " kernel stub for architecture " #arch " was called!")) +# define STUB_ASSERT(arch, name) \ + assert(!(#name " kernel stub for architecture " #arch " was called!")) # ifdef __SPLIT_KERNEL__ # include "kernel/split/kernel_data_init.h" -# endif /* __SPLIT_KERNEL__ */ -#endif /* KERNEL_STUB */ +# endif /* __SPLIT_KERNEL__ */ +#endif /* KERNEL_STUB */ CCL_NAMESPACE_BEGIN @@ -73,31 +74,22 @@ CCL_NAMESPACE_BEGIN /* Path Tracing */ -void KERNEL_FUNCTION_FULL_NAME(path_trace)(KernelGlobals *kg, - float *buffer, - int sample, - int x, int y, - int offset, - int stride) +void KERNEL_FUNCTION_FULL_NAME(path_trace)( + KernelGlobals *kg, float *buffer, int sample, int x, int y, int offset, int stride) { -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, path_trace); -#else -# 
ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) { - kernel_branched_path_trace(kg, - buffer, - sample, - x, y, - offset, - stride); - } - else -# endif - { - kernel_path_trace(kg, buffer, sample, x, y, offset, stride); - } -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, path_trace); +# else +# ifdef __BRANCHED_PATH__ + if (kernel_data.integrator.branched) { + kernel_branched_path_trace(kg, buffer, sample, x, y, offset, stride); + } + else +# endif + { + kernel_path_trace(kg, buffer, sample, x, y, offset, stride); + } +# endif /* KERNEL_STUB */ } /* Film */ @@ -106,42 +98,32 @@ void KERNEL_FUNCTION_FULL_NAME(convert_to_byte)(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, - int x, int y, + int x, + int y, int offset, int stride) { -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, convert_to_byte); -#else - kernel_film_convert_to_byte(kg, - rgba, - buffer, - sample_scale, - x, y, - offset, - stride); -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, convert_to_byte); +# else + kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride); +# endif /* KERNEL_STUB */ } void KERNEL_FUNCTION_FULL_NAME(convert_to_half_float)(KernelGlobals *kg, uchar4 *rgba, float *buffer, float sample_scale, - int x, int y, + int x, + int y, int offset, int stride) { -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, convert_to_half_float); -#else - kernel_film_convert_to_half_float(kg, - rgba, - buffer, - sample_scale, - x, y, - offset, - stride); -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, convert_to_half_float); +# else + kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride); +# endif /* KERNEL_STUB */ } /* Shader Evaluate */ @@ -155,60 +137,53 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg, int offset, int sample) { -#ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, shader); -#else - if(type >= SHADER_EVAL_BAKE) { -# 
ifdef __BAKING__ - kernel_bake_evaluate(kg, - input, - output, - (ShaderEvalType)type, - filter, - i, - offset, - sample); -# endif - } - else if(type == SHADER_EVAL_DISPLACE) { - kernel_displace_evaluate(kg, input, output, i); - } - else { - kernel_background_evaluate(kg, input, output, i); - } -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB + STUB_ASSERT(KERNEL_ARCH, shader); +# else + if (type >= SHADER_EVAL_BAKE) { +# ifdef __BAKING__ + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, i, offset, sample); +# endif + } + else if (type == SHADER_EVAL_DISPLACE) { + kernel_displace_evaluate(kg, input, output, i); + } + else { + kernel_background_evaluate(kg, input, output, i); + } +# endif /* KERNEL_STUB */ } -#else /* __SPLIT_KERNEL__ */ +#else /* __SPLIT_KERNEL__ */ /* Split Kernel Path Tracing */ -#ifdef KERNEL_STUB -# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ - { \ - STUB_ASSERT(KERNEL_ARCH, name); \ - } - -# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ - { \ - STUB_ASSERT(KERNEL_ARCH, name); \ - } -#else -# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ - { \ - kernel_##name(kg); \ - } - -# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ - void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData* /*data*/) \ - { \ - ccl_local type locals; \ - kernel_##name(kg, &locals); \ - } -#endif /* KERNEL_STUB */ +# ifdef KERNEL_STUB +# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ + { \ + STUB_ASSERT(KERNEL_ARCH, name); \ + } + +# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ + { \ + STUB_ASSERT(KERNEL_ARCH, 
name); \ + } +# else +# define DEFINE_SPLIT_KERNEL_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ + { \ + kernel_##name(kg); \ + } + +# define DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(name, type) \ + void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals * kg, KernelData * /*data*/) \ + { \ + ccl_local type locals; \ + kernel_##name(kg, &locals); \ + } +# endif /* KERNEL_STUB */ DEFINE_SPLIT_KERNEL_FUNCTION(path_init) DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect) @@ -219,7 +194,8 @@ DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals) DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval) -DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals) +DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, + BackgroundAOLocals) DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint) DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao) @@ -228,7 +204,7 @@ DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(enqueue_inactive, uint) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint) DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface) DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint) -#endif /* __SPLIT_KERNEL__ */ +#endif /* __SPLIT_KERNEL__ */ #undef KERNEL_STUB #undef STUB_ASSERT diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp index c5e199b0a69..989f5e5aaa8 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split.cpp @@ -54,7 +54,7 @@ /* quiet unused define warnings */ #if defined(__KERNEL_SSE2__) - /* do nothing */ +/* do nothing */ #endif #include "kernel/kernel.h" diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp 
b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp index 6ba3425a343..1b2e2516751 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp @@ -34,7 +34,7 @@ # define __KERNEL_SSE41__ # define __KERNEL_AVX__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp index 76b2d77ebb8..43b8bfbf864 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp @@ -35,7 +35,7 @@ # define __KERNEL_AVX__ # define __KERNEL_AVX2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_avx2 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp index b468b6f44c8..9743789179d 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp @@ -29,7 +29,7 @@ # if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) # define __KERNEL_SSE2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse2 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp index 3e5792d0b17..1bec7633500 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp @@ -31,7 +31,7 @@ # define __KERNEL_SSE3__ # define __KERNEL_SSSE3__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ #include "kernel/kernel.h" #define 
KERNEL_ARCH cpu_sse3 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp index 3629f21cd29..c0efc2350e9 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp @@ -32,7 +32,7 @@ # define __KERNEL_SSSE3__ # define __KERNEL_SSE41__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse41 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp index 57530c88710..173be8e93ce 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp @@ -27,7 +27,7 @@ # if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) # define __KERNEL_SSE2__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse2 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp index c607753bc4b..31273fe3344 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp @@ -29,7 +29,7 @@ # define __KERNEL_SSE3__ # define __KERNEL_SSSE3__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 */ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse3 diff --git a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp index a278554731c..1d020b7fee6 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp @@ -30,7 +30,7 @@ # define __KERNEL_SSSE3__ # define __KERNEL_SSE41__ # endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 */ +#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 
*/ #include "kernel/kernel.h" #define KERNEL_ARCH cpu_sse41 diff --git a/intern/cycles/kernel/kernels/cuda/kernel_config.h b/intern/cycles/kernel/kernels/cuda/kernel_config.h index 6d41dc15785..d9f349837a8 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_config.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_config.h @@ -81,7 +81,6 @@ # define CUDA_KERNEL_MAX_REGISTERS 64 # define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72 - /* unknown architecture */ #else # error "Unknown or unsupported CUDA architecture, can't determine launch bounds" @@ -96,18 +95,19 @@ * given the maximum number of registers per thread. */ #define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \ - __launch_bounds__( \ - threads_block_width*threads_block_width, \ - CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \ - ) + __launch_bounds__(threads_block_width *threads_block_width, \ + CUDA_MULTIPRESSOR_MAX_REGISTERS / \ + (threads_block_width * threads_block_width * thread_num_registers)) /* sanity checks */ -#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS +#if CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS # error "Maximum number of threads per block exceeded" #endif -#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS +#if CUDA_MULTIPRESSOR_MAX_REGISTERS / \ + (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH * CUDA_KERNEL_MAX_REGISTERS) > \ + CUDA_MULTIPROCESSOR_MAX_BLOCKS # error "Maximum number of blocks per multiprocessor exceeded" #endif diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h index 37cfbbcb235..7c68f08ea10 100644 --- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h +++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h @@ -17,174 +17,165 @@ /* w0, w1, w2, and w3 are the four 
cubic B-spline basis functions. */ ccl_device float cubic_w0(float a) { - return (1.0f/6.0f)*(a*(a*(-a + 3.0f) - 3.0f) + 1.0f); + return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); } ccl_device float cubic_w1(float a) { - return (1.0f/6.0f)*(a*a*(3.0f*a - 6.0f) + 4.0f); + return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); } ccl_device float cubic_w2(float a) { - return (1.0f/6.0f)*(a*(a*(-3.0f*a + 3.0f) + 3.0f) + 1.0f); + return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); } ccl_device float cubic_w3(float a) { - return (1.0f/6.0f)*(a*a*a); + return (1.0f / 6.0f) * (a * a * a); } /* g0 and g1 are the two amplitude functions. */ ccl_device float cubic_g0(float a) { - return cubic_w0(a) + cubic_w1(a); + return cubic_w0(a) + cubic_w1(a); } ccl_device float cubic_g1(float a) { - return cubic_w2(a) + cubic_w3(a); + return cubic_w2(a) + cubic_w3(a); } /* h0 and h1 are the two offset functions */ ccl_device float cubic_h0(float a) { - /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ - return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f; + /* Note +0.5 offset to compensate for CUDA linear filtering convention. */ + return -1.0f + cubic_w1(a) / (cubic_w0(a) + cubic_w1(a)) + 0.5f; } ccl_device float cubic_h1(float a) { - return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; + return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f; } /* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. 
*/ template<typename T> -ccl_device T kernel_tex_image_interp_bicubic(const TextureInfo& info, CUtexObject tex, float x, float y) +ccl_device T +kernel_tex_image_interp_bicubic(const TextureInfo &info, CUtexObject tex, float x, float y) { - x = (x * info.width) - 0.5f; - y = (y * info.height) - 0.5f; - - float px = floor(x); - float py = floor(y); - float fx = x - px; - float fy = y - py; - - float g0x = cubic_g0(fx); - float g1x = cubic_g1(fx); - float x0 = (px + cubic_h0(fx)) / info.width; - float x1 = (px + cubic_h1(fx)) / info.width; - float y0 = (py + cubic_h0(fy)) / info.height; - float y1 = (py + cubic_h1(fy)) / info.height; - - return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + - g1x * tex2D<T>(tex, x1, y0)) + - cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + - g1x * tex2D<T>(tex, x1, y1)); + x = (x * info.width) - 0.5f; + y = (y * info.height) - 0.5f; + + float px = floor(x); + float py = floor(y); + float fx = x - px; + float fy = y - py; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float x0 = (px + cubic_h0(fx)) / info.width; + float x1 = (px + cubic_h1(fx)) / info.width; + float y0 = (py + cubic_h0(fy)) / info.height; + float y1 = (py + cubic_h1(fy)) / info.height; + + return cubic_g0(fy) * (g0x * tex2D<T>(tex, x0, y0) + g1x * tex2D<T>(tex, x1, y0)) + + cubic_g1(fy) * (g0x * tex2D<T>(tex, x0, y1) + g1x * tex2D<T>(tex, x1, y1)); } /* Fast tricubic texture lookup using 8 trilinear lookups. 
*/ template<typename T> -ccl_device T kernel_tex_image_interp_bicubic_3d(const TextureInfo& info, CUtexObject tex, float x, float y, float z) +ccl_device T kernel_tex_image_interp_bicubic_3d( + const TextureInfo &info, CUtexObject tex, float x, float y, float z) { - x = (x * info.width) - 0.5f; - y = (y * info.height) - 0.5f; - z = (z * info.depth) - 0.5f; - - float px = floor(x); - float py = floor(y); - float pz = floor(z); - float fx = x - px; - float fy = y - py; - float fz = z - pz; - - float g0x = cubic_g0(fx); - float g1x = cubic_g1(fx); - float g0y = cubic_g0(fy); - float g1y = cubic_g1(fy); - float g0z = cubic_g0(fz); - float g1z = cubic_g1(fz); - - float x0 = (px + cubic_h0(fx)) / info.width; - float x1 = (px + cubic_h1(fx)) / info.width; - float y0 = (py + cubic_h0(fy)) / info.height; - float y1 = (py + cubic_h1(fy)) / info.height; - float z0 = (pz + cubic_h0(fz)) / info.depth; - float z1 = (pz + cubic_h1(fz)) / info.depth; - - return g0z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z0) + - g1x * tex3D<T>(tex, x1, y0, z0)) + - g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + - g1x * tex3D<T>(tex, x1, y1, z0))) + - g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + - g1x * tex3D<T>(tex, x1, y0, z1)) + - g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + - g1x * tex3D<T>(tex, x1, y1, z1))); + x = (x * info.width) - 0.5f; + y = (y * info.height) - 0.5f; + z = (z * info.depth) - 0.5f; + + float px = floor(x); + float py = floor(y); + float pz = floor(z); + float fx = x - px; + float fy = y - py; + float fz = z - pz; + + float g0x = cubic_g0(fx); + float g1x = cubic_g1(fx); + float g0y = cubic_g0(fy); + float g1y = cubic_g1(fy); + float g0z = cubic_g0(fz); + float g1z = cubic_g1(fz); + + float x0 = (px + cubic_h0(fx)) / info.width; + float x1 = (px + cubic_h1(fx)) / info.width; + float y0 = (py + cubic_h0(fy)) / info.height; + float y1 = (py + cubic_h1(fy)) / info.height; + float z0 = (pz + cubic_h0(fz)) / info.depth; + float z1 = (pz + cubic_h1(fz)) / info.depth; + + return g0z * (g0y 
* (g0x * tex3D<T>(tex, x0, y0, z0) + g1x * tex3D<T>(tex, x1, y0, z0)) + + g1y * (g0x * tex3D<T>(tex, x0, y1, z0) + g1x * tex3D<T>(tex, x1, y1, z0))) + + g1z * (g0y * (g0x * tex3D<T>(tex, x0, y0, z1) + g1x * tex3D<T>(tex, x1, y0, z1)) + + g1y * (g0x * tex3D<T>(tex, x0, y1, z1) + g1x * tex3D<T>(tex, x1, y1, z1))); } ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { - const TextureInfo& info = kernel_tex_fetch(__texture_info, id); - CUtexObject tex = (CUtexObject)info.data; - - /* float4, byte4, ushort4 and half4 */ - const int texture_type = kernel_tex_type(id); - if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || - texture_type == IMAGE_DATA_TYPE_BYTE4 || - texture_type == IMAGE_DATA_TYPE_HALF4 || - texture_type == IMAGE_DATA_TYPE_USHORT4) - { - if(info.interpolation == INTERPOLATION_CUBIC) { - return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y); - } - else { - return tex2D<float4>(tex, x, y); - } - } - /* float, byte and half */ - else { - float f; - - if(info.interpolation == INTERPOLATION_CUBIC) { - f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y); - } - else { - f = tex2D<float>(tex, x, y); - } - - return make_float4(f, f, f, 1.0f); - } + const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + CUtexObject tex = (CUtexObject)info.data; + + /* float4, byte4, ushort4 and half4 */ + const int texture_type = kernel_tex_type(id); + if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || + texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { + if (info.interpolation == INTERPOLATION_CUBIC) { + return kernel_tex_image_interp_bicubic<float4>(info, tex, x, y); + } + else { + return tex2D<float4>(tex, x, y); + } + } + /* float, byte and half */ + else { + float f; + + if (info.interpolation == INTERPOLATION_CUBIC) { + f = kernel_tex_image_interp_bicubic<float>(info, tex, x, y); + } + else { + f = tex2D<float>(tex, x, y); + } + + return 
make_float4(f, f, f, 1.0f); + } } -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) +ccl_device float4 kernel_tex_image_interp_3d( + KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp) { - const TextureInfo& info = kernel_tex_fetch(__texture_info, id); - CUtexObject tex = (CUtexObject)info.data; - uint interpolation = (interp == INTERPOLATION_NONE)? info.interpolation: interp; - - const int texture_type = kernel_tex_type(id); - if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || - texture_type == IMAGE_DATA_TYPE_BYTE4 || - texture_type == IMAGE_DATA_TYPE_HALF4 || - texture_type == IMAGE_DATA_TYPE_USHORT4) - { - if(interpolation == INTERPOLATION_CUBIC) { - return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z); - } - else { - return tex3D<float4>(tex, x, y, z); - } - } - else { - float f; - - if(interpolation == INTERPOLATION_CUBIC) { - f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z); - } - else { - f = tex3D<float>(tex, x, y, z); - } - - return make_float4(f, f, f, 1.0f); - } + const TextureInfo &info = kernel_tex_fetch(__texture_info, id); + CUtexObject tex = (CUtexObject)info.data; + uint interpolation = (interp == INTERPOLATION_NONE) ? 
info.interpolation : interp; + + const int texture_type = kernel_tex_type(id); + if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || + texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4) { + if (interpolation == INTERPOLATION_CUBIC) { + return kernel_tex_image_interp_bicubic_3d<float4>(info, tex, x, y, z); + } + else { + return tex3D<float4>(tex, x, y, z); + } + } + else { + float f; + + if (interpolation == INTERPOLATION_CUBIC) { + f = kernel_tex_image_interp_bicubic_3d<float>(info, tex, x, y, z); + } + else { + f = tex3D<float>(tex, x, y, z); + } + + return make_float4(f, f, f, 1.0f); + } } diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h index 79af831c2fb..b6390679331 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h @@ -16,254 +16,257 @@ /* For OpenCL we do manual lookup and interpolation. 
*/ -ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) { - const uint tex_offset = id -#define KERNEL_TEX(type, name) + 1 +ccl_device_inline ccl_global TextureInfo *kernel_tex_info(KernelGlobals *kg, uint id) +{ + const uint tex_offset = id +#define KERNEL_TEX(type, name) +1 #include "kernel/kernel_textures.h" - ; + ; - return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset]; + return &((ccl_global TextureInfo *)kg->buffers[0])[tex_offset]; } -#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->cl_buffer] + info->data))[(index)] +#define tex_fetch(type, info, index) \ + ((ccl_global type *)(kg->buffers[info->cl_buffer] + info->data))[(index)] ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width) { - x %= width; - if(x < 0) - x += width; - return x; + x %= width; + if (x < 0) + x += width; + return x; } ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width) { - return clamp(x, 0, width-1); + return clamp(x, 0, width - 1); } -ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_global TextureInfo *info, int id, int offset) +ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, + const ccl_global TextureInfo *info, + int id, + int offset) { - const int texture_type = kernel_tex_type(id); - - /* Float4 */ - if(texture_type == IMAGE_DATA_TYPE_FLOAT4) { - return tex_fetch(float4, info, offset); - } - /* Byte4 */ - else if(texture_type == IMAGE_DATA_TYPE_BYTE4) { - uchar4 r = tex_fetch(uchar4, info, offset); - float f = 1.0f/255.0f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - /* Ushort4 */ - else if(texture_type == IMAGE_DATA_TYPE_USHORT4) { - ushort4 r = tex_fetch(ushort4, info, offset); - float f = 1.0f/65535.f; - return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); - } - /* Float */ - else if(texture_type == IMAGE_DATA_TYPE_FLOAT) { - float f = tex_fetch(float, info, offset); - return make_float4(f, f, f, 1.0f); - } - /* UShort */ 
- else if(texture_type == IMAGE_DATA_TYPE_USHORT) { - ushort r = tex_fetch(ushort, info, offset); - float f = r * (1.0f / 65535.0f); - return make_float4(f, f, f, 1.0f); - } - /* Byte */ + const int texture_type = kernel_tex_type(id); + + /* Float4 */ + if (texture_type == IMAGE_DATA_TYPE_FLOAT4) { + return tex_fetch(float4, info, offset); + } + /* Byte4 */ + else if (texture_type == IMAGE_DATA_TYPE_BYTE4) { + uchar4 r = tex_fetch(uchar4, info, offset); + float f = 1.0f / 255.0f; + return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); + } + /* Ushort4 */ + else if (texture_type == IMAGE_DATA_TYPE_USHORT4) { + ushort4 r = tex_fetch(ushort4, info, offset); + float f = 1.0f / 65535.f; + return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); + } + /* Float */ + else if (texture_type == IMAGE_DATA_TYPE_FLOAT) { + float f = tex_fetch(float, info, offset); + return make_float4(f, f, f, 1.0f); + } + /* UShort */ + else if (texture_type == IMAGE_DATA_TYPE_USHORT) { + ushort r = tex_fetch(ushort, info, offset); + float f = r * (1.0f / 65535.0f); + return make_float4(f, f, f, 1.0f); + } + /* Byte */ #ifdef cl_khr_fp16 - /* half and half4 are optional in OpenCL */ - else if(texture_type == IMAGE_DATA_TYPE_HALF) { - float f = tex_fetch(half, info, offset); - return make_float4(f, f, f, 1.0f); - } - else if(texture_type == IMAGE_DATA_TYPE_HALF4) { - half4 r = tex_fetch(half4, info, offset); - return make_float4(r.x, r.y, r.z, r.w); - } + /* half and half4 are optional in OpenCL */ + else if (texture_type == IMAGE_DATA_TYPE_HALF) { + float f = tex_fetch(half, info, offset); + return make_float4(f, f, f, 1.0f); + } + else if (texture_type == IMAGE_DATA_TYPE_HALF4) { + half4 r = tex_fetch(half4, info, offset); + return make_float4(r.x, r.y, r.z, r.w); + } #endif - else { - uchar r = tex_fetch(uchar, info, offset); - float f = r * (1.0f/255.0f); - return make_float4(f, f, f, 1.0f); - } + else { + uchar r = tex_fetch(uchar, info, offset); + float f = r * (1.0f / 255.0f); + return 
make_float4(f, f, f, 1.0f); + } } ccl_device_inline float4 svm_image_texture_read_2d(KernelGlobals *kg, int id, int x, int y) { - const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - - /* Wrap */ - if(info->extension == EXTENSION_REPEAT) { - x = svm_image_texture_wrap_periodic(x, info->width); - y = svm_image_texture_wrap_periodic(y, info->height); - } - else { - x = svm_image_texture_wrap_clamp(x, info->width); - y = svm_image_texture_wrap_clamp(y, info->height); - } - - int offset = x + info->width * y; - return svm_image_texture_read(kg, info, id, offset); + const ccl_global TextureInfo *info = kernel_tex_info(kg, id); + + /* Wrap */ + if (info->extension == EXTENSION_REPEAT) { + x = svm_image_texture_wrap_periodic(x, info->width); + y = svm_image_texture_wrap_periodic(y, info->height); + } + else { + x = svm_image_texture_wrap_clamp(x, info->width); + y = svm_image_texture_wrap_clamp(y, info->height); + } + + int offset = x + info->width * y; + return svm_image_texture_read(kg, info, id, offset); } ccl_device_inline float4 svm_image_texture_read_3d(KernelGlobals *kg, int id, int x, int y, int z) { - const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - - /* Wrap */ - if(info->extension == EXTENSION_REPEAT) { - x = svm_image_texture_wrap_periodic(x, info->width); - y = svm_image_texture_wrap_periodic(y, info->height); - z = svm_image_texture_wrap_periodic(z, info->depth); - } - else { - x = svm_image_texture_wrap_clamp(x, info->width); - y = svm_image_texture_wrap_clamp(y, info->height); - z = svm_image_texture_wrap_clamp(z, info->depth); - } - - int offset = x + info->width * y + info->width * info->height * z; - return svm_image_texture_read(kg, info, id, offset); + const ccl_global TextureInfo *info = kernel_tex_info(kg, id); + + /* Wrap */ + if (info->extension == EXTENSION_REPEAT) { + x = svm_image_texture_wrap_periodic(x, info->width); + y = svm_image_texture_wrap_periodic(y, info->height); + z = svm_image_texture_wrap_periodic(z, 
info->depth); + } + else { + x = svm_image_texture_wrap_clamp(x, info->width); + y = svm_image_texture_wrap_clamp(y, info->height); + z = svm_image_texture_wrap_clamp(z, info->depth); + } + + int offset = x + info->width * y + info->width * info->height * z; + return svm_image_texture_read(kg, info, id, offset); } - ccl_device_inline float svm_image_texture_frac(float x, int *ix) { - int i = float_to_int(x) - ((x < 0.0f)? 1: 0); - *ix = i; - return x - (float)i; + int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); + *ix = i; + return x - (float)i; } #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ - { \ - u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ - u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ - u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ - u[3] = (1.0f / 6.0f) * t * t * t; \ - } (void) 0 + { \ + u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ + u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \ + u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } \ + (void)0 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) { - const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - - if(info->extension == EXTENSION_CLIP) { - if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - - if(info->interpolation == INTERPOLATION_CLOSEST) { - /* Closest interpolation. */ - int ix, iy; - svm_image_texture_frac(x*info->width, &ix); - svm_image_texture_frac(y*info->height, &iy); - - return svm_image_texture_read_2d(kg, id, ix, iy); - } - else if(info->interpolation == INTERPOLATION_LINEAR) { - /* Bilinear interpolation. 
*/ - int ix, iy; - float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix); - float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy); - - float4 r; - r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read_2d(kg, id, ix, iy); - r += (1.0f - ty)*tx*svm_image_texture_read_2d(kg, id, ix+1, iy); - r += ty*(1.0f - tx)*svm_image_texture_read_2d(kg, id, ix, iy+1); - r += ty*tx*svm_image_texture_read_2d(kg, id, ix+1, iy+1); - return r; - } - else { - /* Bicubic interpolation. */ - int ix, iy; - float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix); - float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy); - - float u[4], v[4]; - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - for(int y = 0; y < 4; y++) { - for(int x = 0; x < 4; x++) { - float weight = u[x]*v[y]; - r += weight*svm_image_texture_read_2d(kg, id, ix+x-1, iy+y-1); - } - } - return r; - } + const ccl_global TextureInfo *info = kernel_tex_info(kg, id); + + if (info->extension == EXTENSION_CLIP) { + if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + + if (info->interpolation == INTERPOLATION_CLOSEST) { + /* Closest interpolation. */ + int ix, iy; + svm_image_texture_frac(x * info->width, &ix); + svm_image_texture_frac(y * info->height, &iy); + + return svm_image_texture_read_2d(kg, id, ix, iy); + } + else if (info->interpolation == INTERPOLATION_LINEAR) { + /* Bilinear interpolation. 
*/ + int ix, iy; + float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix); + float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); + + float4 r; + r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy); + r += (1.0f - ty) * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy); + r += ty * (1.0f - tx) * svm_image_texture_read_2d(kg, id, ix, iy + 1); + r += ty * tx * svm_image_texture_read_2d(kg, id, ix + 1, iy + 1); + return r; + } + else { + /* Bicubic interpolation. */ + int ix, iy; + float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix); + float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); + + float u[4], v[4]; + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + + float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + float weight = u[x] * v[y]; + r += weight * svm_image_texture_read_2d(kg, id, ix + x - 1, iy + y - 1); + } + } + return r; + } } - -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp) +ccl_device float4 +kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp) { - const ccl_global TextureInfo *info = kernel_tex_info(kg, id); - - if(info->extension == EXTENSION_CLIP) { - if(x < 0.0f || y < 0.0f || z < 0.0f || - x > 1.0f || y > 1.0f || z > 1.0f) - { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - - uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp; - - if(interpolation == INTERPOLATION_CLOSEST) { - /* Closest interpolation. */ - int ix, iy, iz; - svm_image_texture_frac(x*info->width, &ix); - svm_image_texture_frac(y*info->height, &iy); - svm_image_texture_frac(z*info->depth, &iz); - - return svm_image_texture_read_3d(kg, id, ix, iy, iz); - } - else if(interpolation == INTERPOLATION_LINEAR) { - /* Bilinear interpolation. 
*/ - int ix, iy, iz; - float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix); - float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy); - float tz = svm_image_texture_frac(z*info->depth - 0.5f, &iz); - - float4 r; - r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy, iz); - r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read_3d(kg, id, ix+1, iy, iz); - r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy+1, iz); - r += (1.0f - tz)*ty*tx*svm_image_texture_read_3d(kg, id, ix+1, iy+1, iz); - - r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy, iz+1); - r += tz*(1.0f - ty)*tx*svm_image_texture_read_3d(kg, id, ix+1, iy, iz+1); - r += tz*ty*(1.0f - tx)*svm_image_texture_read_3d(kg, id, ix, iy+1, iz+1); - r += tz*ty*tx*svm_image_texture_read_3d(kg, id, ix+1, iy+1, iz+1); - return r; - } - else { - /* Bicubic interpolation. */ - int ix, iy, iz; - float tx = svm_image_texture_frac(x*info->width - 0.5f, &ix); - float ty = svm_image_texture_frac(y*info->height - 0.5f, &iy); - float tz = svm_image_texture_frac(z*info->depth - 0.5f, &iz); - - float u[4], v[4], w[4]; - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); - - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - for(int z = 0; z < 4; z++) { - for(int y = 0; y < 4; y++) { - for(int x = 0; x < 4; x++) { - float weight = u[x]*v[y]*w[z]; - r += weight*svm_image_texture_read_3d(kg, id, ix+x-1, iy+y-1, iz+z-1); - } - } - } - return r; - } + const ccl_global TextureInfo *info = kernel_tex_info(kg, id); + + if (info->extension == EXTENSION_CLIP) { + if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) { + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + + uint interpolation = (interp == INTERPOLATION_NONE) ? info->interpolation : interp; + + if (interpolation == INTERPOLATION_CLOSEST) { + /* Closest interpolation. 
*/ + int ix, iy, iz; + svm_image_texture_frac(x * info->width, &ix); + svm_image_texture_frac(y * info->height, &iy); + svm_image_texture_frac(z * info->depth, &iz); + + return svm_image_texture_read_3d(kg, id, ix, iy, iz); + } + else if (interpolation == INTERPOLATION_LINEAR) { + /* Bilinear interpolation. */ + int ix, iy, iz; + float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix); + float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); + float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz); + + float4 r; + r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz); + r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz); + r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz); + r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz); + + r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy, iz + 1); + r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy, iz + 1); + r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(kg, id, ix, iy + 1, iz + 1); + r += tz * ty * tx * svm_image_texture_read_3d(kg, id, ix + 1, iy + 1, iz + 1); + return r; + } + else { + /* Bicubic interpolation. 
*/ + int ix, iy, iz; + float tx = svm_image_texture_frac(x * info->width - 0.5f, &ix); + float ty = svm_image_texture_frac(y * info->height - 0.5f, &iy); + float tz = svm_image_texture_frac(z * info->depth - 0.5f, &iz); + + float u[4], v[4], w[4]; + SET_CUBIC_SPLINE_WEIGHTS(u, tx); + SET_CUBIC_SPLINE_WEIGHTS(v, ty); + SET_CUBIC_SPLINE_WEIGHTS(w, tz); + + float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + + for (int z = 0; z < 4; z++) { + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + float weight = u[x] * v[y] * w[z]; + r += weight * svm_image_texture_read_3d(kg, id, ix + x - 1, iy + y - 1, iz + z - 1); + } + } + } + return r; + } } #undef SET_CUBIC_SPLINE_WEIGHTS diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h index 05e1ddf6da2..e123b4cd6ec 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel_split_function.h +++ b/intern/cycles/kernel/kernels/opencl/kernel_split_function.h @@ -14,50 +14,53 @@ * limitations under the License. 
*/ -#define KERNEL_NAME_JOIN(a, b) a ## _ ## b +#define KERNEL_NAME_JOIN(a, b) a##_##b #define KERNEL_NAME_EVAL(a, b) KERNEL_NAME_JOIN(a, b) -__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)( - ccl_global char *kg_global, - ccl_constant KernelData *data, +__kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, + KERNEL_NAME)(ccl_global char *kg_global, + ccl_constant KernelData *data, - ccl_global void *split_data_buffer, - ccl_global char *ray_state, + ccl_global void *split_data_buffer, + ccl_global char *ray_state, - KERNEL_BUFFER_PARAMS, + KERNEL_BUFFER_PARAMS, - ccl_global int *queue_index, - ccl_global char *use_queues_flag, - ccl_global unsigned int *work_pools, - ccl_global float *buffer - ) + ccl_global int *queue_index, + ccl_global char *use_queues_flag, + ccl_global unsigned int *work_pools, + ccl_global float *buffer) { #ifdef LOCALS_TYPE - ccl_local LOCALS_TYPE locals; + ccl_local LOCALS_TYPE locals; #endif - KernelGlobals *kg = (KernelGlobals*)kg_global; + KernelGlobals *kg = (KernelGlobals *)kg_global; - if(ccl_local_id(0) + ccl_local_id(1) == 0) { - kg->data = data; + if (ccl_local_id(0) + ccl_local_id(1) == 0) { + kg->data = data; - kernel_split_params.queue_index = queue_index; - kernel_split_params.use_queues_flag = use_queues_flag; - kernel_split_params.work_pools = work_pools; - kernel_split_params.tile.buffer = buffer; + kernel_split_params.queue_index = queue_index; + kernel_split_params.use_queues_flag = use_queues_flag; + kernel_split_params.work_pools = work_pools; + kernel_split_params.tile.buffer = buffer; - split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state); + split_data_init(kg, + &kernel_split_state, + ccl_global_size(0) * ccl_global_size(1), + split_data_buffer, + ray_state); + } - } + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - - KERNEL_NAME_EVAL(kernel, KERNEL_NAME)( - kg + 
KERNEL_NAME_EVAL(kernel, KERNEL_NAME) + (kg #ifdef LOCALS_TYPE - , &locals + , + &locals #endif - ); + ); } #undef KERNEL_NAME_JOIN diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt index 0a3d0b974cb..28d9ca854db 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -1,6 +1,6 @@ set(INC - ../.. + ../.. ) set(INC_SYS @@ -8,25 +8,25 @@ set(INC_SYS ) set(SRC - background.cpp - bsdf_diffuse_ramp.cpp - bsdf_phong_ramp.cpp - emissive.cpp - osl_bssrdf.cpp - osl_closures.cpp - osl_services.cpp - osl_shader.cpp + background.cpp + bsdf_diffuse_ramp.cpp + bsdf_phong_ramp.cpp + emissive.cpp + osl_bssrdf.cpp + osl_closures.cpp + osl_services.cpp + osl_shader.cpp ) set(HEADER_SRC - osl_closures.h - osl_globals.h - osl_services.h - osl_shader.h + osl_closures.h + osl_globals.h + osl_services.h + osl_shader.h ) set(LIB - cycles_render + cycles_render ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}") diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp index 6924a4144c5..b395227845d 100644 --- a/intern/cycles/kernel/osl/background.cpp +++ b/intern/cycles/kernel/osl/background.cpp @@ -51,11 +51,11 @@ using namespace OSL; /// only the weight is taking into account /// class GenericBackgroundClosure : public CClosurePrimitive { -public: - void setup(ShaderData *sd, int /* path_flag */, float3 weight) - { - background_setup(sd, weight); - } + public: + void setup(ShaderData *sd, int /* path_flag */, float3 weight) + { + background_setup(sd, weight); + } }; /// Holdout closure @@ -66,31 +66,28 @@ public: /// used /// class HoldoutClosure : CClosurePrimitive { -public: - void setup(ShaderData *sd, int /* path_flag */, float3 weight) - { - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight); - sd->flag |= SD_HOLDOUT; - } + public: + void setup(ShaderData *sd, int /* path_flag */, float3 weight) + { + closure_alloc(sd, 
sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight); + sd->flag |= SD_HOLDOUT; + } }; ClosureParam *closure_background_params() { - static ClosureParam params[] = { - CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"), - CLOSURE_FINISH_PARAM(GenericBackgroundClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"), + CLOSURE_FINISH_PARAM(GenericBackgroundClosure)}; + return params; } CCLOSURE_PREPARE(closure_background_prepare, GenericBackgroundClosure) ClosureParam *closure_holdout_params() { - static ClosureParam params[] = { - CLOSURE_FINISH_PARAM(HoldoutClosure) - }; - return params; + static ClosureParam params[] = {CLOSURE_FINISH_PARAM(HoldoutClosure)}; + return params; } CCLOSURE_PREPARE(closure_holdout_prepare, HoldoutClosure) diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp index ed5d5235a34..c5edc7c9be3 100644 --- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp @@ -47,36 +47,35 @@ CCL_NAMESPACE_BEGIN using namespace OSL; class DiffuseRampClosure : public CBSDFClosure { -public: - DiffuseRampBsdf params; - Color3 colors[8]; + public: + DiffuseRampBsdf params; + Color3 colors[8]; - void setup(ShaderData *sd, int /* path_flag */, float3 weight) - { - DiffuseRampBsdf *bsdf = (DiffuseRampBsdf*)bsdf_alloc_osl(sd, sizeof(DiffuseRampBsdf), weight, ¶ms); + void setup(ShaderData *sd, int /* path_flag */, float3 weight) + { + DiffuseRampBsdf *bsdf = (DiffuseRampBsdf *)bsdf_alloc_osl( + sd, sizeof(DiffuseRampBsdf), weight, ¶ms); - if(bsdf) { - bsdf->colors = (float3*)closure_alloc_extra(sd, sizeof(float3)*8); + if (bsdf) { + bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8); - if(bsdf->colors) { - for(int i = 0; i < 8; i++) - bsdf->colors[i] = TO_FLOAT3(colors[i]); + if (bsdf->colors) { + for (int i = 0; i < 8; i++) + bsdf->colors[i] = 
TO_FLOAT3(colors[i]); - sd->flag |= bsdf_diffuse_ramp_setup(bsdf); - } - } - } + sd->flag |= bsdf_diffuse_ramp_setup(bsdf); + } + } + } }; ClosureParam *closure_bsdf_diffuse_ramp_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N), - CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8), - CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"), - CLOSURE_FINISH_PARAM(DiffuseRampClosure) - }; - return params; + static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N), + CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8), + CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"), + CLOSURE_FINISH_PARAM(DiffuseRampClosure)}; + return params; } CCLOSURE_PREPARE(closure_bsdf_diffuse_ramp_prepare, DiffuseRampClosure) diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp index a8acdb8e342..4b7e59ff932 100644 --- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp +++ b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp @@ -46,37 +46,36 @@ CCL_NAMESPACE_BEGIN using namespace OSL; class PhongRampClosure : public CBSDFClosure { -public: - PhongRampBsdf params; - Color3 colors[8]; + public: + PhongRampBsdf params; + Color3 colors[8]; - void setup(ShaderData *sd, int /* path_flag */, float3 weight) - { - PhongRampBsdf *bsdf = (PhongRampBsdf*)bsdf_alloc_osl(sd, sizeof(PhongRampBsdf), weight, ¶ms); + void setup(ShaderData *sd, int /* path_flag */, float3 weight) + { + PhongRampBsdf *bsdf = (PhongRampBsdf *)bsdf_alloc_osl( + sd, sizeof(PhongRampBsdf), weight, ¶ms); - if(bsdf) { - bsdf->colors = (float3*)closure_alloc_extra(sd, sizeof(float3)*8); + if (bsdf) { + bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8); - if(bsdf->colors) { - for(int i = 0; i < 8; i++) - bsdf->colors[i] = TO_FLOAT3(colors[i]); + if (bsdf->colors) { + for (int i = 0; i < 8; i++) + bsdf->colors[i] = TO_FLOAT3(colors[i]); - sd->flag |= 
bsdf_phong_ramp_setup(bsdf); - } - } - } + sd->flag |= bsdf_phong_ramp_setup(bsdf); + } + } + } }; ClosureParam *closure_bsdf_phong_ramp_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N), - CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent), - CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8), - CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"), - CLOSURE_FINISH_PARAM(PhongRampClosure) - }; - return params; + static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N), + CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent), + CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8), + CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"), + CLOSURE_FINISH_PARAM(PhongRampClosure)}; + return params; } CCLOSURE_PREPARE(closure_bsdf_phong_ramp_prepare, PhongRampClosure) diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp index c2a848231f2..c29ddb13e2e 100644 --- a/intern/cycles/kernel/osl/emissive.cpp +++ b/intern/cycles/kernel/osl/emissive.cpp @@ -53,20 +53,18 @@ using namespace OSL; /// if the provided angles are PI/2, which is the default /// class GenericEmissiveClosure : public CClosurePrimitive { -public: - void setup(ShaderData *sd, int /* path_flag */, float3 weight) - { - emission_setup(sd, weight); - } + public: + void setup(ShaderData *sd, int /* path_flag */, float3 weight) + { + emission_setup(sd, weight); + } }; ClosureParam *closure_emission_params() { - static ClosureParam params[] = { - CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"), - CLOSURE_FINISH_PARAM(GenericEmissiveClosure) - }; - return params; + static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"), + CLOSURE_FINISH_PARAM(GenericEmissiveClosure)}; + return params; } CCLOSURE_PREPARE(closure_emission_prepare, GenericEmissiveClosure) diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp 
b/intern/cycles/kernel/osl/osl_bssrdf.cpp index 66ec8a996ca..dd52c33071c 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -56,77 +56,76 @@ static ustring u_random_walk("random_walk"); static ustring u_principled_random_walk("principled_random_walk"); class CBSSRDFClosure : public CClosurePrimitive { -public: - Bssrdf params; - ustring method; - - CBSSRDFClosure() - { - params.texture_blur = 0.0f; - params.sharpness = 0.0f; - params.roughness = 0.0f; - } - - void setup(ShaderData *sd, int path_flag, float3 weight) - { - if(method == u_cubic) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_CUBIC_ID); - } - else if(method == u_gaussian) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_GAUSSIAN_ID); - } - else if(method == u_burley) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID); - } - else if(method == u_principled) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID); - } - else if(method == u_random_walk) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID); - } - else if(method == u_principled_random_walk) { - alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID); - } - } - - void alloc(ShaderData *sd, int path_flag, float3 weight, ClosureType type) - { - Bssrdf *bssrdf = bssrdf_alloc(sd, weight); - - if(bssrdf) { - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { - params.radius = make_float3(0.0f, 0.0f, 0.0f); - } - - /* create one closure per color channel */ - bssrdf->radius = params.radius; - bssrdf->albedo = params.albedo; - bssrdf->texture_blur = params.texture_blur; - bssrdf->sharpness = params.sharpness; - bssrdf->N = params.N; - bssrdf->roughness = params.roughness; - sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type); - } - } + public: + Bssrdf params; + ustring method; + + 
CBSSRDFClosure() + { + params.texture_blur = 0.0f; + params.sharpness = 0.0f; + params.roughness = 0.0f; + } + + void setup(ShaderData *sd, int path_flag, float3 weight) + { + if (method == u_cubic) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_CUBIC_ID); + } + else if (method == u_gaussian) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_GAUSSIAN_ID); + } + else if (method == u_burley) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID); + } + else if (method == u_principled) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID); + } + else if (method == u_random_walk) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID); + } + else if (method == u_principled_random_walk) { + alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID); + } + } + + void alloc(ShaderData *sd, int path_flag, float3 weight, ClosureType type) + { + Bssrdf *bssrdf = bssrdf_alloc(sd, weight); + + if (bssrdf) { + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { + params.radius = make_float3(0.0f, 0.0f, 0.0f); + } + + /* create one closure per color channel */ + bssrdf->radius = params.radius; + bssrdf->albedo = params.albedo; + bssrdf->texture_blur = params.texture_blur; + bssrdf->sharpness = params.sharpness; + bssrdf->N = params.N; + bssrdf->roughness = params.roughness; + sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type); + } + } }; ClosureParam *closure_bssrdf_params() { - static ClosureParam params[] = { - CLOSURE_STRING_PARAM(CBSSRDFClosure, method), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius), - CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo), - CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.texture_blur, "texture_blur"), - CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.sharpness, "sharpness"), - 
CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"), - CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"), - CLOSURE_FINISH_PARAM(CBSSRDFClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_STRING_PARAM(CBSSRDFClosure, method), + CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N), + CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius), + CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo), + CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.texture_blur, "texture_blur"), + CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.sharpness, "sharpness"), + CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"), + CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"), + CLOSURE_FINISH_PARAM(CBSSRDFClosure)}; + return params; } CCLOSURE_PREPARE(closure_bssrdf_prepare, CBSSRDFClosure) diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 169351d5ad9..aa7e2727577 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -71,706 +71,787 @@ using namespace OSL; /* BSDF class definitions */ BSDF_CLOSURE_CLASS_BEGIN(Diffuse, diffuse, DiffuseBsdf, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N), -BSDF_CLOSURE_CLASS_END(Diffuse, diffuse) - -BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N), -BSDF_CLOSURE_CLASS_END(Translucent, translucent) - -BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N), - CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness), -BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar) - -BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR) - CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N), -BSDF_CLOSURE_CLASS_END(Reflection, reflection) - -BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR) - 
CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N), - CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior), -BSDF_CLOSURE_CLASS_END(Refraction, refraction) - -BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N), - CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma), -BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet) - -BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, ashikhmin_shirley_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N), - CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T), - CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y), -BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso) - -BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N), - CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size), - CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth), -BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon) - -BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N), - CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size), - CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth), -BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, microfacet_ggx, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x), -BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, microfacet_ggx_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, 
params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_y), -BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, microfacet_beckmann, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x), -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, microfacet_beckmann_aniso, MicrofacetBsdf, LABEL_GLOSSY|LABEL_REFLECT) - CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_y), -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction, MicrofacetBsdf, LABEL_GLOSSY|LABEL_TRANSMIT) - CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior), -BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction) - -BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction, MicrofacetBsdf, LABEL_GLOSSY|LABEL_TRANSMIT) - CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior), -BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) - -BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1), - 
CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2), - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset), -BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) - -BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) - CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N), - CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1), - CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2), - CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), - CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset), -BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission) - -BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse, principled_diffuse, PrincipledDiffuseBsdf, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N), - CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness), -BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse) - -BSDF_CLOSURE_CLASS_BEGIN(PrincipledSheen, principled_sheen, PrincipledSheenBsdf, LABEL_DIFFUSE) - CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N), -BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen) - -/* PRINCIPLED HAIR BSDF */ -class PrincipledHairClosure : public CBSDFClosure { -public: - PrincipledHairBSDF params; - - PrincipledHairBSDF *alloc(ShaderData *sd, int path_flag, float3 weight) - { - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)bsdf_alloc_osl(sd, sizeof(PrincipledHairBSDF), weight, ¶ms); - if(!bsdf) { - return NULL; - } - - PrincipledHairExtra *extra = (PrincipledHairExtra*)closure_alloc_extra(sd, sizeof(PrincipledHairExtra)); - if(!extra) { - return NULL; - } - - bsdf->extra = extra; - return bsdf; - } - - void setup(ShaderData *sd, int path_flag, float3 weight) - { - if(!skip(sd, path_flag, LABEL_GLOSSY)) { - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - sd->flag |= (bsdf) ? 
bsdf_principled_hair_setup(sd, bsdf) : 0; - } - } +CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N), + BSDF_CLOSURE_CLASS_END(Diffuse, diffuse) + + BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N), + BSDF_CLOSURE_CLASS_END(Translucent, translucent) + + BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N), + CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness), + BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar) + + BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR) + CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N), + BSDF_CLOSURE_CLASS_END(Reflection, reflection) + + BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR) + CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N), + CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior), + BSDF_CLOSURE_CLASS_END(Refraction, refraction) + + BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N), + CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma), + BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet) + + BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley, + ashikhmin_shirley_aniso, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N), + CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T), + CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y), + BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley_aniso) + + BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N), + CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size), + CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth), + BSDF_CLOSURE_CLASS_END(DiffuseToon, 
diffuse_toon) + + BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY) + CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N), + CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size), + CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth), + BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon) + + BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX, + microfacet_ggx, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x), + BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx) + + BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXAniso, + microfacet_ggx_aniso, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXAnisoClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetGGXAnisoClosure, params.alpha_y), + BSDF_CLOSURE_CLASS_END(MicrofacetGGXAniso, microfacet_ggx_aniso) + + BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann, + microfacet_beckmann, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x), + BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann) + + BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannAniso, + microfacet_beckmann_aniso, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_REFLECT) + CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannAnisoClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannAnisoClosure, params.alpha_y), + BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannAniso, microfacet_beckmann_aniso) + + BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction, + microfacet_ggx_refraction, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_TRANSMIT) + 
CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior), + BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction) + + BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction, + microfacet_beckmann_refraction, + MicrofacetBsdf, + LABEL_GLOSSY | LABEL_TRANSMIT) + CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior), + BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction) + + BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY) + CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N), + CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1), + CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2), + CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), + CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset), + BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection) + + BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY) + CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N), + CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1), + CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2), + CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T), + CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset), + BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission) + + BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse, + principled_diffuse, + PrincipledDiffuseBsdf, + LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N), + CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness), + BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse) + + BSDF_CLOSURE_CLASS_BEGIN(PrincipledSheen, + 
principled_sheen, + PrincipledSheenBsdf, + LABEL_DIFFUSE) + CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N), + BSDF_CLOSURE_CLASS_END(PrincipledSheen, principled_sheen) + + /* PRINCIPLED HAIR BSDF */ + class PrincipledHairClosure : public CBSDFClosure { + public: + PrincipledHairBSDF params; + + PrincipledHairBSDF *alloc(ShaderData *sd, int path_flag, float3 weight) + { + PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc_osl( + sd, sizeof(PrincipledHairBSDF), weight, &params); + if (!bsdf) { + return NULL; + } + + PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra( + sd, sizeof(PrincipledHairExtra)); + if (!extra) { + return NULL; + } + + bsdf->extra = extra; + return bsdf; + } + + void setup(ShaderData *sd, int path_flag, float3 weight) + { + if (!skip(sd, path_flag, LABEL_GLOSSY)) { + PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + sd->flag |= (bsdf) ? bsdf_principled_hair_setup(sd, bsdf) : 0; + } + } }; static ClosureParam *closure_bsdf_principled_hair_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N), - CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha), - CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta), - CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"), - CLOSURE_FINISH_PARAM(PrincipledHairClosure) - }; - - return params; + static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N), + CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness), + 
CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha), + CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta), + CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"), + CLOSURE_FINISH_PARAM(PrincipledHairClosure)}; + + return params; } CCLOSURE_PREPARE(closure_bsdf_principled_hair_prepare, PrincipledHairClosure) /* DISNEY PRINCIPLED CLEARCOAT */ class PrincipledClearcoatClosure : public CBSDFClosure { -public: - MicrofacetBsdf params; - float clearcoat, clearcoat_roughness; - - MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); - if(!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(!extra) { - return NULL; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = extra; - bsdf->ior = 1.5f; - bsdf->alpha_x = clearcoat_roughness; - bsdf->alpha_y = clearcoat_roughness; - bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); - bsdf->extra->clearcoat = clearcoat; - return bsdf; - } - - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); - } + public: + MicrofacetBsdf params; + float clearcoat, clearcoat_roughness; + + MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, &params); + if (!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return NULL; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = extra; + bsdf->ior = 1.5f; + bsdf->alpha_x = clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness; + bsdf->extra->color = 
make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); + bsdf->extra->clearcoat = clearcoat; + return bsdf; + } + + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); + } }; ClosureParam *closure_bsdf_principled_clearcoat_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N), - CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat), - CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness), - CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"), - CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N), + CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat), + CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness), + CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"), + CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)}; + return params; } CCLOSURE_PREPARE(closure_bsdf_principled_clearcoat_prepare, PrincipledClearcoatClosure) - /* Registration */ -static void register_closure(OSL::ShadingSystem *ss, const char *name, int id, OSL::ClosureParam *params, OSL::PrepareClosureFunc prepare) +static void register_closure(OSL::ShadingSystem *ss, + const char *name, + int id, + OSL::ClosureParam *params, + OSL::PrepareClosureFunc prepare) { - /* optimization: it's possible to not use a prepare function at all and - * only initialize the actual class when accessing the closure component - * data, but then we need to map the id to the class somehow */ + /* optimization: it's possible to not use a prepare function at all and + * only initialize the actual class when accessing the closure component + * data, but then we need to map the id to the class somehow */ 
#if OSL_LIBRARY_VERSION_CODE >= 10900 - ss->register_closure(name, id, params, prepare, NULL); + ss->register_closure(name, id, params, prepare, NULL); #else - ss->register_closure(name, id, params, prepare, NULL, 16); + ss->register_closure(name, id, params, prepare, NULL, 16); #endif } void OSLShader::register_closures(OSLShadingSystem *ss_) { - OSL::ShadingSystem *ss = (OSL::ShadingSystem*)ss_; - int id = 0; - - register_closure(ss, "diffuse", id++, - bsdf_diffuse_params(), bsdf_diffuse_prepare); - register_closure(ss, "oren_nayar", id++, - bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare); - register_closure(ss, "translucent", id++, - bsdf_translucent_params(), bsdf_translucent_prepare); - register_closure(ss, "reflection", id++, - bsdf_reflection_params(), bsdf_reflection_prepare); - register_closure(ss, "refraction", id++, - bsdf_refraction_params(), bsdf_refraction_prepare); - register_closure(ss, "transparent", id++, - closure_bsdf_transparent_params(), closure_bsdf_transparent_prepare); - register_closure(ss, "microfacet_ggx", id++, - bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare); - register_closure(ss, "microfacet_ggx_aniso", id++, - bsdf_microfacet_ggx_aniso_params(), bsdf_microfacet_ggx_aniso_prepare); - register_closure(ss, "microfacet_ggx_refraction", id++, - bsdf_microfacet_ggx_refraction_params(), bsdf_microfacet_ggx_refraction_prepare); - register_closure(ss, "microfacet_multi_ggx", id++, - closure_bsdf_microfacet_multi_ggx_params(), closure_bsdf_microfacet_multi_ggx_prepare); - register_closure(ss, "microfacet_multi_ggx_glass", id++, - closure_bsdf_microfacet_multi_ggx_glass_params(), closure_bsdf_microfacet_multi_ggx_glass_prepare); - register_closure(ss, "microfacet_multi_ggx_aniso", id++, - closure_bsdf_microfacet_multi_ggx_aniso_params(), closure_bsdf_microfacet_multi_ggx_aniso_prepare); - register_closure(ss, "microfacet_ggx_fresnel", id++, - closure_bsdf_microfacet_ggx_fresnel_params(), 
closure_bsdf_microfacet_ggx_fresnel_prepare); - register_closure(ss, "microfacet_ggx_aniso_fresnel", id++, - closure_bsdf_microfacet_ggx_aniso_fresnel_params(), closure_bsdf_microfacet_ggx_aniso_fresnel_prepare); - register_closure(ss, "microfacet_multi_ggx_fresnel", id++, - closure_bsdf_microfacet_multi_ggx_fresnel_params(), closure_bsdf_microfacet_multi_ggx_fresnel_prepare); - register_closure(ss, "microfacet_multi_ggx_glass_fresnel", id++, - closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(), closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare); - register_closure(ss, "microfacet_multi_ggx_aniso_fresnel", id++, - closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(), closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare); - register_closure(ss, "microfacet_beckmann", id++, - bsdf_microfacet_beckmann_params(), bsdf_microfacet_beckmann_prepare); - register_closure(ss, "microfacet_beckmann_aniso", id++, - bsdf_microfacet_beckmann_aniso_params(), bsdf_microfacet_beckmann_aniso_prepare); - register_closure(ss, "microfacet_beckmann_refraction", id++, - bsdf_microfacet_beckmann_refraction_params(), bsdf_microfacet_beckmann_refraction_prepare); - register_closure(ss, "ashikhmin_shirley", id++, - bsdf_ashikhmin_shirley_aniso_params(), bsdf_ashikhmin_shirley_aniso_prepare); - register_closure(ss, "ashikhmin_velvet", id++, - bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare); - register_closure(ss, "diffuse_toon", id++, - bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare); - register_closure(ss, "glossy_toon", id++, - bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare); - register_closure(ss, "principled_diffuse", id++, - bsdf_principled_diffuse_params(), bsdf_principled_diffuse_prepare); - register_closure(ss, "principled_sheen", id++, - bsdf_principled_sheen_params(), bsdf_principled_sheen_prepare); - register_closure(ss, "principled_clearcoat", id++, - closure_bsdf_principled_clearcoat_params(), closure_bsdf_principled_clearcoat_prepare); 
- - register_closure(ss, "emission", id++, - closure_emission_params(), closure_emission_prepare); - register_closure(ss, "background", id++, - closure_background_params(), closure_background_prepare); - register_closure(ss, "holdout", id++, - closure_holdout_params(), closure_holdout_prepare); - register_closure(ss, "diffuse_ramp", id++, - closure_bsdf_diffuse_ramp_params(), closure_bsdf_diffuse_ramp_prepare); - register_closure(ss, "phong_ramp", id++, - closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare); - register_closure(ss, "bssrdf", id++, - closure_bssrdf_params(), closure_bssrdf_prepare); - - register_closure(ss, "hair_reflection", id++, - bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare); - register_closure(ss, "hair_transmission", id++, - bsdf_hair_transmission_params(), bsdf_hair_transmission_prepare); - - register_closure(ss, "principled_hair", id++, - closure_bsdf_principled_hair_params(), closure_bsdf_principled_hair_prepare); - - register_closure(ss, "henyey_greenstein", id++, - closure_henyey_greenstein_params(), closure_henyey_greenstein_prepare); - register_closure(ss, "absorption", id++, - closure_absorption_params(), closure_absorption_prepare); + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)ss_; + int id = 0; + + register_closure(ss, "diffuse", id++, bsdf_diffuse_params(), bsdf_diffuse_prepare); + register_closure(ss, "oren_nayar", id++, bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare); + register_closure(ss, "translucent", id++, bsdf_translucent_params(), bsdf_translucent_prepare); + register_closure(ss, "reflection", id++, bsdf_reflection_params(), bsdf_reflection_prepare); + register_closure(ss, "refraction", id++, bsdf_refraction_params(), bsdf_refraction_prepare); + register_closure(ss, + "transparent", + id++, + closure_bsdf_transparent_params(), + closure_bsdf_transparent_prepare); + register_closure( + ss, "microfacet_ggx", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare); + 
register_closure(ss, + "microfacet_ggx_aniso", + id++, + bsdf_microfacet_ggx_aniso_params(), + bsdf_microfacet_ggx_aniso_prepare); + register_closure(ss, + "microfacet_ggx_refraction", + id++, + bsdf_microfacet_ggx_refraction_params(), + bsdf_microfacet_ggx_refraction_prepare); + register_closure(ss, + "microfacet_multi_ggx", + id++, + closure_bsdf_microfacet_multi_ggx_params(), + closure_bsdf_microfacet_multi_ggx_prepare); + register_closure(ss, + "microfacet_multi_ggx_glass", + id++, + closure_bsdf_microfacet_multi_ggx_glass_params(), + closure_bsdf_microfacet_multi_ggx_glass_prepare); + register_closure(ss, + "microfacet_multi_ggx_aniso", + id++, + closure_bsdf_microfacet_multi_ggx_aniso_params(), + closure_bsdf_microfacet_multi_ggx_aniso_prepare); + register_closure(ss, + "microfacet_ggx_fresnel", + id++, + closure_bsdf_microfacet_ggx_fresnel_params(), + closure_bsdf_microfacet_ggx_fresnel_prepare); + register_closure(ss, + "microfacet_ggx_aniso_fresnel", + id++, + closure_bsdf_microfacet_ggx_aniso_fresnel_params(), + closure_bsdf_microfacet_ggx_aniso_fresnel_prepare); + register_closure(ss, + "microfacet_multi_ggx_fresnel", + id++, + closure_bsdf_microfacet_multi_ggx_fresnel_params(), + closure_bsdf_microfacet_multi_ggx_fresnel_prepare); + register_closure(ss, + "microfacet_multi_ggx_glass_fresnel", + id++, + closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(), + closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare); + register_closure(ss, + "microfacet_multi_ggx_aniso_fresnel", + id++, + closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(), + closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare); + register_closure(ss, + "microfacet_beckmann", + id++, + bsdf_microfacet_beckmann_params(), + bsdf_microfacet_beckmann_prepare); + register_closure(ss, + "microfacet_beckmann_aniso", + id++, + bsdf_microfacet_beckmann_aniso_params(), + bsdf_microfacet_beckmann_aniso_prepare); + register_closure(ss, + "microfacet_beckmann_refraction", + id++, + 
bsdf_microfacet_beckmann_refraction_params(), + bsdf_microfacet_beckmann_refraction_prepare); + register_closure(ss, + "ashikhmin_shirley", + id++, + bsdf_ashikhmin_shirley_aniso_params(), + bsdf_ashikhmin_shirley_aniso_prepare); + register_closure( + ss, "ashikhmin_velvet", id++, bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare); + register_closure( + ss, "diffuse_toon", id++, bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare); + register_closure(ss, "glossy_toon", id++, bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare); + register_closure(ss, + "principled_diffuse", + id++, + bsdf_principled_diffuse_params(), + bsdf_principled_diffuse_prepare); + register_closure( + ss, "principled_sheen", id++, bsdf_principled_sheen_params(), bsdf_principled_sheen_prepare); + register_closure(ss, + "principled_clearcoat", + id++, + closure_bsdf_principled_clearcoat_params(), + closure_bsdf_principled_clearcoat_prepare); + + register_closure(ss, "emission", id++, closure_emission_params(), closure_emission_prepare); + register_closure( + ss, "background", id++, closure_background_params(), closure_background_prepare); + register_closure(ss, "holdout", id++, closure_holdout_params(), closure_holdout_prepare); + register_closure(ss, + "diffuse_ramp", + id++, + closure_bsdf_diffuse_ramp_params(), + closure_bsdf_diffuse_ramp_prepare); + register_closure( + ss, "phong_ramp", id++, closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare); + register_closure(ss, "bssrdf", id++, closure_bssrdf_params(), closure_bssrdf_prepare); + + register_closure( + ss, "hair_reflection", id++, bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare); + register_closure(ss, + "hair_transmission", + id++, + bsdf_hair_transmission_params(), + bsdf_hair_transmission_prepare); + + register_closure(ss, + "principled_hair", + id++, + closure_bsdf_principled_hair_params(), + closure_bsdf_principled_hair_prepare); + + register_closure(ss, + "henyey_greenstein", + id++, + 
closure_henyey_greenstein_params(), + closure_henyey_greenstein_prepare); + register_closure( + ss, "absorption", id++, closure_absorption_params(), closure_absorption_prepare); } /* BSDF Closure */ bool CBSDFClosure::skip(const ShaderData *sd, int path_flag, int scattering) { - /* caustic options */ - if((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { - KernelGlobals *kg = sd->osl_globals; - - if((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || - (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) - { - return true; - } - } - - return false; -} + /* caustic options */ + if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) { + KernelGlobals *kg = sd->osl_globals; + if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) || + (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) { + return true; + } + } + + return false; +} /* GGX closures with Fresnel */ class MicrofacetFresnelClosure : public CBSDFClosure { -public: - MicrofacetBsdf params; - float3 color; - float3 cspec0; - - MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) - { - /* Technically, the MultiGGX Glass closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. 
*/ - if(skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - return NULL; - } - - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); - if(!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(!extra) { - return NULL; - } - - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - return bsdf; - } + public: + MicrofacetBsdf params; + float3 color; + float3 cspec0; + + MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) + { + /* Technically, the MultiGGX Glass closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return NULL; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, &params); + if (!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return NULL; + } + + bsdf->extra = extra; + bsdf->extra->color = color; + bsdf->extra->cspec0 = cspec0; + bsdf->extra->clearcoat = 0.0f; + return bsdf; + } }; class MicrofacetGGXFresnelClosure : public MicrofacetFresnelClosure { -public: - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); - } + public: + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); + } }; ClosureParam 
*closure_bsdf_microfacet_ggx_fresnel_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)}; + return params; } CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_fresnel_prepare, MicrofacetGGXFresnelClosure); class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure { -public: - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd); - } + public: + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd); + } }; ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), - 
CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y), - CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y), + CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)}; + return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare, MicrofacetGGXAnisoFresnelClosure); - +CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare, + MicrofacetGGXAnisoFresnelClosure); /* Multiscattering GGX closures */ class MicrofacetMultiClosure : public CBSDFClosure { -public: - MicrofacetBsdf params; - float3 color; - - MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) - { - /* Technically, the MultiGGX closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. 
*/ - if(skip(sd, path_flag, LABEL_GLOSSY|LABEL_REFLECT)) { - return NULL; - } - - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); - if(!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(!extra) { - return NULL; - } - - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->clearcoat = 0.0f; - return bsdf; - } + public: + MicrofacetBsdf params; + float3 color; + + MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) + { + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. */ + if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return NULL; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, &params); + if (!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return NULL; + } + + bsdf->extra = extra; + bsdf->extra->color = color; + bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->clearcoat = 0.0f; + return bsdf; + } }; class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure { -public: - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - bsdf->ior = 0.0f; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); - } + public: + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->ior = 0.0f; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= 
bsdf_microfacet_multi_ggx_setup(bsdf); + } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; + return params; } CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_prepare, MicrofacetMultiGGXClosure); class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure { -public: - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - bsdf->ior = 0.0f; - sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf); - } + public: + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->ior = 0.0f; + sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf); + } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - 
CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; + return params; } CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_prepare, MicrofacetMultiGGXAnisoClosure); class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure { -public: - MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure() {} - - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); - } + public: + MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure() + { + } + + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); + } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure) - }; - return params; + static ClosureParam params[] = { + 
CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)}; + return params; } CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_prepare, MicrofacetMultiGGXGlassClosure); - /* Multiscattering GGX closures with Fresnel */ class MicrofacetMultiFresnelClosure : public CBSDFClosure { -public: - MicrofacetBsdf params; - float3 color; - float3 cspec0; - - MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) - { - /* Technically, the MultiGGX closure may also transmit. However, - * since this is set statically and only used for caustic flags, this - * is probably as good as it gets. */ - if(skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - return NULL; - } - - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); - if(!bsdf) { - return NULL; - } - - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(!extra) { - return NULL; - } - - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - return bsdf; - } + public: + MicrofacetBsdf params; + float3 color; + float3 cspec0; + + MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) + { + /* Technically, the MultiGGX closure may also transmit. However, + * since this is set statically and only used for caustic flags, this + * is probably as good as it gets. 
*/ + if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return NULL; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl( + sd, sizeof(MicrofacetBsdf), weight, ¶ms); + if (!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + return NULL; + } + + bsdf->extra = extra; + bsdf->extra->color = color; + bsdf->extra->cspec0 = cspec0; + bsdf->extra->clearcoat = 0.0f; + return bsdf; + } }; class MicrofacetMultiGGXFresnelClosure : public MicrofacetMultiFresnelClosure { -public: - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); - } + public: + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(bsdf, sd); + } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), + 
CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; + return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare, MicrofacetMultiGGXFresnelClosure); +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare, + MicrofacetMultiGGXFresnelClosure); class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosure { -public: - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd); - } + public: + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd); + } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), + 
CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; + return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare, MicrofacetMultiGGXAnisoFresnelClosure); +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare, + MicrofacetMultiGGXAnisoFresnelClosure); class MicrofacetMultiGGXGlassFresnelClosure : public MicrofacetMultiFresnelClosure { -public: - MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure() {} - - void setup(ShaderData *sd, int path_flag, float3 weight) - { - MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); - if(!bsdf) { - return; - } - - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_y = bsdf->alpha_x; - sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); - } + public: + MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure() + { + } + + void setup(ShaderData *sd, int path_flag, float3 weight) + { + MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight); + if (!bsdf) { + return; + } + + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_y = bsdf->alpha_x; + sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); + } }; ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), - CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), - 
CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), - CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), - CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), - CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x), + CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color), + CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0), + CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"), + CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)}; + return params; } -CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare, MicrofacetMultiGGXGlassFresnelClosure); +CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare, + MicrofacetMultiGGXGlassFresnelClosure); /* Transparent */ class TransparentClosure : public CBSDFClosure { -public: - ShaderClosure params; - float3 unused; - - void setup(ShaderData *sd, int path_flag, float3 weight) - { - bsdf_transparent_setup(sd, weight, path_flag); - } + public: + ShaderClosure params; + float3 unused; + + void setup(ShaderData *sd, int path_flag, float3 weight) + { + bsdf_transparent_setup(sd, weight, path_flag); + } }; ClosureParam *closure_bsdf_transparent_params() { - static ClosureParam params[] = { - CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"), - CLOSURE_FINISH_PARAM(TransparentClosure) - }; - return params; + static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"), + CLOSURE_FINISH_PARAM(TransparentClosure)}; + return params; } CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure) @@ -778,52 +859,49 @@ CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure) /* 
Volume */ class VolumeAbsorptionClosure : public CBSDFClosure { -public: - void setup(ShaderData *sd, int path_flag, float3 weight) - { - volume_extinction_setup(sd, weight); - } + public: + void setup(ShaderData *sd, int path_flag, float3 weight) + { + volume_extinction_setup(sd, weight); + } }; ClosureParam *closure_absorption_params() { - static ClosureParam params[] = { - CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"), - CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure) - }; - return params; + static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"), + CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)}; + return params; } CCLOSURE_PREPARE(closure_absorption_prepare, VolumeAbsorptionClosure) class VolumeHenyeyGreensteinClosure : public CBSDFClosure { -public: - HenyeyGreensteinVolume params; + public: + HenyeyGreensteinVolume params; - void setup(ShaderData *sd, int path_flag, float3 weight) - { - volume_extinction_setup(sd, weight); + void setup(ShaderData *sd, int path_flag, float3 weight) + { + volume_extinction_setup(sd, weight); - HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc_osl(sd, sizeof(HenyeyGreensteinVolume), weight, ¶ms); - if(!volume) { - return; - } + HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc_osl( + sd, sizeof(HenyeyGreensteinVolume), weight, ¶ms); + if (!volume) { + return; + } - sd->flag |= volume_henyey_greenstein_setup(volume); - } + sd->flag |= volume_henyey_greenstein_setup(volume); + } }; ClosureParam *closure_henyey_greenstein_params() { - static ClosureParam params[] = { - CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g), - CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"), - CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure) - }; - return params; + static ClosureParam params[] = { + CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g), + CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, 
"label"), + CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)}; + return params; } CCLOSURE_PREPARE(closure_henyey_greenstein_prepare, VolumeHenyeyGreensteinClosure) - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_closures.h b/intern/cycles/kernel/osl/osl_closures.h index 2a50704b569..d3db6b71f5c 100644 --- a/intern/cycles/kernel/osl/osl_closures.h +++ b/intern/cycles/kernel/osl/osl_closures.h @@ -74,24 +74,34 @@ void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id, void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void *data); void closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data); void closure_bsdf_microfacet_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *, int id, void *data); -void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *, int id, void *data); +void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *, + int id, + void *data); +void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *, + int id, + void *data); +void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *, + int id, + void *data); +void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *, + int id, + void *data); void closure_bsdf_principled_clearcoat_prepare(OSL::RendererServices *, int id, void *data); void closure_bsdf_principled_hair_prepare(OSL::RendererServices *, int id, void *data); -#define CCLOSURE_PREPARE(name, classname) \ -void name(RendererServices *, int id, void *data) \ -{ \ - memset(data, 0, sizeof(classname)); \ - new (data) 
classname(); \ -} +#define CCLOSURE_PREPARE(name, classname) \ + void name(RendererServices *, int id, void *data) \ + { \ + memset(data, 0, sizeof(classname)); \ + new (data) classname(); \ + } #define CCLOSURE_PREPARE_STATIC(name, classname) static CCLOSURE_PREPARE(name, classname) #define CLOSURE_FLOAT3_PARAM(st, fld) \ - { TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) } + { \ + TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) \ + } #define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z) #define TO_COLOR3(v) OSL::Color3(v.x, v.y, v.z) @@ -100,50 +110,50 @@ void name(RendererServices *, int id, void *data) \ /* Closure */ class CClosurePrimitive { -public: - virtual void setup(ShaderData *sd, int path_flag, float3 weight) = 0; + public: + virtual void setup(ShaderData *sd, int path_flag, float3 weight) = 0; - OSL::ustring label; + OSL::ustring label; }; /* BSDF */ class CBSDFClosure : public CClosurePrimitive { -public: - bool skip(const ShaderData *sd, int path_flag, int scattering); + public: + bool skip(const ShaderData *sd, int path_flag, int scattering); }; #define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \ \ -class Upper##Closure : public CBSDFClosure { \ -public: \ - structname params; \ - float3 unused; \ + class Upper##Closure : public CBSDFClosure { \ + public: \ + structname params; \ + float3 unused; \ \ - void setup(ShaderData *sd, int path_flag, float3 weight) \ - { \ - if(!skip(sd, path_flag, TYPE)) { \ - structname *bsdf = (structname*)bsdf_alloc_osl(sd, sizeof(structname), weight, ¶ms); \ - sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \ - } \ - } \ -}; \ + void setup(ShaderData *sd, int path_flag, float3 weight) \ + { \ + if (!skip(sd, path_flag, TYPE)) { \ + structname *bsdf = (structname *)bsdf_alloc_osl(sd, sizeof(structname), weight, ¶ms); \ + sd->flag |= (bsdf) ? 
bsdf_##lower##_setup(bsdf) : 0; \ + } \ + } \ + }; \ \ -static ClosureParam *bsdf_##lower##_params() \ -{ \ - static ClosureParam params[] = { + static ClosureParam *bsdf_##lower##_params() \ + { \ + static ClosureParam params[] = { /* parameters */ #define BSDF_CLOSURE_CLASS_END(Upper, lower) \ - CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), \ - CLOSURE_FINISH_PARAM(Upper##Closure) \ - }; \ - return params; \ -} \ + CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \ + } \ + ; \ + return params; \ + } \ \ -CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure) + CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure) CCL_NAMESPACE_END -#endif /* __OSL_CLOSURES_H__ */ +#endif /* __OSL_CLOSURES_H__ */ diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h index 88192fbcccb..641c9967586 100644 --- a/intern/cycles/kernel/osl/osl_globals.h +++ b/intern/cycles/kernel/osl/osl_globals.h @@ -19,79 +19,79 @@ #ifdef WITH_OSL -#include <OSL/oslexec.h> +# include <OSL/oslexec.h> -#include "util/util_map.h" -#include "util/util_param.h" -#include "util/util_thread.h" -#include "util/util_vector.h" +# include "util/util_map.h" +# include "util/util_param.h" +# include "util/util_thread.h" +# include "util/util_vector.h" -#ifndef WIN32 +# ifndef WIN32 using std::isfinite; -#endif +# endif CCL_NAMESPACE_BEGIN class OSLRenderServices; struct OSLGlobals { - OSLGlobals() - { - ss = NULL; - ts = NULL; - services = NULL; - use = false; - } - - bool use; - - /* shading system */ - OSL::ShadingSystem *ss; - OSL::TextureSystem *ts; - OSLRenderServices *services; - - /* shader states */ - vector<OSL::ShaderGroupRef> surface_state; - vector<OSL::ShaderGroupRef> volume_state; - vector<OSL::ShaderGroupRef> displacement_state; - vector<OSL::ShaderGroupRef> bump_state; - OSL::ShaderGroupRef background_state; - - /* attributes */ - struct Attribute { - TypeDesc type; - 
AttributeDescriptor desc; - ParamValue value; - }; - - typedef unordered_map<ustring, Attribute, ustringHash> AttributeMap; - typedef unordered_map<ustring, int, ustringHash> ObjectNameMap; - - vector<AttributeMap> attribute_map; - ObjectNameMap object_name_map; - vector<ustring> object_names; + OSLGlobals() + { + ss = NULL; + ts = NULL; + services = NULL; + use = false; + } + + bool use; + + /* shading system */ + OSL::ShadingSystem *ss; + OSL::TextureSystem *ts; + OSLRenderServices *services; + + /* shader states */ + vector<OSL::ShaderGroupRef> surface_state; + vector<OSL::ShaderGroupRef> volume_state; + vector<OSL::ShaderGroupRef> displacement_state; + vector<OSL::ShaderGroupRef> bump_state; + OSL::ShaderGroupRef background_state; + + /* attributes */ + struct Attribute { + TypeDesc type; + AttributeDescriptor desc; + ParamValue value; + }; + + typedef unordered_map<ustring, Attribute, ustringHash> AttributeMap; + typedef unordered_map<ustring, int, ustringHash> ObjectNameMap; + + vector<AttributeMap> attribute_map; + ObjectNameMap object_name_map; + vector<ustring> object_names; }; /* trace() call result */ struct OSLTraceData { - Ray ray; - Intersection isect; - ShaderData sd; - bool setup; - bool init; + Ray ray; + Intersection isect; + ShaderData sd; + bool setup; + bool init; }; /* thread key for thread specific data lookup */ struct OSLThreadData { - OSL::ShaderGlobals globals; - OSL::PerThreadInfo *osl_thread_info; - OSLTraceData tracedata; - OSL::ShadingContext *context; - OIIO::TextureSystem::Perthread *oiio_thread_info; + OSL::ShaderGlobals globals; + OSL::PerThreadInfo *osl_thread_info; + OSLTraceData tracedata; + OSL::ShadingContext *context; + OIIO::TextureSystem::Perthread *oiio_thread_info; }; CCL_NAMESPACE_END #endif -#endif /* __OSL_GLOBALS_H__ */ +#endif /* __OSL_GLOBALS_H__ */ diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 6464d382634..eb9f672fd8a 100644 --- 
a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -63,16 +63,16 @@ CCL_NAMESPACE_BEGIN /* RenderServices implementation */ -static void copy_matrix(OSL::Matrix44& m, const Transform& tfm) +static void copy_matrix(OSL::Matrix44 &m, const Transform &tfm) { - ProjectionTransform t = projection_transpose(ProjectionTransform(tfm)); - memcpy((void *)&m, &t, sizeof(m)); + ProjectionTransform t = projection_transpose(ProjectionTransform(tfm)); + memcpy((void *)&m, &t, sizeof(m)); } -static void copy_matrix(OSL::Matrix44& m, const ProjectionTransform& tfm) +static void copy_matrix(OSL::Matrix44 &m, const ProjectionTransform &tfm) { - ProjectionTransform t = projection_transpose(tfm); - memcpy((void *)&m, &t, sizeof(m)); + ProjectionTransform t = projection_transpose(tfm); + memcpy((void *)&m, &t, sizeof(m)); } /* static ustrings */ @@ -129,815 +129,846 @@ ustring OSLRenderServices::u_at_ao("@ao"); OSLRenderServices::OSLRenderServices() { - kernel_globals = NULL; - osl_ts = NULL; + kernel_globals = NULL; + osl_ts = NULL; #ifdef WITH_PTEX - size_t maxmem = 16384 * 1024; - ptex_cache = PtexCache::create(0, maxmem); + size_t maxmem = 16384 * 1024; + ptex_cache = PtexCache::create(0, maxmem); #endif } OSLRenderServices::~OSLRenderServices() { - if(osl_ts) { - VLOG(2) << "OSL texture system stats:\n" - << osl_ts->getstats(); - } + if (osl_ts) { + VLOG(2) << "OSL texture system stats:\n" << osl_ts->getstats(); + } #ifdef WITH_PTEX - ptex_cache->release(); + ptex_cache->release(); #endif } void OSLRenderServices::thread_init(KernelGlobals *kernel_globals_, OSL::TextureSystem *osl_ts_) { - kernel_globals = kernel_globals_; - osl_ts = osl_ts_; + kernel_globals = kernel_globals_; + osl_ts = osl_ts_; } -bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform, + 
float time) { - /* this is only used for shader and object space, we don't really have - * a concept of shader space, so we just use object space for both. */ - if(xform) { - const ShaderData *sd = (const ShaderData *)xform; - KernelGlobals *kg = sd->osl_globals; - int object = sd->object; - - if(object != OBJECT_NONE) { + /* this is only used for shader and object space, we don't really have + * a concept of shader space, so we just use object space for both. */ + if (xform) { + const ShaderData *sd = (const ShaderData *)xform; + KernelGlobals *kg = sd->osl_globals; + int object = sd->object; + + if (object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm; + Transform tfm; - if(time == sd->time) - tfm = sd->ob_tfm; - else - tfm = object_fetch_transform_motion_test(kg, object, time, NULL); + if (time == sd->time) + tfm = sd->ob_tfm; + else + tfm = object_fetch_transform_motion_test(kg, object, time, NULL); #else - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); #endif - copy_matrix(result, tfm); + copy_matrix(result, tfm); - return true; - } - else if(sd->type == PRIMITIVE_LAMP) { - copy_matrix(result, sd->ob_tfm); + return true; + } + else if (sd->type == PRIMITIVE_LAMP) { + copy_matrix(result, sd->ob_tfm); - return true; - } - } + return true; + } + } - return false; + return false; } -bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform, + float time) { - /* this is only used for shader and object space, we don't really have - * a concept of shader space, so we just use object space for both. 
*/ - if(xform) { - const ShaderData *sd = (const ShaderData *)xform; - KernelGlobals *kg = sd->osl_globals; - int object = sd->object; - - if(object != OBJECT_NONE) { + /* this is only used for shader and object space, we don't really have + * a concept of shader space, so we just use object space for both. */ + if (xform) { + const ShaderData *sd = (const ShaderData *)xform; + KernelGlobals *kg = sd->osl_globals; + int object = sd->object; + + if (object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform itfm; + Transform itfm; - if(time == sd->time) - itfm = sd->ob_itfm; - else - object_fetch_transform_motion_test(kg, object, time, &itfm); + if (time == sd->time) + itfm = sd->ob_itfm; + else + object_fetch_transform_motion_test(kg, object, time, &itfm); #else - Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); #endif - copy_matrix(result, itfm); + copy_matrix(result, itfm); - return true; - } - else if(sd->type == PRIMITIVE_LAMP) { - copy_matrix(result, sd->ob_itfm); + return true; + } + else if (sd->type == PRIMITIVE_LAMP) { + copy_matrix(result, sd->ob_itfm); - return true; - } - } + return true; + } + } - return false; + return false; } -bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time) +bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring from, + float time) { - KernelGlobals *kg = kernel_globals; - - if(from == u_ndc) { - copy_matrix(result, kernel_data.cam.ndctoworld); - return true; - } - else if(from == u_raster) { - copy_matrix(result, kernel_data.cam.rastertoworld); - return true; - } - else if(from == u_screen) { - copy_matrix(result, kernel_data.cam.screentoworld); - return true; - } - else if(from == u_camera) { - copy_matrix(result, kernel_data.cam.cameratoworld); - return true; - } - else if(from == u_world) { - result.makeIdentity(); - 
return true; - } - - return false; + KernelGlobals *kg = kernel_globals; + + if (from == u_ndc) { + copy_matrix(result, kernel_data.cam.ndctoworld); + return true; + } + else if (from == u_raster) { + copy_matrix(result, kernel_data.cam.rastertoworld); + return true; + } + else if (from == u_screen) { + copy_matrix(result, kernel_data.cam.screentoworld); + return true; + } + else if (from == u_camera) { + copy_matrix(result, kernel_data.cam.cameratoworld); + return true; + } + else if (from == u_world) { + result.makeIdentity(); + return true; + } + + return false; } -bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time) +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring to, + float time) { - KernelGlobals *kg = kernel_globals; - - if(to == u_ndc) { - copy_matrix(result, kernel_data.cam.worldtondc); - return true; - } - else if(to == u_raster) { - copy_matrix(result, kernel_data.cam.worldtoraster); - return true; - } - else if(to == u_screen) { - copy_matrix(result, kernel_data.cam.worldtoscreen); - return true; - } - else if(to == u_camera) { - copy_matrix(result, kernel_data.cam.worldtocamera); - return true; - } - else if(to == u_world) { - result.makeIdentity(); - return true; - } - - return false; + KernelGlobals *kg = kernel_globals; + + if (to == u_ndc) { + copy_matrix(result, kernel_data.cam.worldtondc); + return true; + } + else if (to == u_raster) { + copy_matrix(result, kernel_data.cam.worldtoraster); + return true; + } + else if (to == u_screen) { + copy_matrix(result, kernel_data.cam.worldtoscreen); + return true; + } + else if (to == u_camera) { + copy_matrix(result, kernel_data.cam.worldtocamera); + return true; + } + else if (to == u_world) { + result.makeIdentity(); + return true; + } + + return false; } -bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) +bool 
OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform) { - /* this is only used for shader and object space, we don't really have - * a concept of shader space, so we just use object space for both. */ - if(xform) { - const ShaderData *sd = (const ShaderData *)xform; - int object = sd->object; + /* this is only used for shader and object space, we don't really have + * a concept of shader space, so we just use object space for both. */ + if (xform) { + const ShaderData *sd = (const ShaderData *)xform; + int object = sd->object; - if(object != OBJECT_NONE) { + if (object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; + Transform tfm = sd->ob_tfm; #else - KernelGlobals *kg = sd->osl_globals; - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + KernelGlobals *kg = sd->osl_globals; + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); #endif - copy_matrix(result, tfm); + copy_matrix(result, tfm); - return true; - } - else if(sd->type == PRIMITIVE_LAMP) { - copy_matrix(result, sd->ob_tfm); + return true; + } + else if (sd->type == PRIMITIVE_LAMP) { + copy_matrix(result, sd->ob_tfm); - return true; - } - } + return true; + } + } - return false; + return false; } -bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) +bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform) { - /* this is only used for shader and object space, we don't really have - * a concept of shader space, so we just use object space for both. */ - if(xform) { - const ShaderData *sd = (const ShaderData *)xform; - int object = sd->object; + /* this is only used for shader and object space, we don't really have + * a concept of shader space, so we just use object space for both. 
*/ + if (xform) { + const ShaderData *sd = (const ShaderData *)xform; + int object = sd->object; - if(object != OBJECT_NONE) { + if (object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; + Transform tfm = sd->ob_itfm; #else - KernelGlobals *kg = sd->osl_globals; - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + KernelGlobals *kg = sd->osl_globals; + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); #endif - copy_matrix(result, tfm); + copy_matrix(result, tfm); - return true; - } - else if(sd->type == PRIMITIVE_LAMP) { - copy_matrix(result, sd->ob_itfm); + return true; + } + else if (sd->type == PRIMITIVE_LAMP) { + copy_matrix(result, sd->ob_itfm); - return true; - } - } + return true; + } + } - return false; + return false; } bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) { - KernelGlobals *kg = kernel_globals; - - if(from == u_ndc) { - copy_matrix(result, kernel_data.cam.ndctoworld); - return true; - } - else if(from == u_raster) { - copy_matrix(result, kernel_data.cam.rastertoworld); - return true; - } - else if(from == u_screen) { - copy_matrix(result, kernel_data.cam.screentoworld); - return true; - } - else if(from == u_camera) { - copy_matrix(result, kernel_data.cam.cameratoworld); - return true; - } - - return false; + KernelGlobals *kg = kernel_globals; + + if (from == u_ndc) { + copy_matrix(result, kernel_data.cam.ndctoworld); + return true; + } + else if (from == u_raster) { + copy_matrix(result, kernel_data.cam.rastertoworld); + return true; + } + else if (from == u_screen) { + copy_matrix(result, kernel_data.cam.screentoworld); + return true; + } + else if (from == u_camera) { + copy_matrix(result, kernel_data.cam.cameratoworld); + return true; + } + + return false; } -bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to) +bool 
OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring to) { - KernelGlobals *kg = kernel_globals; - - if(to == u_ndc) { - copy_matrix(result, kernel_data.cam.worldtondc); - return true; - } - else if(to == u_raster) { - copy_matrix(result, kernel_data.cam.worldtoraster); - return true; - } - else if(to == u_screen) { - copy_matrix(result, kernel_data.cam.worldtoscreen); - return true; - } - else if(to == u_camera) { - copy_matrix(result, kernel_data.cam.worldtocamera); - return true; - } - - return false; + KernelGlobals *kg = kernel_globals; + + if (to == u_ndc) { + copy_matrix(result, kernel_data.cam.worldtondc); + return true; + } + else if (to == u_raster) { + copy_matrix(result, kernel_data.cam.worldtoraster); + return true; + } + else if (to == u_screen) { + copy_matrix(result, kernel_data.cam.worldtoscreen); + return true; + } + else if (to == u_camera) { + copy_matrix(result, kernel_data.cam.worldtocamera); + return true; + } + + return false; } -bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives, - ustring object, TypeDesc type, ustring name, - int index, void *val) +bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, + bool derivatives, + ustring object, + TypeDesc type, + ustring name, + int index, + void *val) { - return false; + return false; } static bool set_attribute_float2(float2 f[3], TypeDesc type, bool derivatives, void *val) { - if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) - { - float *fval = (float *)val; - - fval[0] = f[0].x; - fval[1] = f[0].y; - fval[2] = 0.0f; - - if(derivatives) { - fval[3] = f[1].x; - fval[4] = f[1].y; - fval[5] = 0.0f; - - fval[6] = f[2].x; - fval[7] = f[2].y; - fval[8] = 0.0f; - } - - return true; - } - else if(type == TypeDesc::TypeFloat) { - float *fval = (float *)val; - fval[0] = average(f[0]); - - if(derivatives) { - fval[1] = average(f[1]); 
- fval[2] = average(f[2]); - } - - return true; - } - - return false; + if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { + float *fval = (float *)val; + + fval[0] = f[0].x; + fval[1] = f[0].y; + fval[2] = 0.0f; + + if (derivatives) { + fval[3] = f[1].x; + fval[4] = f[1].y; + fval[5] = 0.0f; + + fval[6] = f[2].x; + fval[7] = f[2].y; + fval[8] = 0.0f; + } + + return true; + } + else if (type == TypeDesc::TypeFloat) { + float *fval = (float *)val; + fval[0] = average(f[0]); + + if (derivatives) { + fval[1] = average(f[1]); + fval[2] = average(f[2]); + } + + return true; + } + + return false; } static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val) { - if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) - { - float *fval = (float *)val; - - fval[0] = f[0].x; - fval[1] = f[0].y; - fval[2] = f[0].z; - - if(derivatives) { - fval[3] = f[1].x; - fval[4] = f[1].y; - fval[5] = f[1].z; - - fval[6] = f[2].x; - fval[7] = f[2].y; - fval[8] = f[2].z; - } - - return true; - } - else if(type == TypeDesc::TypeFloat) { - float *fval = (float *)val; - fval[0] = average(f[0]); - - if(derivatives) { - fval[1] = average(f[1]); - fval[2] = average(f[2]); - } - - return true; - } - - return false; + if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { + float *fval = (float *)val; + + fval[0] = f[0].x; + fval[1] = f[0].y; + fval[2] = f[0].z; + + if (derivatives) { + fval[3] = f[1].x; + fval[4] = f[1].y; + fval[5] = f[1].z; + + fval[6] = f[2].x; + fval[7] = f[2].y; + fval[8] = f[2].z; + } + + return true; + } + else if (type == TypeDesc::TypeFloat) { + float *fval = (float *)val; + fval[0] = average(f[0]); + + if (derivatives) { + fval[1] = average(f[1]); + fval[2] = average(f[2]); + } + + return true; + } + + return 
false; } static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void *val) { - float3 fv[3]; + float3 fv[3]; - fv[0] = f; - fv[1] = make_float3(0.0f, 0.0f, 0.0f); - fv[2] = make_float3(0.0f, 0.0f, 0.0f); + fv[0] = f; + fv[1] = make_float3(0.0f, 0.0f, 0.0f); + fv[2] = make_float3(0.0f, 0.0f, 0.0f); - return set_attribute_float3(fv, type, derivatives, val); + return set_attribute_float3(fv, type, derivatives, val); } static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val) { - if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || - type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) - { - float *fval = (float *)val; - fval[0] = f[0]; - fval[1] = f[1]; - fval[2] = f[2]; - - if(derivatives) { - fval[3] = f[1]; - fval[4] = f[1]; - fval[5] = f[1]; - - fval[6] = f[2]; - fval[7] = f[2]; - fval[8] = f[2]; - } - - return true; - } - else if(type == TypeDesc::TypeFloat) { - float *fval = (float *)val; - fval[0] = f[0]; - - if(derivatives) { - fval[1] = f[1]; - fval[2] = f[2]; - } - - return true; - } - - return false; + if (type == TypeDesc::TypePoint || type == TypeDesc::TypeVector || + type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor) { + float *fval = (float *)val; + fval[0] = f[0]; + fval[1] = f[1]; + fval[2] = f[2]; + + if (derivatives) { + fval[3] = f[1]; + fval[4] = f[1]; + fval[5] = f[1]; + + fval[6] = f[2]; + fval[7] = f[2]; + fval[8] = f[2]; + } + + return true; + } + else if (type == TypeDesc::TypeFloat) { + float *fval = (float *)val; + fval[0] = f[0]; + + if (derivatives) { + fval[1] = f[1]; + fval[2] = f[2]; + } + + return true; + } + + return false; } static bool set_attribute_float(float f, TypeDesc type, bool derivatives, void *val) { - float fv[3]; + float fv[3]; - fv[0] = f; - fv[1] = 0.0f; - fv[2] = 0.0f; + fv[0] = f; + fv[1] = 0.0f; + fv[2] = 0.0f; - return set_attribute_float(fv, type, derivatives, val); + return set_attribute_float(fv, type, derivatives, val); } static 
bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val) { - if(type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { - int *ival = (int *)val; - ival[0] = i; + if (type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { + int *ival = (int *)val; + ival[0] = i; - if(derivatives) { - ival[1] = 0; - ival[2] = 0; - } + if (derivatives) { + ival[1] = 0; + ival[2] = 0; + } - return true; - } + return true; + } - return false; + return false; } static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val) { - if(type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) { - ustring *sval = (ustring *)val; - sval[0] = str; + if (type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && + type.arraylen == 0) { + ustring *sval = (ustring *)val; + sval[0] = str; - if(derivatives) { - sval[1] = OSLRenderServices::u_empty; - sval[2] = OSLRenderServices::u_empty; - } + if (derivatives) { + sval[1] = OSLRenderServices::u_empty; + sval[2] = OSLRenderServices::u_empty; + } - return true; - } + return true; + } - return false; + return false; } static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives, void *val) { - if(type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) { - float *fval = (float *)val; + if (type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) { + float *fval = (float *)val; - fval[0] = P[0].x; - fval[1] = P[0].y; - fval[2] = P[0].z; + fval[0] = P[0].x; + fval[1] = P[0].y; + fval[2] = P[0].z; - fval[3] = P[1].x; - fval[4] = P[1].y; - fval[5] = P[1].z; + fval[3] = P[1].x; + fval[4] = P[1].y; + fval[5] = P[1].z; - fval[6] = P[2].x; - fval[7] = P[2].y; - fval[8] = P[2].z; + fval[6] = P[2].x; + fval[7] = P[2].y; + fval[8] = P[2].z; - if(type.arraylen > 3) - memset(fval + 3*3, 0, sizeof(float)*3*(type.arraylen - 3)); - if(derivatives) - memset(fval + 
type.arraylen*3, 0, sizeof(float)*2*3*type.arraylen); + if (type.arraylen > 3) + memset(fval + 3 * 3, 0, sizeof(float) * 3 * (type.arraylen - 3)); + if (derivatives) + memset(fval + type.arraylen * 3, 0, sizeof(float) * 2 * 3 * type.arraylen); - return true; - } + return true; + } - return false; + return false; } -static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val) +static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val) { - if(type == TypeDesc::TypeMatrix) { - copy_matrix(*(OSL::Matrix44*)val, tfm); - return true; - } + if (type == TypeDesc::TypeMatrix) { + copy_matrix(*(OSL::Matrix44 *)val, tfm); + return true; + } - return false; + return false; } -static bool get_primitive_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr, - const TypeDesc& type, bool derivatives, void *val) +static bool get_primitive_attribute(KernelGlobals *kg, + const ShaderData *sd, + const OSLGlobals::Attribute &attr, + const TypeDesc &type, + bool derivatives, + void *val) { - if(attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || - attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) - { - float3 fval[3]; - fval[0] = primitive_attribute_float3(kg, sd, attr.desc, - (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); - return set_attribute_float3(fval, type, derivatives, val); - } - else if(attr.type == TypeFloat2) { - float2 fval[2]; - fval[0] = primitive_attribute_float2(kg, sd, attr.desc, - (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); - return set_attribute_float2(fval, type, derivatives, val); - } - else if(attr.type == TypeDesc::TypeFloat) { - float fval[3]; - fval[0] = primitive_attribute_float(kg, sd, attr.desc, - (derivatives) ? &fval[1] : NULL, (derivatives) ? 
&fval[2] : NULL); - return set_attribute_float(fval, type, derivatives, val); - } - else { - return false; - } + if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || + attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { + float3 fval[3]; + fval[0] = primitive_attribute_float3( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + return set_attribute_float3(fval, type, derivatives, val); + } + else if (attr.type == TypeFloat2) { + float2 fval[2]; + fval[0] = primitive_attribute_float2( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + return set_attribute_float2(fval, type, derivatives, val); + } + else if (attr.type == TypeDesc::TypeFloat) { + float fval[3]; + fval[0] = primitive_attribute_float( + kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); + return set_attribute_float(fval, type, derivatives, val); + } + else { + return false; + } } -static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr, - const TypeDesc& type, bool derivatives, void *val) +static bool get_mesh_attribute(KernelGlobals *kg, + const ShaderData *sd, + const OSLGlobals::Attribute &attr, + const TypeDesc &type, + bool derivatives, + void *val) { - if(attr.type == TypeDesc::TypeMatrix) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc); - return set_attribute_matrix(tfm, type, val); - } - else { - return false; - } + if (attr.type == TypeDesc::TypeMatrix) { + Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc); + return set_attribute_matrix(tfm, type, val); + } + else { + return false; + } } -static void get_object_attribute(const OSLGlobals::Attribute& attr, bool derivatives, void *val) +static void get_object_attribute(const OSLGlobals::Attribute &attr, bool derivatives, void *val) { - size_t datasize = attr.value.datasize(); + size_t datasize = 
attr.value.datasize(); - memcpy(val, attr.value.data(), datasize); - if(derivatives) - memset((char *)val + datasize, 0, datasize * 2); + memcpy(val, attr.value.data(), datasize); + if (derivatives) + memset((char *)val + datasize, 0, datasize * 2); } -bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name, - TypeDesc type, bool derivatives, void *val) +bool OSLRenderServices::get_object_standard_attribute( + KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val) { - /* todo: turn this into hash table? */ - - /* Object Attributes */ - if(name == u_object_location) { - float3 f = object_location(kg, sd); - return set_attribute_float3(f, type, derivatives, val); - } - else if(name == u_object_index) { - float f = object_pass_id(kg, sd->object); - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_geom_dupli_generated) { - float3 f = object_dupli_generated(kg, sd->object); - return set_attribute_float3(f, type, derivatives, val); - } - else if(name == u_geom_dupli_uv) { - float3 f = object_dupli_uv(kg, sd->object); - return set_attribute_float3(f, type, derivatives, val); - } - else if(name == u_material_index) { - float f = shader_pass_id(kg, sd); - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_object_random) { - float f = object_random_number(kg, sd->object); - return set_attribute_float(f, type, derivatives, val); - } - - /* Particle Attributes */ - else if(name == u_particle_index) { - int particle_id = object_particle_id(kg, sd->object); - float f = particle_index(kg, particle_id); - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_particle_random) { - int particle_id = object_particle_id(kg, sd->object); - float f = hash_int_01(particle_index(kg, particle_id)); - return set_attribute_float(f, type, derivatives, val); - } - - else if(name == u_particle_age) { - int particle_id = 
object_particle_id(kg, sd->object); - float f = particle_age(kg, particle_id); - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_particle_lifetime) { - int particle_id = object_particle_id(kg, sd->object); - float f = particle_lifetime(kg, particle_id); - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_particle_location) { - int particle_id = object_particle_id(kg, sd->object); - float3 f = particle_location(kg, particle_id); - return set_attribute_float3(f, type, derivatives, val); - } -#if 0 /* unsupported */ - else if(name == u_particle_rotation) { - int particle_id = object_particle_id(kg, sd->object); - float4 f = particle_rotation(kg, particle_id); - return set_attribute_float4(f, type, derivatives, val); - } + /* todo: turn this into hash table? */ + + /* Object Attributes */ + if (name == u_object_location) { + float3 f = object_location(kg, sd); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_object_index) { + float f = object_pass_id(kg, sd->object); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_geom_dupli_generated) { + float3 f = object_dupli_generated(kg, sd->object); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_geom_dupli_uv) { + float3 f = object_dupli_uv(kg, sd->object); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_material_index) { + float f = shader_pass_id(kg, sd); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_object_random) { + float f = object_random_number(kg, sd->object); + return set_attribute_float(f, type, derivatives, val); + } + + /* Particle Attributes */ + else if (name == u_particle_index) { + int particle_id = object_particle_id(kg, sd->object); + float f = particle_index(kg, particle_id); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_particle_random) { + 
int particle_id = object_particle_id(kg, sd->object); + float f = hash_int_01(particle_index(kg, particle_id)); + return set_attribute_float(f, type, derivatives, val); + } + + else if (name == u_particle_age) { + int particle_id = object_particle_id(kg, sd->object); + float f = particle_age(kg, particle_id); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_particle_lifetime) { + int particle_id = object_particle_id(kg, sd->object); + float f = particle_lifetime(kg, particle_id); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_particle_location) { + int particle_id = object_particle_id(kg, sd->object); + float3 f = particle_location(kg, particle_id); + return set_attribute_float3(f, type, derivatives, val); + } +#if 0 /* unsupported */ + else if(name == u_particle_rotation) { + int particle_id = object_particle_id(kg, sd->object); + float4 f = particle_rotation(kg, particle_id); + return set_attribute_float4(f, type, derivatives, val); + } #endif - else if(name == u_particle_size) { - int particle_id = object_particle_id(kg, sd->object); - float f = particle_size(kg, particle_id); - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_particle_velocity) { - int particle_id = object_particle_id(kg, sd->object); - float3 f = particle_velocity(kg, particle_id); - return set_attribute_float3(f, type, derivatives, val); - } - else if(name == u_particle_angular_velocity) { - int particle_id = object_particle_id(kg, sd->object); - float3 f = particle_angular_velocity(kg, particle_id); - return set_attribute_float3(f, type, derivatives, val); - } - - /* Geometry Attributes */ - else if(name == u_geom_numpolyvertices) { - return set_attribute_int(3, type, derivatives, val); - } - else if((name == u_geom_trianglevertices || name == u_geom_polyvertices) - && sd->type & PRIMITIVE_ALL_TRIANGLE) - { - float3 P[3]; - - if(sd->type & PRIMITIVE_TRIANGLE) - triangle_vertices(kg, sd->prim, 
P); - else - motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P); - - if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_position_transform(kg, sd, &P[0]); - object_position_transform(kg, sd, &P[1]); - object_position_transform(kg, sd, &P[2]); - } - - return set_attribute_float3_3(P, type, derivatives, val); - } - else if(name == u_geom_name) { - ustring object_name = kg->osl->object_names[sd->object]; - return set_attribute_string(object_name, type, derivatives, val); - } - else if(name == u_is_smooth) { - float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0); - return set_attribute_float(f, type, derivatives, val); - } - /* Hair Attributes */ - else if(name == u_is_curve) { - float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0; - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_curve_thickness) { - float f = curve_thickness(kg, sd); - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_curve_tangent_normal) { - float3 f = curve_tangent_normal(kg, sd); - return set_attribute_float3(f, type, derivatives, val); - } - else - return false; + else if (name == u_particle_size) { + int particle_id = object_particle_id(kg, sd->object); + float f = particle_size(kg, particle_id); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_particle_velocity) { + int particle_id = object_particle_id(kg, sd->object); + float3 f = particle_velocity(kg, particle_id); + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_particle_angular_velocity) { + int particle_id = object_particle_id(kg, sd->object); + float3 f = particle_angular_velocity(kg, particle_id); + return set_attribute_float3(f, type, derivatives, val); + } + + /* Geometry Attributes */ + else if (name == u_geom_numpolyvertices) { + return set_attribute_int(3, type, derivatives, val); + } + else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices) && + sd->type & 
PRIMITIVE_ALL_TRIANGLE) { + float3 P[3]; + + if (sd->type & PRIMITIVE_TRIANGLE) + triangle_vertices(kg, sd->prim, P); + else + motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P); + + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform(kg, sd, &P[0]); + object_position_transform(kg, sd, &P[1]); + object_position_transform(kg, sd, &P[2]); + } + + return set_attribute_float3_3(P, type, derivatives, val); + } + else if (name == u_geom_name) { + ustring object_name = kg->osl->object_names[sd->object]; + return set_attribute_string(object_name, type, derivatives, val); + } + else if (name == u_is_smooth) { + float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0); + return set_attribute_float(f, type, derivatives, val); + } + /* Hair Attributes */ + else if (name == u_is_curve) { + float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0; + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_curve_thickness) { + float f = curve_thickness(kg, sd); + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_curve_tangent_normal) { + float3 f = curve_tangent_normal(kg, sd); + return set_attribute_float3(f, type, derivatives, val); + } + else + return false; } -bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name, - TypeDesc type, bool derivatives, void *val) +bool OSLRenderServices::get_background_attribute( + KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val) { - if(name == u_path_ray_length) { - /* Ray Length */ - float f = sd->ray_length; - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_path_ray_depth) { - /* Ray Depth */ - PathState *state = sd->osl_path_state; - int f = state->bounce; - return set_attribute_int(f, type, derivatives, val); - } - else if(name == u_path_diffuse_depth) { - /* Diffuse Ray Depth */ - PathState *state = sd->osl_path_state; - int f = 
state->diffuse_bounce; - return set_attribute_int(f, type, derivatives, val); - } - else if(name == u_path_glossy_depth) { - /* Glossy Ray Depth */ - PathState *state = sd->osl_path_state; - int f = state->glossy_bounce; - return set_attribute_int(f, type, derivatives, val); - } - else if(name == u_path_transmission_depth) { - /* Transmission Ray Depth */ - PathState *state = sd->osl_path_state; - int f = state->transmission_bounce; - return set_attribute_int(f, type, derivatives, val); - } - else if(name == u_path_transparent_depth) { - /* Transparent Ray Depth */ - PathState *state = sd->osl_path_state; - int f = state->transparent_bounce; - return set_attribute_int(f, type, derivatives, val); - } - else if(name == u_path_transmission_depth) { - /* Transmission Ray Depth */ - PathState *state = sd->osl_path_state; - int f = state->transmission_bounce; - return set_attribute_int(f, type, derivatives, val); - } - else if(name == u_ndc) { - /* NDC coordinates with special exception for otho */ - OSLThreadData *tdata = kg->osl_tdata; - OSL::ShaderGlobals *globals = &tdata->globals; - float3 ndc[3]; - - if((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { - ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); - - if(derivatives) { - ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx) - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy) - ndc[0]; - } - } - else { - ndc[0] = camera_world_to_ndc(kg, sd, sd->P); - - if(derivatives) { - ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0]; - } - } - - return set_attribute_float3(ndc, type, derivatives, val); - } - else - return false; + if (name == u_path_ray_length) { + /* Ray Length */ + float f = sd->ray_length; + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_path_ray_depth) { + /* Ray Depth */ + PathState *state 
= sd->osl_path_state; + int f = state->bounce; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_diffuse_depth) { + /* Diffuse Ray Depth */ + PathState *state = sd->osl_path_state; + int f = state->diffuse_bounce; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_glossy_depth) { + /* Glossy Ray Depth */ + PathState *state = sd->osl_path_state; + int f = state->glossy_bounce; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_transmission_depth) { + /* Transmission Ray Depth */ + PathState *state = sd->osl_path_state; + int f = state->transmission_bounce; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_transparent_depth) { + /* Transparent Ray Depth */ + PathState *state = sd->osl_path_state; + int f = state->transparent_bounce; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_path_transmission_depth) { + /* Transmission Ray Depth */ + PathState *state = sd->osl_path_state; + int f = state->transmission_bounce; + return set_attribute_int(f, type, derivatives, val); + } + else if (name == u_ndc) { + /* NDC coordinates with special exception for otho */ + OSLThreadData *tdata = kg->osl_tdata; + OSL::ShaderGlobals *globals = &tdata->globals; + float3 ndc[3]; + + if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && + kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { + ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); + + if (derivatives) { + ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx) - ndc[0]; + ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy) - ndc[0]; + } + } + else { + ndc[0] = camera_world_to_ndc(kg, sd, sd->P); + + if (derivatives) { + ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0]; + ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0]; + } + } + + return set_attribute_float3(ndc, type, derivatives, val); + } + 
else + return false; } -bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name, - TypeDesc type, ustring name, void *val) +bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, + bool derivatives, + ustring object_name, + TypeDesc type, + ustring name, + void *val) { - if(sg == NULL || sg->renderstate == NULL) - return false; + if (sg == NULL || sg->renderstate == NULL) + return false; - ShaderData *sd = (ShaderData *)(sg->renderstate); - return get_attribute(sd, derivatives, object_name, type, name, val); + ShaderData *sd = (ShaderData *)(sg->renderstate); + return get_attribute(sd, derivatives, object_name, type, name, val); } -bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object_name, - TypeDesc type, ustring name, void *val) +bool OSLRenderServices::get_attribute( + ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val) { - KernelGlobals *kg = sd->osl_globals; - int prim_type = 0; - int object; - - /* lookup of attribute on another object */ - if(object_name != u_empty) { - OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name); - - if(it == kg->osl->object_name_map.end()) - return false; - - object = it->second; - } - else { - object = sd->object; - prim_type = attribute_primitive_type(kg, sd); - - if(object == OBJECT_NONE) - return get_background_attribute(kg, sd, name, type, derivatives, val); - } - - /* find attribute on object */ - object = object*ATTR_PRIM_TYPES + prim_type; - OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object]; - OSLGlobals::AttributeMap::iterator it = attribute_map.find(name); - - if(it != attribute_map.end()) { - const OSLGlobals::Attribute& attr = it->second; - - if(attr.desc.element != ATTR_ELEMENT_OBJECT) { - /* triangle and vertex attributes */ - if(get_primitive_attribute(kg, sd, attr, type, derivatives, val)) - return true; - else - return 
get_mesh_attribute(kg, sd, attr, type, derivatives, val); - } - else { - /* object attribute */ - get_object_attribute(attr, derivatives, val); - return true; - } - } - else { - /* not found in attribute, check standard object info */ - bool is_std_object_attribute = get_object_standard_attribute(kg, sd, name, type, derivatives, val); - - if(is_std_object_attribute) - return true; - - return get_background_attribute(kg, sd, name, type, derivatives, val); - } - - return false; + KernelGlobals *kg = sd->osl_globals; + int prim_type = 0; + int object; + + /* lookup of attribute on another object */ + if (object_name != u_empty) { + OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name); + + if (it == kg->osl->object_name_map.end()) + return false; + + object = it->second; + } + else { + object = sd->object; + prim_type = attribute_primitive_type(kg, sd); + + if (object == OBJECT_NONE) + return get_background_attribute(kg, sd, name, type, derivatives, val); + } + + /* find attribute on object */ + object = object * ATTR_PRIM_TYPES + prim_type; + OSLGlobals::AttributeMap &attribute_map = kg->osl->attribute_map[object]; + OSLGlobals::AttributeMap::iterator it = attribute_map.find(name); + + if (it != attribute_map.end()) { + const OSLGlobals::Attribute &attr = it->second; + + if (attr.desc.element != ATTR_ELEMENT_OBJECT) { + /* triangle and vertex attributes */ + if (get_primitive_attribute(kg, sd, attr, type, derivatives, val)) + return true; + else + return get_mesh_attribute(kg, sd, attr, type, derivatives, val); + } + else { + /* object attribute */ + get_object_attribute(attr, derivatives, val); + return true; + } + } + else { + /* not found in attribute, check standard object info */ + bool is_std_object_attribute = get_object_standard_attribute( + kg, sd, name, type, derivatives, val); + + if (is_std_object_attribute) + return true; + + return get_background_attribute(kg, sd, name, type, derivatives, val); + } + + return false; } -bool 
OSLRenderServices::get_userdata(bool derivatives, ustring name, TypeDesc type, - OSL::ShaderGlobals *sg, void *val) +bool OSLRenderServices::get_userdata( + bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val) { - return false; /* disabled by lockgeom */ + return false; /* disabled by lockgeom */ } TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring filename) { - if(filename.length() && filename[0] == '@') { - /* Dummy, we don't use texture handles for builtin textures but need - * to tell the OSL runtime optimizer that this is a valid texture. */ - return NULL; - } - else { - return texturesys()->get_texture_handle(filename); - } + if (filename.length() && filename[0] == '@') { + /* Dummy, we don't use texture handles for builtin textures but need + * to tell the OSL runtime optimizer that this is a valid texture. */ + return NULL; + } + else { + return texturesys()->get_texture_handle(filename); + } } bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle) { - return texturesys()->good(texture_handle); + return texturesys()->good(texture_handle); } bool OSLRenderServices::texture(ustring filename, @@ -945,157 +976,169 @@ bool OSLRenderServices::texture(ustring filename, TexturePerthread *texture_thread_info, TextureOpt &options, OSL::ShaderGlobals *sg, - float s, float t, - float dsdx, float dtdx, float dsdy, float dtdy, + float s, + float t, + float dsdx, + float dtdx, + float dsdy, + float dtdy, int nchannels, float *result, float *dresultds, float *dresultdt, ustring *errormessage) { - OSL::TextureSystem *ts = osl_ts; - ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals *kg = sd->osl_globals; + OSL::TextureSystem *ts = osl_ts; + ShaderData *sd = (ShaderData *)(sg->renderstate); + KernelGlobals *kg = sd->osl_globals; - if(texture_thread_info == NULL) { - OSLThreadData *tdata = kg->osl_tdata; - texture_thread_info = tdata->oiio_thread_info; - } + if (texture_thread_info == 
NULL) { + OSLThreadData *tdata = kg->osl_tdata; + texture_thread_info = tdata->oiio_thread_info; + } #ifdef WITH_PTEX - /* todo: this is just a quick hack, only works with particular files and options */ - if(string_endswith(filename.string(), ".ptx")) { - float2 uv; - int faceid; + /* todo: this is just a quick hack, only works with particular files and options */ + if (string_endswith(filename.string(), ".ptx")) { + float2 uv; + int faceid; - if(!primitive_ptex(kg, sd, &uv, &faceid)) - return false; + if (!primitive_ptex(kg, sd, &uv, &faceid)) + return false; - float u = uv.x; - float v = uv.y; - float dudx = 0.0f; - float dvdx = 0.0f; - float dudy = 0.0f; - float dvdy = 0.0f; + float u = uv.x; + float v = uv.y; + float dudx = 0.0f; + float dvdx = 0.0f; + float dudy = 0.0f; + float dvdy = 0.0f; - Ptex::String error; - PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error)); + Ptex::String error; + PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error)); - if(!r) { - //std::cerr << error.c_str() << std::endl; - return false; - } + if (!r) { + //std::cerr << error.c_str() << std::endl; + return false; + } - bool mipmaplerp = false; - float sharpness = 1.0f; - PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness); - PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts)); + bool mipmaplerp = false; + float sharpness = 1.0f; + PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness); + PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts)); - f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy); + f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy); - for(int c = r->numChannels(); c < nchannels; c++) - result[c] = result[0]; + for (int c = r->numChannels(); c < nchannels; c++) + result[c] = result[0]; - return true; - } + return true; + } #endif - bool status = false; - - if(filename.length() && filename[0] == '@') { - if(filename == u_at_bevel) { - /* 
Bevel shader hack. */ - if(nchannels >= 3) { - PathState *state = sd->osl_path_state; - int num_samples = (int)s; - float radius = t; - float3 N = svm_bevel(kg, sd, state, radius, num_samples); - result[0] = N.x; - result[1] = N.y; - result[2] = N.z; - status = true; - } - } - else if(filename == u_at_ao) { - /* AO shader hack. */ - PathState *state = sd->osl_path_state; - int num_samples = (int)s; - float radius = t; - float3 N = make_float3(dsdx, dtdx, dsdy); - int flags = 0; - if((int)dtdy) { - flags |= NODE_AO_INSIDE; - } - if((int)options.sblur) { - flags |= NODE_AO_ONLY_LOCAL; - } - if((int)options.tblur) { - flags |= NODE_AO_GLOBAL_RADIUS; - } - result[0] = svm_ao(kg, sd, N, state, radius, num_samples, flags); - status = true; - } - else if(filename[1] == 'l') { - /* IES light. */ - int slot = atoi(filename.c_str() + 2); - result[0] = kernel_ies_interp(kg, slot, s, t); - status = true; - } - else { - /* Packed texture. */ - int slot = atoi(filename.c_str() + 2); - float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t); - - result[0] = rgba[0]; - if(nchannels > 1) - result[1] = rgba[1]; - if(nchannels > 2) - result[2] = rgba[2]; - if(nchannels > 3) - result[3] = rgba[3]; - status = true; - } - } - else { - if(texture_handle != NULL) { - status = ts->texture(texture_handle, - texture_thread_info, - options, - s, t, - dsdx, dtdx, - dsdy, dtdy, - nchannels, - result, - dresultds, dresultdt); - } - else { - status = ts->texture(filename, - options, - s, t, - dsdx, dtdx, - dsdy, dtdy, - nchannels, - result, - dresultds, dresultdt); - } - } - - if(!status) { - if(nchannels == 3 || nchannels == 4) { - result[0] = 1.0f; - result[1] = 0.0f; - result[2] = 1.0f; - - if(nchannels == 4) - result[3] = 1.0f; - } - /* This might be slow, but prevents error messages leak and - * other nasty stuff happening. 
- */ - string err = ts->geterror(); - (void) err; - } - - return status; + bool status = false; + + if (filename.length() && filename[0] == '@') { + if (filename == u_at_bevel) { + /* Bevel shader hack. */ + if (nchannels >= 3) { + PathState *state = sd->osl_path_state; + int num_samples = (int)s; + float radius = t; + float3 N = svm_bevel(kg, sd, state, radius, num_samples); + result[0] = N.x; + result[1] = N.y; + result[2] = N.z; + status = true; + } + } + else if (filename == u_at_ao) { + /* AO shader hack. */ + PathState *state = sd->osl_path_state; + int num_samples = (int)s; + float radius = t; + float3 N = make_float3(dsdx, dtdx, dsdy); + int flags = 0; + if ((int)dtdy) { + flags |= NODE_AO_INSIDE; + } + if ((int)options.sblur) { + flags |= NODE_AO_ONLY_LOCAL; + } + if ((int)options.tblur) { + flags |= NODE_AO_GLOBAL_RADIUS; + } + result[0] = svm_ao(kg, sd, N, state, radius, num_samples, flags); + status = true; + } + else if (filename[1] == 'l') { + /* IES light. */ + int slot = atoi(filename.c_str() + 2); + result[0] = kernel_ies_interp(kg, slot, s, t); + status = true; + } + else { + /* Packed texture. 
*/ + int slot = atoi(filename.c_str() + 2); + float4 rgba = kernel_tex_image_interp(kg, slot, s, 1.0f - t); + + result[0] = rgba[0]; + if (nchannels > 1) + result[1] = rgba[1]; + if (nchannels > 2) + result[2] = rgba[2]; + if (nchannels > 3) + result[3] = rgba[3]; + status = true; + } + } + else { + if (texture_handle != NULL) { + status = ts->texture(texture_handle, + texture_thread_info, + options, + s, + t, + dsdx, + dtdx, + dsdy, + dtdy, + nchannels, + result, + dresultds, + dresultdt); + } + else { + status = ts->texture(filename, + options, + s, + t, + dsdx, + dtdx, + dsdy, + dtdy, + nchannels, + result, + dresultds, + dresultdt); + } + } + + if (!status) { + if (nchannels == 3 || nchannels == 4) { + result[0] = 1.0f; + result[1] = 0.0f; + result[2] = 1.0f; + + if (nchannels == 4) + result[3] = 1.0f; + } + /* This might be slow, but prevents error messages leak and + * other nasty stuff happening. + */ + string err = ts->geterror(); + (void)err; + } + + return status; } bool OSLRenderServices::texture3d(ustring filename, @@ -1114,68 +1157,76 @@ bool OSLRenderServices::texture3d(ustring filename, float *dresultdr, ustring *errormessage) { - OSL::TextureSystem *ts = osl_ts; - ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals *kg = sd->osl_globals; - - if(texture_thread_info == NULL) { - OSLThreadData *tdata = kg->osl_tdata; - texture_thread_info = tdata->oiio_thread_info; - } - - bool status; - if(filename.length() && filename[0] == '@') { - int slot = atoi(filename.c_str() + 1); - float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE); - - result[0] = rgba[0]; - if(nchannels > 1) - result[1] = rgba[1]; - if(nchannels > 2) - result[2] = rgba[2]; - if(nchannels > 3) - result[3] = rgba[3]; - status = true; - } - else { - if(texture_handle != NULL) { - status = ts->texture3d(texture_handle, - texture_thread_info, - options, - P, - dPdx, dPdy, dPdz, - nchannels, - result, - dresultds, dresultdt, dresultdr); - } - else 
{ - status = ts->texture3d(filename, - options, - P, - dPdx, dPdy, dPdz, - nchannels, - result, - dresultds, dresultdt, dresultdr); - } - } - - if(!status) { - if(nchannels == 3 || nchannels == 4) { - result[0] = 1.0f; - result[1] = 0.0f; - result[2] = 1.0f; - - if(nchannels == 4) - result[3] = 1.0f; - } - /* This might be slow, but prevents error messages leak and - * other nasty stuff happening. - */ - string err = ts->geterror(); - (void) err; - } - - return status; + OSL::TextureSystem *ts = osl_ts; + ShaderData *sd = (ShaderData *)(sg->renderstate); + KernelGlobals *kg = sd->osl_globals; + + if (texture_thread_info == NULL) { + OSLThreadData *tdata = kg->osl_tdata; + texture_thread_info = tdata->oiio_thread_info; + } + + bool status; + if (filename.length() && filename[0] == '@') { + int slot = atoi(filename.c_str() + 1); + float4 rgba = kernel_tex_image_interp_3d(kg, slot, P.x, P.y, P.z, INTERPOLATION_NONE); + + result[0] = rgba[0]; + if (nchannels > 1) + result[1] = rgba[1]; + if (nchannels > 2) + result[2] = rgba[2]; + if (nchannels > 3) + result[3] = rgba[3]; + status = true; + } + else { + if (texture_handle != NULL) { + status = ts->texture3d(texture_handle, + texture_thread_info, + options, + P, + dPdx, + dPdy, + dPdz, + nchannels, + result, + dresultds, + dresultdt, + dresultdr); + } + else { + status = ts->texture3d(filename, + options, + P, + dPdx, + dPdy, + dPdz, + nchannels, + result, + dresultds, + dresultdt, + dresultdr); + } + } + + if (!status) { + if (nchannels == 3 || nchannels == 4) { + result[0] = 1.0f; + result[1] = 0.0f; + result[2] = 1.0f; + + if (nchannels == 4) + result[3] = 1.0f; + } + /* This might be slow, but prevents error messages leak and + * other nasty stuff happening. 
+ */ + string err = ts->geterror(); + (void)err; + } + + return status; } bool OSLRenderServices::environment(ustring filename, @@ -1192,35 +1243,34 @@ bool OSLRenderServices::environment(ustring filename, float *dresultdt, ustring *errormessage) { - OSL::TextureSystem *ts = osl_ts; - - if (thread_info == NULL) { - ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals *kg = sd->osl_globals; - OSLThreadData *tdata = kg->osl_tdata; - thread_info = tdata->oiio_thread_info; - } - - if (th == NULL) { - th = ts->get_texture_handle(filename, thread_info); - } - - bool status = ts->environment(th, thread_info, - options, R, dRdx, dRdy, - nchannels, result, dresultds, dresultdt); - - if(!status) { - if(nchannels == 3 || nchannels == 4) { - result[0] = 1.0f; - result[1] = 0.0f; - result[2] = 1.0f; - - if(nchannels == 4) - result[3] = 1.0f; - } - } - - return status; + OSL::TextureSystem *ts = osl_ts; + + if (thread_info == NULL) { + ShaderData *sd = (ShaderData *)(sg->renderstate); + KernelGlobals *kg = sd->osl_globals; + OSLThreadData *tdata = kg->osl_tdata; + thread_info = tdata->oiio_thread_info; + } + + if (th == NULL) { + th = ts->get_texture_handle(filename, thread_info); + } + + bool status = ts->environment( + th, thread_info, options, R, dRdx, dRdy, nchannels, result, dresultds, dresultdt); + + if (!status) { + if (nchannels == 3 || nchannels == 4) { + result[0] = 1.0f; + result[1] = 0.0f; + result[2] = 1.0f; + + if (nchannels == 4) + result[3] = 1.0f; + } + } + + return status; } bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, @@ -1231,138 +1281,158 @@ bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, TypeDesc datatype, void *data) { - OSL::TextureSystem *ts = osl_ts; - if(filename.length() && filename[0] == '@') { - /* Special builtin textures. 
*/ - return false; - } - else { - return ts->get_texture_info(filename, subimage, dataname, datatype, data); - } + OSL::TextureSystem *ts = osl_ts; + if (filename.length() && filename[0] == '@') { + /* Special builtin textures. */ + return false; + } + else { + return ts->get_texture_info(filename, subimage, dataname, datatype, data); + } } -int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center, - float radius, int max_points, bool sort, - size_t *out_indices, float *out_distances, int derivs_offset) +int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, + ustring filename, + const OSL::Vec3 &center, + float radius, + int max_points, + bool sort, + size_t *out_indices, + float *out_distances, + int derivs_offset) { - return 0; + return 0; } -int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg, ustring filename, size_t *indices, int count, - ustring attr_name, TypeDesc attr_type, void *out_data) +int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg, + ustring filename, + size_t *indices, + int count, + ustring attr_name, + TypeDesc attr_type, + void *out_data) { - return 0; + return 0; } bool OSLRenderServices::pointcloud_write(OSL::ShaderGlobals *sg, - ustring filename, const OSL::Vec3 &pos, - int nattribs, const ustring *names, + ustring filename, + const OSL::Vec3 &pos, + int nattribs, + const ustring *names, const TypeDesc *types, const void **data) { - return false; + return false; } -bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg, - const OSL::Vec3 &P, const OSL::Vec3 &dPdx, - const OSL::Vec3 &dPdy, const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy) +bool OSLRenderServices::trace(TraceOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, + const OSL::Vec3 &dPdy, + const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, + const OSL::Vec3 &dRdy) { - /* todo: options.shader support, maybe options.traceset */ - ShaderData *sd = 
(ShaderData *)(sg->renderstate); - - /* setup ray */ - Ray ray; - - ray.P = TO_FLOAT3(P); - ray.D = TO_FLOAT3(R); - ray.t = (options.maxdist == 1.0e30f)? FLT_MAX: options.maxdist - options.mindist; - ray.time = sd->time; - - if(options.mindist == 0.0f) { - /* avoid self-intersections */ - if(ray.P == sd->P) { - bool transmit = (dot(sd->Ng, ray.D) < 0.0f); - ray.P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng); - } - } - else { - /* offset for minimum distance */ - ray.P += options.mindist*ray.D; - } - - /* ray differentials */ - ray.dP.dx = TO_FLOAT3(dPdx); - ray.dP.dy = TO_FLOAT3(dPdy); - ray.dD.dx = TO_FLOAT3(dRdx); - ray.dD.dy = TO_FLOAT3(dRdy); - - /* allocate trace data */ - OSLTraceData *tracedata = (OSLTraceData*)sg->tracedata; - tracedata->ray = ray; - tracedata->setup = false; - tracedata->init = true; - tracedata->sd.osl_globals = sd->osl_globals; - - /* Raytrace, leaving out shadow opaque to avoid early exit. */ - uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; - return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect, NULL, 0.0f, 0.0f); + /* todo: options.shader support, maybe options.traceset */ + ShaderData *sd = (ShaderData *)(sg->renderstate); + + /* setup ray */ + Ray ray; + + ray.P = TO_FLOAT3(P); + ray.D = TO_FLOAT3(R); + ray.t = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist; + ray.time = sd->time; + + if (options.mindist == 0.0f) { + /* avoid self-intersections */ + if (ray.P == sd->P) { + bool transmit = (dot(sd->Ng, ray.D) < 0.0f); + ray.P = ray_offset(sd->P, (transmit) ? 
-sd->Ng : sd->Ng); + } + } + else { + /* offset for minimum distance */ + ray.P += options.mindist * ray.D; + } + + /* ray differentials */ + ray.dP.dx = TO_FLOAT3(dPdx); + ray.dP.dy = TO_FLOAT3(dPdy); + ray.dD.dx = TO_FLOAT3(dRdx); + ray.dD.dy = TO_FLOAT3(dRdy); + + /* allocate trace data */ + OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata; + tracedata->ray = ray; + tracedata->setup = false; + tracedata->init = true; + tracedata->sd.osl_globals = sd->osl_globals; + + /* Raytrace, leaving out shadow opaque to avoid early exit. */ + uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; + return scene_intersect(sd->osl_globals, ray, visibility, &tracedata->isect, NULL, 0.0f, 0.0f); } - -bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name, - TypeDesc type, void *val, bool derivatives) +bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, + ustring source, + ustring name, + TypeDesc type, + void *val, + bool derivatives) { - OSLTraceData *tracedata = (OSLTraceData*)sg->tracedata; - - if(source == u_trace && tracedata->init) { - if(name == u_hit) { - return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val); - } - else if(tracedata->isect.prim != PRIM_NONE) { - if(name == u_hitdist) { - float f[3] = {tracedata->isect.t, 0.0f, 0.0f}; - return set_attribute_float(f, type, derivatives, val); - } - else { - ShaderData *sd = &tracedata->sd; - KernelGlobals *kg = sd->osl_globals; - - if(!tracedata->setup) { - /* lazy shader data setup */ - shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray); - tracedata->setup = true; - } - - if(name == u_N) { - return set_attribute_float3(sd->N, type, derivatives, val); - } - else if(name == u_Ng) { - return set_attribute_float3(sd->Ng, type, derivatives, val); - } - else if(name == u_P) { - float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy}; - return set_attribute_float3(f, type, derivatives, val); - } - else if(name == u_I) { - float3 f[3] 
= {sd->I, sd->dI.dx, sd->dI.dy}; - return set_attribute_float3(f, type, derivatives, val); - } - else if(name == u_u) { - float f[3] = {sd->u, sd->du.dx, sd->du.dy}; - return set_attribute_float(f, type, derivatives, val); - } - else if(name == u_v) { - float f[3] = {sd->v, sd->dv.dx, sd->dv.dy}; - return set_attribute_float(f, type, derivatives, val); - } - - return get_attribute(sd, derivatives, u_empty, type, name, val); - } - } - } - - return false; + OSLTraceData *tracedata = (OSLTraceData *)sg->tracedata; + + if (source == u_trace && tracedata->init) { + if (name == u_hit) { + return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val); + } + else if (tracedata->isect.prim != PRIM_NONE) { + if (name == u_hitdist) { + float f[3] = {tracedata->isect.t, 0.0f, 0.0f}; + return set_attribute_float(f, type, derivatives, val); + } + else { + ShaderData *sd = &tracedata->sd; + KernelGlobals *kg = sd->osl_globals; + + if (!tracedata->setup) { + /* lazy shader data setup */ + shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray); + tracedata->setup = true; + } + + if (name == u_N) { + return set_attribute_float3(sd->N, type, derivatives, val); + } + else if (name == u_Ng) { + return set_attribute_float3(sd->Ng, type, derivatives, val); + } + else if (name == u_P) { + float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy}; + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_I) { + float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy}; + return set_attribute_float3(f, type, derivatives, val); + } + else if (name == u_u) { + float f[3] = {sd->u, sd->du.dx, sd->du.dy}; + return set_attribute_float(f, type, derivatives, val); + } + else if (name == u_v) { + float f[3] = {sd->v, sd->dv.dx, sd->dv.dy}; + return set_attribute_float(f, type, derivatives, val); + } + + return get_attribute(sd, derivatives, u_empty, type, name, val); + } + } + } + + return false; } CCL_NAMESPACE_END diff --git 
a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 3990a22aefd..2fad5833fc9 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -40,177 +40,229 @@ class Shader; struct ShaderData; struct float3; struct KernelGlobals; -class OSLRenderServices : public OSL::RendererServices -{ -public: - OSLRenderServices(); - ~OSLRenderServices(); - - void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts); - - bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) override; - bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) override; - - bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time) override; - bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time) override; - - bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) override; - bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform) override; - - bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override; - bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override; - - bool get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives, - ustring object, TypeDesc type, ustring name, - int index, void *val) override; - bool get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object, - TypeDesc type, ustring name, void *val) override; - bool get_attribute(ShaderData *sd, bool derivatives, ustring object_name, - TypeDesc type, ustring name, void *val); - - bool get_userdata(bool derivatives, ustring name, TypeDesc type, - OSL::ShaderGlobals *sg, void *val) override; - - int pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center, - float radius, int max_points, 
bool sort, size_t *out_indices, - float *out_distances, int derivs_offset) override; - - int pointcloud_get(OSL::ShaderGlobals *sg, ustring filename, size_t *indices, int count, - ustring attr_name, TypeDesc attr_type, void *out_data) override; - - bool pointcloud_write(OSL::ShaderGlobals *sg, - ustring filename, const OSL::Vec3 &pos, - int nattribs, const ustring *names, - const TypeDesc *types, - const void **data) override; - - bool trace(TraceOpt &options, OSL::ShaderGlobals *sg, - const OSL::Vec3 &P, const OSL::Vec3 &dPdx, - const OSL::Vec3 &dPdy, const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy) override; - - bool getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name, - TypeDesc type, void *val, bool derivatives) override; - - TextureSystem::TextureHandle *get_texture_handle(ustring filename) override; - - bool good(TextureSystem::TextureHandle *texture_handle) override; - - bool texture(ustring filename, - TextureSystem::TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - float s, float t, - float dsdx, float dtdx, float dsdy, float dtdy, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - ustring *errormessage) override; - - bool texture3d(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - const OSL::Vec3 &P, - const OSL::Vec3 &dPdx, - const OSL::Vec3 &dPdy, - const OSL::Vec3 &dPdz, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - float *dresultdr, - ustring *errormessage) override; - - bool environment(ustring filename, - TextureHandle *texture_handle, - TexturePerthread *texture_thread_info, - TextureOpt &options, - OSL::ShaderGlobals *sg, - const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, - const OSL::Vec3 &dRdy, - int nchannels, - float *result, - float *dresultds, - float *dresultdt, - ustring *errormessage) override; 
- - bool get_texture_info(OSL::ShaderGlobals *sg, - ustring filename, - TextureHandle *texture_handle, - int subimage, - ustring dataname, - TypeDesc datatype, - void *data) override; - - static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name, - TypeDesc type, bool derivatives, void *val); - static bool get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name, - TypeDesc type, bool derivatives, void *val); - - static ustring u_distance; - static ustring u_index; - static ustring u_world; - static ustring u_camera; - static ustring u_screen; - static ustring u_raster; - static ustring u_ndc; - static ustring u_object_location; - static ustring u_object_index; - static ustring u_geom_dupli_generated; - static ustring u_geom_dupli_uv; - static ustring u_material_index; - static ustring u_object_random; - static ustring u_particle_index; - static ustring u_particle_random; - static ustring u_particle_age; - static ustring u_particle_lifetime; - static ustring u_particle_location; - static ustring u_particle_rotation; - static ustring u_particle_size; - static ustring u_particle_velocity; - static ustring u_particle_angular_velocity; - static ustring u_geom_numpolyvertices; - static ustring u_geom_trianglevertices; - static ustring u_geom_polyvertices; - static ustring u_geom_name; - static ustring u_geom_undisplaced; - static ustring u_is_smooth; - static ustring u_is_curve; - static ustring u_curve_thickness; - static ustring u_curve_tangent_normal; - static ustring u_curve_random; - static ustring u_path_ray_length; - static ustring u_path_ray_depth; - static ustring u_path_diffuse_depth; - static ustring u_path_glossy_depth; - static ustring u_path_transparent_depth; - static ustring u_path_transmission_depth; - static ustring u_trace; - static ustring u_hit; - static ustring u_hitdist; - static ustring u_N; - static ustring u_Ng; - static ustring u_P; - static ustring u_I; - static ustring u_u; - static ustring 
u_v; - static ustring u_empty; - static ustring u_at_bevel; - static ustring u_at_ao; - -private: - KernelGlobals *kernel_globals; - OSL::TextureSystem *osl_ts; +class OSLRenderServices : public OSL::RendererServices { + public: + OSLRenderServices(); + ~OSLRenderServices(); + + void thread_init(KernelGlobals *kernel_globals, OSL::TextureSystem *ts); + + bool get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform, + float time) override; + bool get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform, + float time) override; + + bool get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring from, + float time) override; + bool get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + ustring to, + float time) override; + + bool get_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform) override; + bool get_inverse_matrix(OSL::ShaderGlobals *sg, + OSL::Matrix44 &result, + OSL::TransformationPtr xform) override; + + bool get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override; + bool get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from) override; + + bool get_array_attribute(OSL::ShaderGlobals *sg, + bool derivatives, + ustring object, + TypeDesc type, + ustring name, + int index, + void *val) override; + bool get_attribute(OSL::ShaderGlobals *sg, + bool derivatives, + ustring object, + TypeDesc type, + ustring name, + void *val) override; + bool get_attribute(ShaderData *sd, + bool derivatives, + ustring object_name, + TypeDesc type, + ustring name, + void *val); + + bool get_userdata( + bool derivatives, ustring name, TypeDesc type, OSL::ShaderGlobals *sg, void *val) override; + + int pointcloud_search(OSL::ShaderGlobals *sg, + ustring filename, + const OSL::Vec3 &center, + float radius, + int max_points, + bool sort, + size_t *out_indices, + float *out_distances, + int derivs_offset) 
override; + + int pointcloud_get(OSL::ShaderGlobals *sg, + ustring filename, + size_t *indices, + int count, + ustring attr_name, + TypeDesc attr_type, + void *out_data) override; + + bool pointcloud_write(OSL::ShaderGlobals *sg, + ustring filename, + const OSL::Vec3 &pos, + int nattribs, + const ustring *names, + const TypeDesc *types, + const void **data) override; + + bool trace(TraceOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, + const OSL::Vec3 &dPdy, + const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, + const OSL::Vec3 &dRdy) override; + + bool getmessage(OSL::ShaderGlobals *sg, + ustring source, + ustring name, + TypeDesc type, + void *val, + bool derivatives) override; + + TextureSystem::TextureHandle *get_texture_handle(ustring filename) override; + + bool good(TextureSystem::TextureHandle *texture_handle) override; + + bool texture(ustring filename, + TextureSystem::TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + float s, + float t, + float dsdx, + float dtdx, + float dsdy, + float dtdy, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + ustring *errormessage) override; + + bool texture3d(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, + const OSL::Vec3 &dPdy, + const OSL::Vec3 &dPdz, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + float *dresultdr, + ustring *errormessage) override; + + bool environment(ustring filename, + TextureHandle *texture_handle, + TexturePerthread *texture_thread_info, + TextureOpt &options, + OSL::ShaderGlobals *sg, + const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, + const OSL::Vec3 &dRdy, + int nchannels, + float *result, + float *dresultds, + float *dresultdt, + ustring *errormessage) override; + + bool 
get_texture_info(OSL::ShaderGlobals *sg, + ustring filename, + TextureHandle *texture_handle, + int subimage, + ustring dataname, + TypeDesc datatype, + void *data) override; + + static bool get_background_attribute( + KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val); + static bool get_object_standard_attribute( + KernelGlobals *kg, ShaderData *sd, ustring name, TypeDesc type, bool derivatives, void *val); + + static ustring u_distance; + static ustring u_index; + static ustring u_world; + static ustring u_camera; + static ustring u_screen; + static ustring u_raster; + static ustring u_ndc; + static ustring u_object_location; + static ustring u_object_index; + static ustring u_geom_dupli_generated; + static ustring u_geom_dupli_uv; + static ustring u_material_index; + static ustring u_object_random; + static ustring u_particle_index; + static ustring u_particle_random; + static ustring u_particle_age; + static ustring u_particle_lifetime; + static ustring u_particle_location; + static ustring u_particle_rotation; + static ustring u_particle_size; + static ustring u_particle_velocity; + static ustring u_particle_angular_velocity; + static ustring u_geom_numpolyvertices; + static ustring u_geom_trianglevertices; + static ustring u_geom_polyvertices; + static ustring u_geom_name; + static ustring u_geom_undisplaced; + static ustring u_is_smooth; + static ustring u_is_curve; + static ustring u_curve_thickness; + static ustring u_curve_tangent_normal; + static ustring u_curve_random; + static ustring u_path_ray_length; + static ustring u_path_ray_depth; + static ustring u_path_diffuse_depth; + static ustring u_path_glossy_depth; + static ustring u_path_transparent_depth; + static ustring u_path_transmission_depth; + static ustring u_trace; + static ustring u_hit; + static ustring u_hitdist; + static ustring u_N; + static ustring u_Ng; + static ustring u_P; + static ustring u_I; + static ustring u_u; + static ustring u_v; + 
static ustring u_empty; + static ustring u_at_bevel; + static ustring u_at_ao; + + private: + KernelGlobals *kernel_globals; + OSL::TextureSystem *osl_ts; #ifdef WITH_PTEX - PtexCache *ptex_cache; + PtexCache *ptex_cache; #endif }; CCL_NAMESPACE_END -#endif /* __OSL_SERVICES_H__ */ +#endif /* __OSL_SERVICES_H__ */ diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index a89bb3fd1a3..3d9c579c9ff 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -33,103 +33,104 @@ #include "render/attribute.h" - CCL_NAMESPACE_BEGIN /* Threads */ -void OSLShader::thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OSLGlobals *osl_globals) +void OSLShader::thread_init(KernelGlobals *kg, + KernelGlobals *kernel_globals, + OSLGlobals *osl_globals) { - /* no osl used? */ - if(!osl_globals->use) { - kg->osl = NULL; - return; - } + /* no osl used? */ + if (!osl_globals->use) { + kg->osl = NULL; + return; + } - /* per thread kernel data init*/ - kg->osl = osl_globals; - kg->osl->services->thread_init(kernel_globals, osl_globals->ts); + /* per thread kernel data init*/ + kg->osl = osl_globals; + kg->osl->services->thread_init(kernel_globals, osl_globals->ts); - OSL::ShadingSystem *ss = kg->osl->ss; - OSLThreadData *tdata = new OSLThreadData(); + OSL::ShadingSystem *ss = kg->osl->ss; + OSLThreadData *tdata = new OSLThreadData(); - memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals)); - tdata->globals.tracedata = &tdata->tracedata; - tdata->globals.flipHandedness = false; - tdata->osl_thread_info = ss->create_thread_info(); - tdata->context = ss->get_context(tdata->osl_thread_info); + memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals)); + tdata->globals.tracedata = &tdata->tracedata; + tdata->globals.flipHandedness = false; + tdata->osl_thread_info = ss->create_thread_info(); + tdata->context = ss->get_context(tdata->osl_thread_info); - tdata->oiio_thread_info = 
osl_globals->ts->get_perthread_info(); + tdata->oiio_thread_info = osl_globals->ts->get_perthread_info(); - kg->osl_ss = (OSLShadingSystem*)ss; - kg->osl_tdata = tdata; + kg->osl_ss = (OSLShadingSystem *)ss; + kg->osl_tdata = tdata; } void OSLShader::thread_free(KernelGlobals *kg) { - if(!kg->osl) - return; + if (!kg->osl) + return; - OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss; - OSLThreadData *tdata = kg->osl_tdata; - ss->release_context(tdata->context); + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSLThreadData *tdata = kg->osl_tdata; + ss->release_context(tdata->context); - ss->destroy_thread_info(tdata->osl_thread_info); + ss->destroy_thread_info(tdata->osl_thread_info); - delete tdata; + delete tdata; - kg->osl = NULL; - kg->osl_ss = NULL; - kg->osl_tdata = NULL; + kg->osl = NULL; + kg->osl_ss = NULL; + kg->osl_tdata = NULL; } /* Globals */ -static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd, PathState *state, - int path_flag, OSLThreadData *tdata) +static void shaderdata_to_shaderglobals( + KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag, OSLThreadData *tdata) { - OSL::ShaderGlobals *globals = &tdata->globals; - - /* copy from shader data to shader globals */ - globals->P = TO_VEC3(sd->P); - globals->dPdx = TO_VEC3(sd->dP.dx); - globals->dPdy = TO_VEC3(sd->dP.dy); - globals->I = TO_VEC3(sd->I); - globals->dIdx = TO_VEC3(sd->dI.dx); - globals->dIdy = TO_VEC3(sd->dI.dy); - globals->N = TO_VEC3(sd->N); - globals->Ng = TO_VEC3(sd->Ng); - globals->u = sd->u; - globals->dudx = sd->du.dx; - globals->dudy = sd->du.dy; - globals->v = sd->v; - globals->dvdx = sd->dv.dx; - globals->dvdy = sd->dv.dy; - globals->dPdu = TO_VEC3(sd->dPdu); - globals->dPdv = TO_VEC3(sd->dPdv); - globals->surfacearea = (sd->object == OBJECT_NONE) ? 
1.0f : object_surface_area(kg, sd->object); - globals->time = sd->time; - - /* booleans */ - globals->raytype = path_flag; - globals->backfacing = (sd->flag & SD_BACKFACING); - - /* shader data to be used in services callbacks */ - globals->renderstate = sd; - - /* hacky, we leave it to services to fetch actual object matrix */ - globals->shader2common = sd; - globals->object2common = sd; - - /* must be set to NULL before execute */ - globals->Ci = NULL; - - /* clear trace data */ - tdata->tracedata.init = false; - - /* used by renderservices */ - sd->osl_globals = kg; - sd->osl_path_state = state; + OSL::ShaderGlobals *globals = &tdata->globals; + + /* copy from shader data to shader globals */ + globals->P = TO_VEC3(sd->P); + globals->dPdx = TO_VEC3(sd->dP.dx); + globals->dPdy = TO_VEC3(sd->dP.dy); + globals->I = TO_VEC3(sd->I); + globals->dIdx = TO_VEC3(sd->dI.dx); + globals->dIdy = TO_VEC3(sd->dI.dy); + globals->N = TO_VEC3(sd->N); + globals->Ng = TO_VEC3(sd->Ng); + globals->u = sd->u; + globals->dudx = sd->du.dx; + globals->dudy = sd->du.dy; + globals->v = sd->v; + globals->dvdx = sd->dv.dx; + globals->dvdy = sd->dv.dy; + globals->dPdu = TO_VEC3(sd->dPdu); + globals->dPdv = TO_VEC3(sd->dPdv); + globals->surfacearea = (sd->object == OBJECT_NONE) ? 
1.0f : object_surface_area(kg, sd->object); + globals->time = sd->time; + + /* booleans */ + globals->raytype = path_flag; + globals->backfacing = (sd->flag & SD_BACKFACING); + + /* shader data to be used in services callbacks */ + globals->renderstate = sd; + + /* hacky, we leave it to services to fetch actual object matrix */ + globals->shader2common = sd; + globals->object2common = sd; + + /* must be set to NULL before execute */ + globals->Ci = NULL; + + /* clear trace data */ + tdata->tracedata.init = false; + + /* used by renderservices */ + sd->osl_globals = kg; + sd->osl_path_state = state; } /* Surface */ @@ -139,97 +140,101 @@ static void flatten_surface_closure_tree(ShaderData *sd, const OSL::ClosureColor *closure, float3 weight = make_float3(1.0f, 1.0f, 1.0f)) { - /* OSL gives us a closure tree, we flatten it into arrays per - * closure type, for evaluation, sampling, etc later on. */ - - switch(closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_surface_closure_tree(sd, path_flag, add->closureA, weight); - flatten_surface_closure_tree(sd, path_flag, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if(prim) { + /* OSL gives us a closure tree, we flatten it into arrays per + * closure type, for evaluation, sampling, etc later on. 
*/ + + switch (closure->id) { + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight); + break; + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + flatten_surface_closure_tree(sd, path_flag, add->closureA, weight); + flatten_surface_closure_tree(sd, path_flag, add->closureB, weight); + break; + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); + + if (prim) { #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight*TO_FLOAT3(comp->w); + weight = weight * TO_FLOAT3(comp->w); #endif - prim->setup(sd, path_flag, weight); - } - break; - } - } + prim->setup(sd, path_flag, weight); + } + break; + } + } } void OSLShader::eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag) { - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader for this point */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; - - /* automatic bump shader */ - if(kg->osl->bump_state[shader]) { - /* save state */ - float3 P = sd->P; - float3 dPdx = sd->dP.dx; - float3 dPdy = sd->dP.dy; - - /* set state as if undisplaced */ - if(sd->flag & SD_HAS_DISPLACEMENT) { - float data[9]; - bool found = kg->osl->services->get_attribute(sd, true, OSLRenderServices::u_empty, TypeDesc::TypeVector, - OSLRenderServices::u_geom_undisplaced, data); - (void) found; - assert(found); - - memcpy(&sd->P, data, sizeof(float)*3); - memcpy(&sd->dP.dx, data+3, sizeof(float)*3); - memcpy(&sd->dP.dy, data+6, sizeof(float)*3); - - object_position_transform(kg, sd, &sd->P); - 
object_dir_transform(kg, sd, &sd->dP.dx); - object_dir_transform(kg, sd, &sd->dP.dy); - - globals->P = TO_VEC3(sd->P); - globals->dPdx = TO_VEC3(sd->dP.dx); - globals->dPdy = TO_VEC3(sd->dP.dy); - } - - /* execute bump shader */ - ss->execute(octx, *(kg->osl->bump_state[shader]), *globals); - - /* reset state */ - sd->P = P; - sd->dP.dx = dPdx; - sd->dP.dy = dPdy; - - globals->P = TO_VEC3(P); - globals->dPdx = TO_VEC3(dPdx); - globals->dPdy = TO_VEC3(dPdy); - } - - /* surface shader */ - if(kg->osl->surface_state[shader]) { - ss->execute(octx, *(kg->osl->surface_state[shader]), *globals); - } - - /* flatten closure tree */ - if(globals->Ci) - flatten_surface_closure_tree(sd, path_flag, globals->Ci); + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); + + /* execute shader for this point */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; + + /* automatic bump shader */ + if (kg->osl->bump_state[shader]) { + /* save state */ + float3 P = sd->P; + float3 dPdx = sd->dP.dx; + float3 dPdy = sd->dP.dy; + + /* set state as if undisplaced */ + if (sd->flag & SD_HAS_DISPLACEMENT) { + float data[9]; + bool found = kg->osl->services->get_attribute(sd, + true, + OSLRenderServices::u_empty, + TypeDesc::TypeVector, + OSLRenderServices::u_geom_undisplaced, + data); + (void)found; + assert(found); + + memcpy(&sd->P, data, sizeof(float) * 3); + memcpy(&sd->dP.dx, data + 3, sizeof(float) * 3); + memcpy(&sd->dP.dy, data + 6, sizeof(float) * 3); + + object_position_transform(kg, sd, &sd->P); + object_dir_transform(kg, sd, &sd->dP.dx); + object_dir_transform(kg, sd, &sd->dP.dy); + + globals->P = TO_VEC3(sd->P); + globals->dPdx = TO_VEC3(sd->dP.dx); + globals->dPdy = TO_VEC3(sd->dP.dy); + } + + /* execute bump shader */ + ss->execute(octx, 
*(kg->osl->bump_state[shader]), *globals); + + /* reset state */ + sd->P = P; + sd->dP.dx = dPdx; + sd->dP.dy = dPdy; + + globals->P = TO_VEC3(P); + globals->dPdx = TO_VEC3(dPdx); + globals->dPdy = TO_VEC3(dPdy); + } + + /* surface shader */ + if (kg->osl->surface_state[shader]) { + ss->execute(octx, *(kg->osl->surface_state[shader]), *globals); + } + + /* flatten closure tree */ + if (globals->Ci) + flatten_surface_closure_tree(sd, path_flag, globals->Ci); } /* Background */ @@ -238,56 +243,56 @@ static void flatten_background_closure_tree(ShaderData *sd, const OSL::ClosureColor *closure, float3 weight = make_float3(1.0f, 1.0f, 1.0f)) { - /* OSL gives us a closure tree, if we are shading for background there - * is only one supported closure type at the moment, which has no evaluation - * functions, so we just sum the weights */ - - switch(closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight)); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - - flatten_background_closure_tree(sd, add->closureA, weight); - flatten_background_closure_tree(sd, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if(prim) { + /* OSL gives us a closure tree, if we are shading for background there + * is only one supported closure type at the moment, which has no evaluation + * functions, so we just sum the weights */ + + switch (closure->id) { + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight)); + break; + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + + flatten_background_closure_tree(sd, add->closureA, weight); + 
flatten_background_closure_tree(sd, add->closureB, weight); + break; + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); + + if (prim) { #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight*TO_FLOAT3(comp->w); + weight = weight * TO_FLOAT3(comp->w); #endif - prim->setup(sd, 0, weight); - } - break; - } - } + prim->setup(sd, 0, weight); + } + break; + } + } } void OSLShader::eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag) { - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader for this point */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - - if(kg->osl->background_state) { - ss->execute(octx, *(kg->osl->background_state), *globals); - } - - /* return background color immediately */ - if(globals->Ci) - flatten_background_closure_tree(sd, globals->Ci); + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); + + /* execute shader for this point */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + + if (kg->osl->background_state) { + ss->execute(octx, *(kg->osl->background_state), *globals); + } + + /* return background color immediately */ + if (globals->Ci) + flatten_background_closure_tree(sd, globals->Ci); } /* Volume */ @@ -296,112 +301,117 @@ static void flatten_volume_closure_tree(ShaderData *sd, const OSL::ClosureColor *closure, float3 weight = make_float3(1.0f, 1.0f, 1.0f)) { - /* OSL gives us a closure tree, we flatten it into arrays per - * closure type, for evaluation, sampling, etc later on. 
*/ - - switch(closure->id) { - case OSL::ClosureColor::MUL: { - OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; - flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight); - break; - } - case OSL::ClosureColor::ADD: { - OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; - flatten_volume_closure_tree(sd, add->closureA, weight); - flatten_volume_closure_tree(sd, add->closureB, weight); - break; - } - default: { - OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; - CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); - - if(prim) { + /* OSL gives us a closure tree, we flatten it into arrays per + * closure type, for evaluation, sampling, etc later on. */ + + switch (closure->id) { + case OSL::ClosureColor::MUL: { + OSL::ClosureMul *mul = (OSL::ClosureMul *)closure; + flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight); + break; + } + case OSL::ClosureColor::ADD: { + OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure; + flatten_volume_closure_tree(sd, add->closureA, weight); + flatten_volume_closure_tree(sd, add->closureB, weight); + break; + } + default: { + OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure; + CClosurePrimitive *prim = (CClosurePrimitive *)comp->data(); + + if (prim) { #ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS - weight = weight*TO_FLOAT3(comp->w); + weight = weight * TO_FLOAT3(comp->w); #endif - prim->setup(sd, 0, weight); - } - } - } + prim->setup(sd, 0, weight); + } + } + } } void OSLShader::eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag) { - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); - - /* execute shader */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; - - 
if(kg->osl->volume_state[shader]) { - ss->execute(octx, *(kg->osl->volume_state[shader]), *globals); - } - - /* flatten closure tree */ - if(globals->Ci) - flatten_volume_closure_tree(sd, globals->Ci); + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; + shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata); + + /* execute shader */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; + + if (kg->osl->volume_state[shader]) { + ss->execute(octx, *(kg->osl->volume_state[shader]), *globals); + } + + /* flatten closure tree */ + if (globals->Ci) + flatten_volume_closure_tree(sd, globals->Ci); } /* Displacement */ void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state) { - /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; + /* setup shader globals from shader data */ + OSLThreadData *tdata = kg->osl_tdata; - shaderdata_to_shaderglobals(kg, sd, state, 0, tdata); + shaderdata_to_shaderglobals(kg, sd, state, 0, tdata); - /* execute shader */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem*)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; - int shader = sd->shader & SHADER_MASK; + /* execute shader */ + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; + OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShadingContext *octx = tdata->context; + int shader = sd->shader & SHADER_MASK; - if(kg->osl->displacement_state[shader]) { - ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals); - } + if (kg->osl->displacement_state[shader]) { + ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals); + } - /* get back position */ - sd->P = TO_FLOAT3(globals->P); + /* get back position */ + sd->P = TO_FLOAT3(globals->P); } /* Attributes */ -int 
OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc) +int OSLShader::find_attribute(KernelGlobals *kg, + const ShaderData *sd, + uint id, + AttributeDescriptor *desc) { - /* for OSL, a hash map is used to lookup the attribute by name. */ - int object = sd->object*ATTR_PRIM_TYPES; + /* for OSL, a hash map is used to lookup the attribute by name. */ + int object = sd->object * ATTR_PRIM_TYPES; #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) object += ATTR_PRIM_CURVE; + if (sd->type & PRIMITIVE_ALL_CURVE) + object += ATTR_PRIM_CURVE; #endif - OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object]; - ustring stdname(std::string("geom:") + std::string(Attribute::standard_name((AttributeStandard)id))); - OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname); - - if(it != attr_map.end()) { - const OSLGlobals::Attribute &osl_attr = it->second; - *desc = osl_attr.desc; - - if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) { - desc->offset = ATTR_STD_NOT_FOUND; - return ATTR_STD_NOT_FOUND; - } - - /* return result */ - if(osl_attr.desc.element == ATTR_ELEMENT_NONE) { - desc->offset = ATTR_STD_NOT_FOUND; - } - return desc->offset; - } - else { - desc->offset = ATTR_STD_NOT_FOUND; - return (int)ATTR_STD_NOT_FOUND; - } + OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object]; + ustring stdname(std::string("geom:") + + std::string(Attribute::standard_name((AttributeStandard)id))); + OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname); + + if (it != attr_map.end()) { + const OSLGlobals::Attribute &osl_attr = it->second; + *desc = osl_attr.desc; + + if (sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) { + desc->offset = ATTR_STD_NOT_FOUND; + return ATTR_STD_NOT_FOUND; + } + + /* return result */ + if (osl_attr.desc.element == ATTR_ELEMENT_NONE) { + desc->offset = ATTR_STD_NOT_FOUND; + } + 
return desc->offset; + } + else { + desc->offset = ATTR_STD_NOT_FOUND; + return (int)ATTR_STD_NOT_FOUND; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h index 9824f966a44..a4fa24d0a90 100644 --- a/intern/cycles/kernel/osl/osl_shader.h +++ b/intern/cycles/kernel/osl/osl_shader.h @@ -29,7 +29,7 @@ * This means no thread state must be passed along in the kernel itself. */ -#include "kernel/kernel_types.h" +# include "kernel/kernel_types.h" CCL_NAMESPACE_BEGIN @@ -44,26 +44,31 @@ struct OSLGlobals; struct OSLShadingSystem; class OSLShader { -public: - /* init */ - static void register_closures(OSLShadingSystem *ss); + public: + /* init */ + static void register_closures(OSLShadingSystem *ss); - /* per thread data */ - static void thread_init(KernelGlobals *kg, KernelGlobals *kernel_globals, OSLGlobals *osl_globals); - static void thread_free(KernelGlobals *kg); + /* per thread data */ + static void thread_init(KernelGlobals *kg, + KernelGlobals *kernel_globals, + OSLGlobals *osl_globals); + static void thread_free(KernelGlobals *kg); - /* eval */ - static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); - static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); - static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); - static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state); + /* eval */ + static void eval_surface(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); + static void eval_background(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); + static void eval_volume(KernelGlobals *kg, ShaderData *sd, PathState *state, int path_flag); + static void eval_displacement(KernelGlobals *kg, ShaderData *sd, PathState *state); - /* attributes */ - static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, 
AttributeDescriptor *desc); + /* attributes */ + static int find_attribute(KernelGlobals *kg, + const ShaderData *sd, + uint id, + AttributeDescriptor *desc); }; CCL_NAMESPACE_END #endif -#endif /* __OSL_SHADER_H__ */ +#endif /* __OSL_SHADER_H__ */ diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt index 4740db27d4e..b42b9b2fe64 100644 --- a/intern/cycles/kernel/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/shaders/CMakeLists.txt @@ -2,102 +2,102 @@ # OSL node shaders set(SRC_OSL - node_add_closure.osl - node_ambient_occlusion.osl - node_anisotropic_bsdf.osl - node_attribute.osl - node_background.osl - node_bevel.osl - node_brick_texture.osl - node_brightness.osl - node_bump.osl - node_camera.osl - node_checker_texture.osl - node_combine_rgb.osl - node_combine_hsv.osl - node_combine_xyz.osl - node_convert_from_color.osl - node_convert_from_float.osl - node_convert_from_int.osl - node_convert_from_normal.osl - node_convert_from_point.osl - node_convert_from_vector.osl - node_diffuse_bsdf.osl - node_displacement.osl - node_vector_displacement.osl - node_emission.osl - node_environment_texture.osl - node_fresnel.osl - node_gamma.osl - node_geometry.osl - node_glass_bsdf.osl - node_glossy_bsdf.osl - node_gradient_texture.osl - node_hair_info.osl - node_scatter_volume.osl - node_absorption_volume.osl - node_principled_volume.osl - node_holdout.osl - node_hsv.osl - node_ies_light.osl - node_image_texture.osl - node_invert.osl - node_layer_weight.osl - node_light_falloff.osl - node_light_path.osl - node_magic_texture.osl - node_mapping.osl - node_math.osl - node_mix.osl - node_mix_closure.osl - node_musgrave_texture.osl - node_noise_texture.osl - node_normal.osl - node_normal_map.osl - node_object_info.osl - node_output_displacement.osl - node_output_surface.osl - node_output_volume.osl - node_particle_info.osl - node_refraction_bsdf.osl - node_rgb_curves.osl - node_rgb_ramp.osl - node_separate_rgb.osl - 
node_separate_hsv.osl - node_separate_xyz.osl - node_set_normal.osl - node_sky_texture.osl - node_subsurface_scattering.osl - node_tangent.osl - node_texture_coordinate.osl - node_toon_bsdf.osl - node_translucent_bsdf.osl - node_transparent_bsdf.osl - node_value.osl - node_vector_curves.osl - node_vector_math.osl - node_vector_transform.osl - node_velvet_bsdf.osl - node_voronoi_texture.osl - node_voxel_texture.osl - node_wavelength.osl - node_blackbody.osl - node_wave_texture.osl - node_wireframe.osl - node_hair_bsdf.osl - node_principled_hair_bsdf.osl - node_uv_map.osl - node_principled_bsdf.osl - node_rgb_to_bw.osl + node_add_closure.osl + node_ambient_occlusion.osl + node_anisotropic_bsdf.osl + node_attribute.osl + node_background.osl + node_bevel.osl + node_brick_texture.osl + node_brightness.osl + node_bump.osl + node_camera.osl + node_checker_texture.osl + node_combine_rgb.osl + node_combine_hsv.osl + node_combine_xyz.osl + node_convert_from_color.osl + node_convert_from_float.osl + node_convert_from_int.osl + node_convert_from_normal.osl + node_convert_from_point.osl + node_convert_from_vector.osl + node_diffuse_bsdf.osl + node_displacement.osl + node_vector_displacement.osl + node_emission.osl + node_environment_texture.osl + node_fresnel.osl + node_gamma.osl + node_geometry.osl + node_glass_bsdf.osl + node_glossy_bsdf.osl + node_gradient_texture.osl + node_hair_info.osl + node_scatter_volume.osl + node_absorption_volume.osl + node_principled_volume.osl + node_holdout.osl + node_hsv.osl + node_ies_light.osl + node_image_texture.osl + node_invert.osl + node_layer_weight.osl + node_light_falloff.osl + node_light_path.osl + node_magic_texture.osl + node_mapping.osl + node_math.osl + node_mix.osl + node_mix_closure.osl + node_musgrave_texture.osl + node_noise_texture.osl + node_normal.osl + node_normal_map.osl + node_object_info.osl + node_output_displacement.osl + node_output_surface.osl + node_output_volume.osl + node_particle_info.osl + 
node_refraction_bsdf.osl + node_rgb_curves.osl + node_rgb_ramp.osl + node_separate_rgb.osl + node_separate_hsv.osl + node_separate_xyz.osl + node_set_normal.osl + node_sky_texture.osl + node_subsurface_scattering.osl + node_tangent.osl + node_texture_coordinate.osl + node_toon_bsdf.osl + node_translucent_bsdf.osl + node_transparent_bsdf.osl + node_value.osl + node_vector_curves.osl + node_vector_math.osl + node_vector_transform.osl + node_velvet_bsdf.osl + node_voronoi_texture.osl + node_voxel_texture.osl + node_wavelength.osl + node_blackbody.osl + node_wave_texture.osl + node_wireframe.osl + node_hair_bsdf.osl + node_principled_hair_bsdf.osl + node_uv_map.osl + node_principled_bsdf.osl + node_rgb_to_bw.osl ) set(SRC_OSL_HEADERS - node_color.h - node_fresnel.h - node_ramp_util.h - node_texture.h - stdosl.h - oslutil.h + node_color.h + node_fresnel.h + node_ramp_util.h + node_texture.h + stdosl.h + oslutil.h ) set(SRC_OSO @@ -106,20 +106,20 @@ set(SRC_OSO # TODO, add a module to compile OSL foreach(_file ${SRC_OSL}) - set(_OSL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${_file}) - set_source_files_properties(${_file} PROPERTIES HEADER_FILE_ONLY TRUE) - string(REPLACE ".osl" ".oso" _OSO_FILE ${_OSL_FILE}) - string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE}) - add_custom_command( - OUTPUT ${_OSO_FILE} - COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE} - DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER}) - list(APPEND SRC_OSO - ${_OSO_FILE} - ) + set(_OSL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${_file}) + set_source_files_properties(${_file} PROPERTIES HEADER_FILE_ONLY TRUE) + string(REPLACE ".osl" ".oso" _OSO_FILE ${_OSL_FILE}) + string(REPLACE ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} _OSO_FILE ${_OSO_FILE}) + add_custom_command( + OUTPUT ${_OSO_FILE} + COMMAND ${OSL_COMPILER} -q -O2 -I"${CMAKE_CURRENT_SOURCE_DIR}" -o ${_OSO_FILE} ${_OSL_FILE} + DEPENDS ${_OSL_FILE} ${SRC_OSL_HEADERS} ${OSL_COMPILER}) + 
list(APPEND SRC_OSO + ${_OSO_FILE} + ) - unset(_OSL_FILE) - unset(_OSO_FILE) + unset(_OSL_FILE) + unset(_OSO_FILE) endforeach() add_custom_target(cycles_osl_shaders ALL DEPENDS ${SRC_OSO} ${SRC_OSL_HEADERS} ${OSL_COMPILER} SOURCES ${SRC_OSL}) diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl index 18f662ebbbd..e99bd254666 100644 --- a/intern/cycles/kernel/shaders/node_absorption_volume.osl +++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl @@ -16,11 +16,9 @@ #include "stdosl.h" -shader node_absorption_volume( - color Color = color(0.8, 0.8, 0.8), - float Density = 1.0, - output closure color Volume = 0) +shader node_absorption_volume(color Color = color(0.8, 0.8, 0.8), + float Density = 1.0, + output closure color Volume = 0) { - Volume = ((color(1.0, 1.0, 1.0) - Color) * max(Density, 0.0)) * absorption(); + Volume = ((color(1.0, 1.0, 1.0) - Color) * max(Density, 0.0)) * absorption(); } - diff --git a/intern/cycles/kernel/shaders/node_add_closure.osl b/intern/cycles/kernel/shaders/node_add_closure.osl index b6596e0b6bd..077e2735e61 100644 --- a/intern/cycles/kernel/shaders/node_add_closure.osl +++ b/intern/cycles/kernel/shaders/node_add_closure.osl @@ -16,11 +16,9 @@ #include "stdosl.h" -shader node_add_closure( - closure color Closure1 = 0, - closure color Closure2 = 0, - output closure color Closure = 0) +shader node_add_closure(closure color Closure1 = 0, + closure color Closure2 = 0, + output closure color Closure = 0) { - Closure = Closure1 + Closure2; + Closure = Closure1 + Closure2; } - diff --git a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl index 825cccd59ce..7bf28719e78 100644 --- a/intern/cycles/kernel/shaders/node_ambient_occlusion.osl +++ b/intern/cycles/kernel/shaders/node_ambient_occlusion.osl @@ -16,20 +16,28 @@ #include "stdosl.h" -shader node_ambient_occlusion( - color ColorIn = color(1.0, 
1.0, 1.0), - int samples = 16, - float Distance = 1.0, - normal Normal = N, - int inside = 0, - int only_local = 0, - output color ColorOut = color(1.0, 1.0, 1.0), - output float AO = 1.0) +shader node_ambient_occlusion(color ColorIn = color(1.0, 1.0, 1.0), + int samples = 16, + float Distance = 1.0, + normal Normal = N, + int inside = 0, + int only_local = 0, + output color ColorOut = color(1.0, 1.0, 1.0), + output float AO = 1.0) { - int global_radius = (Distance == 0.0 && !isconnected(Distance)); + int global_radius = (Distance == 0.0 && !isconnected(Distance)); - /* Abuse texture call with special @ao token. */ - AO = texture("@ao", samples, Distance, Normal[0], Normal[1], Normal[2], inside, "sblur", only_local, "tblur", global_radius); - ColorOut = ColorIn * AO; + /* Abuse texture call with special @ao token. */ + AO = texture("@ao", + samples, + Distance, + Normal[0], + Normal[1], + Normal[2], + inside, + "sblur", + only_local, + "tblur", + global_radius); + ColorOut = ColorIn * AO; } - diff --git a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl index 21e28ece65d..165c09eb8e0 100644 --- a/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_anisotropic_bsdf.osl @@ -16,45 +16,43 @@ #include "stdosl.h" -shader node_anisotropic_bsdf( - color Color = 0.0, - string distribution = "GGX", - float Roughness = 0.0, - float Anisotropy = 0.0, - float Rotation = 0.0, - normal Normal = N, - normal Tangent = normalize(dPdu), - output closure color BSDF = 0) +shader node_anisotropic_bsdf(color Color = 0.0, + string distribution = "GGX", + float Roughness = 0.0, + float Anisotropy = 0.0, + float Rotation = 0.0, + normal Normal = N, + normal Tangent = normalize(dPdu), + output closure color BSDF = 0) { - /* rotate tangent around normal */ - vector T = Tangent; + /* rotate tangent around normal */ + vector T = Tangent; - if (Rotation != 0.0) - T = rotate(T, Rotation * 
M_2PI, point(0.0, 0.0, 0.0), Normal); + if (Rotation != 0.0) + T = rotate(T, Rotation * M_2PI, point(0.0, 0.0, 0.0), Normal); - /* compute roughness */ - float roughness = Roughness * Roughness; - float roughness_u, roughness_v; - float aniso = clamp(Anisotropy, -0.99, 0.99); + /* compute roughness */ + float roughness = Roughness * Roughness; + float roughness_u, roughness_v; + float aniso = clamp(Anisotropy, -0.99, 0.99); - if (aniso < 0.0) { - roughness_u = roughness / (1.0 + aniso); - roughness_v = roughness * (1.0 + aniso); - } - else { - roughness_u = roughness * (1.0 - aniso); - roughness_v = roughness / (1.0 - aniso); - } + if (aniso < 0.0) { + roughness_u = roughness / (1.0 + aniso); + roughness_v = roughness * (1.0 + aniso); + } + else { + roughness_u = roughness * (1.0 - aniso); + roughness_v = roughness / (1.0 - aniso); + } - if (distribution == "sharp") - BSDF = Color * reflection(Normal); - else if (distribution == "beckmann") - BSDF = Color * microfacet_beckmann_aniso(Normal, T, roughness_u, roughness_v); - else if (distribution == "GGX") - BSDF = Color * microfacet_ggx_aniso(Normal, T, roughness_u, roughness_v); - else if (distribution == "Multiscatter GGX") - BSDF = Color * microfacet_multi_ggx_aniso(Normal, T, roughness_u, roughness_v, Color); - else - BSDF = Color * ashikhmin_shirley(Normal, T, roughness_u, roughness_v); + if (distribution == "sharp") + BSDF = Color * reflection(Normal); + else if (distribution == "beckmann") + BSDF = Color * microfacet_beckmann_aniso(Normal, T, roughness_u, roughness_v); + else if (distribution == "GGX") + BSDF = Color * microfacet_ggx_aniso(Normal, T, roughness_u, roughness_v); + else if (distribution == "Multiscatter GGX") + BSDF = Color * microfacet_multi_ggx_aniso(Normal, T, roughness_u, roughness_v, Color); + else + BSDF = Color * ashikhmin_shirley(Normal, T, roughness_u, roughness_v); } - diff --git a/intern/cycles/kernel/shaders/node_attribute.osl b/intern/cycles/kernel/shaders/node_attribute.osl index 
67183e9ffe0..336543cc130 100644 --- a/intern/cycles/kernel/shaders/node_attribute.osl +++ b/intern/cycles/kernel/shaders/node_attribute.osl @@ -16,26 +16,24 @@ #include "stdosl.h" -shader node_attribute( - string bump_offset = "center", - string name = "", - output point Vector = point(0.0, 0.0, 0.0), - output color Color = 0.0, - output float Fac = 0.0) +shader node_attribute(string bump_offset = "center", + string name = "", + output point Vector = point(0.0, 0.0, 0.0), + output color Color = 0.0, + output float Fac = 0.0) { - getattribute(name, Color); - Vector = point(Color); - getattribute(name, Fac); + getattribute(name, Color); + Vector = point(Color); + getattribute(name, Fac); - if (bump_offset == "dx") { - Color += Dx(Color); - Vector += Dx(Vector); - Fac += Dx(Fac); - } - else if (bump_offset == "dy") { - Color += Dy(Color); - Vector += Dy(Vector); - Fac += Dy(Fac); - } + if (bump_offset == "dx") { + Color += Dx(Color); + Vector += Dx(Vector); + Fac += Dx(Fac); + } + else if (bump_offset == "dy") { + Color += Dy(Color); + Vector += Dy(Vector); + Fac += Dy(Fac); + } } - diff --git a/intern/cycles/kernel/shaders/node_background.osl b/intern/cycles/kernel/shaders/node_background.osl index 613d4e360fa..6ded0d2c65c 100644 --- a/intern/cycles/kernel/shaders/node_background.osl +++ b/intern/cycles/kernel/shaders/node_background.osl @@ -16,11 +16,9 @@ #include "stdosl.h" -shader node_background( - color Color = 0.8, - float Strength = 1.0, - output closure color Background = 0) +shader node_background(color Color = 0.8, + float Strength = 1.0, + output closure color Background = 0) { - Background = Color * Strength * background(); + Background = Color * Strength * background(); } - diff --git a/intern/cycles/kernel/shaders/node_bevel.osl b/intern/cycles/kernel/shaders/node_bevel.osl index 9c4ca15be17..189c20c52e7 100644 --- a/intern/cycles/kernel/shaders/node_bevel.osl +++ b/intern/cycles/kernel/shaders/node_bevel.osl @@ -16,16 +16,14 @@ #include "stdosl.h" 
-shader node_bevel( - int samples = 4, - float Radius = 0.05, - normal NormalIn = N, - output normal NormalOut = N) +shader node_bevel(int samples = 4, + float Radius = 0.05, + normal NormalIn = N, + output normal NormalOut = N) { - /* Abuse texture call with special @bevel token. */ - vector bevel_N = (normal)(color)texture("@bevel", samples, Radius); + /* Abuse texture call with special @bevel token. */ + vector bevel_N = (normal)(color)texture("@bevel", samples, Radius); - /* Preserve input normal. */ - NormalOut = normalize(NormalIn + (bevel_N - N)); + /* Preserve input normal. */ + NormalOut = normalize(NormalIn + (bevel_N - N)); } - diff --git a/intern/cycles/kernel/shaders/node_blackbody.osl b/intern/cycles/kernel/shaders/node_blackbody.osl index 1da6894d0f0..8a24bf1e28b 100644 --- a/intern/cycles/kernel/shaders/node_blackbody.osl +++ b/intern/cycles/kernel/shaders/node_blackbody.osl @@ -16,16 +16,13 @@ #include "stdosl.h" -shader node_blackbody( - float Temperature = 1200.0, - output color Color = 0.0) +shader node_blackbody(float Temperature = 1200.0, output color Color = 0.0) { - color rgb = blackbody(Temperature); - - /* Scale by luminance */ - float l = luminance(rgb); - if (l != 0.0) - rgb /= l; - Color = rgb; -} + color rgb = blackbody(Temperature); + /* Scale by luminance */ + float l = luminance(rgb); + if (l != 0.0) + rgb /= l; + Color = rgb; +} diff --git a/intern/cycles/kernel/shaders/node_brick_texture.osl b/intern/cycles/kernel/shaders/node_brick_texture.osl index 9d2e5b74ce6..0abc3574c48 100644 --- a/intern/cycles/kernel/shaders/node_brick_texture.osl +++ b/intern/cycles/kernel/shaders/node_brick_texture.osl @@ -21,85 +21,100 @@ float brick_noise(int ns) /* fast integer noise */ { - int nn; - int n = (ns + 1013) & 2147483647; - n = (n >> 13) ^ n; - nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 2147483647; - return 0.5 * ((float)nn / 1073741824.0); + int nn; + int n = (ns + 1013) & 2147483647; + n = (n >> 13) ^ n; + nn = (n * (n * n * 
60493 + 19990303) + 1376312589) & 2147483647; + return 0.5 * ((float)nn / 1073741824.0); } -float brick(point p, float mortar_size, float mortar_smooth, float bias, - float BrickWidth, float row_height, float offset_amount, int offset_frequency, - float squash_amount, int squash_frequency, output float tint) +float brick(point p, + float mortar_size, + float mortar_smooth, + float bias, + float BrickWidth, + float row_height, + float offset_amount, + int offset_frequency, + float squash_amount, + int squash_frequency, + output float tint) { - int bricknum, rownum; - float offset = 0.0; - float brick_width = BrickWidth; - float x, y; - - rownum = (int)floor(p[1] / row_height); - - if (offset_frequency && squash_frequency) { - brick_width *= (rownum % squash_frequency) ? 1.0 : squash_amount; /* squash */ - offset = (rownum % offset_frequency) ? 0.0 : (brick_width * offset_amount); /* offset */ - } - - bricknum = (int)floor((p[0] + offset) / brick_width); - - x = (p[0] + offset) - brick_width * bricknum; - y = p[1] - row_height * rownum; - - tint = clamp((brick_noise((rownum << 16) + (bricknum & 65535)) + bias), 0.0, 1.0); - - float min_dist = min(min(x, y), min(brick_width - x, row_height - y)); - if(min_dist >= mortar_size) { - return 0.0; - } - else if(mortar_smooth == 0.0) { - return 1.0; - } - else { - min_dist = 1.0 - min_dist/mortar_size; - return smoothstep(0.0, mortar_smooth, min_dist); - } + int bricknum, rownum; + float offset = 0.0; + float brick_width = BrickWidth; + float x, y; + + rownum = (int)floor(p[1] / row_height); + + if (offset_frequency && squash_frequency) { + brick_width *= (rownum % squash_frequency) ? 1.0 : squash_amount; /* squash */ + offset = (rownum % offset_frequency) ? 
0.0 : (brick_width * offset_amount); /* offset */ + } + + bricknum = (int)floor((p[0] + offset) / brick_width); + + x = (p[0] + offset) - brick_width * bricknum; + y = p[1] - row_height * rownum; + + tint = clamp((brick_noise((rownum << 16) + (bricknum & 65535)) + bias), 0.0, 1.0); + + float min_dist = min(min(x, y), min(brick_width - x, row_height - y)); + if (min_dist >= mortar_size) { + return 0.0; + } + else if (mortar_smooth == 0.0) { + return 1.0; + } + else { + min_dist = 1.0 - min_dist / mortar_size; + return smoothstep(0.0, mortar_smooth, min_dist); + } } -shader node_brick_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - float offset = 0.5, - int offset_frequency = 2, - float squash = 1.0, - int squash_frequency = 1, - point Vector = P, - color Color1 = 0.2, - color Color2 = 0.8, - color Mortar = 0.0, - float Scale = 5.0, - float MortarSize = 0.02, - float MortarSmooth = 0.0, - float Bias = 0.0, - float BrickWidth = 0.5, - float RowHeight = 0.25, - output float Fac = 0.0, - output color Color = 0.2) +shader node_brick_texture(int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + float offset = 0.5, + int offset_frequency = 2, + float squash = 1.0, + int squash_frequency = 1, + point Vector = P, + color Color1 = 0.2, + color Color2 = 0.8, + color Mortar = 0.0, + float Scale = 5.0, + float MortarSize = 0.02, + float MortarSmooth = 0.0, + float Bias = 0.0, + float BrickWidth = 0.5, + float RowHeight = 0.25, + output float Fac = 0.0, + output color Color = 0.2) { - point p = Vector; - - if (use_mapping) - p = transform(mapping, p); - - float tint = 0.0; - color Col = Color1; - - Fac = brick(p * Scale, MortarSize, MortarSmooth, Bias, BrickWidth, RowHeight, - offset, offset_frequency, squash, squash_frequency, tint); - - if (Fac != 1.0) { - float facm = 1.0 - tint; - Col = facm * Color1 + tint * Color2; - } - - Color = mix(Col, Mortar, Fac); -} + point p = 
Vector; + + if (use_mapping) + p = transform(mapping, p); + + float tint = 0.0; + color Col = Color1; + Fac = brick(p * Scale, + MortarSize, + MortarSmooth, + Bias, + BrickWidth, + RowHeight, + offset, + offset_frequency, + squash, + squash_frequency, + tint); + + if (Fac != 1.0) { + float facm = 1.0 - tint; + Col = facm * Color1 + tint * Color2; + } + + Color = mix(Col, Mortar, Fac); +} diff --git a/intern/cycles/kernel/shaders/node_brightness.osl b/intern/cycles/kernel/shaders/node_brightness.osl index 00cfb167885..2defbc4b1db 100644 --- a/intern/cycles/kernel/shaders/node_brightness.osl +++ b/intern/cycles/kernel/shaders/node_brightness.osl @@ -16,17 +16,15 @@ #include "stdosl.h" -shader node_brightness( - color ColorIn = 0.8, - float Bright = 0.0, - float Contrast = 0.0, - output color ColorOut = 0.8) +shader node_brightness(color ColorIn = 0.8, + float Bright = 0.0, + float Contrast = 0.0, + output color ColorOut = 0.8) { - float a = 1.0 + Contrast; - float b = Bright - Contrast * 0.5; + float a = 1.0 + Contrast; + float b = Bright - Contrast * 0.5; - ColorOut[0] = max(a * ColorIn[0] + b, 0.0); - ColorOut[1] = max(a * ColorIn[1] + b, 0.0); - ColorOut[2] = max(a * ColorIn[2] + b, 0.0); + ColorOut[0] = max(a * ColorIn[0] + b, 0.0); + ColorOut[1] = max(a * ColorIn[1] + b, 0.0); + ColorOut[2] = max(a * ColorIn[2] + b, 0.0); } - diff --git a/intern/cycles/kernel/shaders/node_bump.osl b/intern/cycles/kernel/shaders/node_bump.osl index a2a4468d5f3..3697bb37fd9 100644 --- a/intern/cycles/kernel/shaders/node_bump.osl +++ b/intern/cycles/kernel/shaders/node_bump.osl @@ -19,52 +19,50 @@ /* "Bump Mapping Unparameterized Surfaces on the GPU" * Morten S. 
Mikkelsen, 2010 */ -surface node_bump( - int invert = 0, - int use_object_space = 0, - normal NormalIn = N, - float Strength = 0.1, - float Distance = 1.0, - float SampleCenter = 0.0, - float SampleX = 0.0, - float SampleY = 0.0, - output normal NormalOut = N) +surface node_bump(int invert = 0, + int use_object_space = 0, + normal NormalIn = N, + float Strength = 0.1, + float Distance = 1.0, + float SampleCenter = 0.0, + float SampleX = 0.0, + float SampleY = 0.0, + output normal NormalOut = N) { - point Ptmp = P; - normal Normal = NormalIn; + point Ptmp = P; + normal Normal = NormalIn; - if (use_object_space) { - Ptmp = transform("object", Ptmp); - Normal = normalize(transform("object", Normal)); - } + if (use_object_space) { + Ptmp = transform("object", Ptmp); + Normal = normalize(transform("object", Normal)); + } - /* get surface tangents from normal */ - vector dPdx = Dx(Ptmp); - vector dPdy = Dy(Ptmp); + /* get surface tangents from normal */ + vector dPdx = Dx(Ptmp); + vector dPdy = Dy(Ptmp); - vector Rx = cross(dPdy, Normal); - vector Ry = cross(Normal, dPdx); + vector Rx = cross(dPdy, Normal); + vector Ry = cross(Normal, dPdx); - /* compute surface gradient and determinant */ - float det = dot(dPdx, Rx); - vector surfgrad = (SampleX - SampleCenter) * Rx + (SampleY - SampleCenter) * Ry; + /* compute surface gradient and determinant */ + float det = dot(dPdx, Rx); + vector surfgrad = (SampleX - SampleCenter) * Rx + (SampleY - SampleCenter) * Ry; - float absdet = fabs(det); + float absdet = fabs(det); - float strength = max(Strength, 0.0); - float dist = Distance; + float strength = max(Strength, 0.0); + float dist = Distance; - if (invert) - dist *= -1.0; - - /* compute and output perturbed normal */ - NormalOut = normalize(absdet * Normal - dist * sign(det) * surfgrad); - NormalOut = normalize(strength * NormalOut + (1.0 - strength) * Normal); + if (invert) + dist *= -1.0; - if (use_object_space) { - NormalOut = normalize(transform("object", "world", 
NormalOut)); - } + /* compute and output perturbed normal */ + NormalOut = normalize(absdet * Normal - dist * sign(det) * surfgrad); + NormalOut = normalize(strength * NormalOut + (1.0 - strength) * Normal); - NormalOut = ensure_valid_reflection(Ng, I, NormalOut); -} + if (use_object_space) { + NormalOut = normalize(transform("object", "world", NormalOut)); + } + NormalOut = ensure_valid_reflection(Ng, I, NormalOut); +} diff --git a/intern/cycles/kernel/shaders/node_camera.osl b/intern/cycles/kernel/shaders/node_camera.osl index 5e90cb8b8ee..833e9e775fe 100644 --- a/intern/cycles/kernel/shaders/node_camera.osl +++ b/intern/cycles/kernel/shaders/node_camera.osl @@ -16,16 +16,14 @@ #include "stdosl.h" -shader node_camera( - output vector ViewVector = vector(0.0, 0.0, 0.0), - output float ViewZDepth = 0.0, - output float ViewDistance = 0.0) +shader node_camera(output vector ViewVector = vector(0.0, 0.0, 0.0), + output float ViewZDepth = 0.0, + output float ViewDistance = 0.0) { - ViewVector = (vector)transform("world", "camera", P); + ViewVector = (vector)transform("world", "camera", P); - ViewZDepth = fabs(ViewVector[2]); - ViewDistance = length(ViewVector); + ViewZDepth = fabs(ViewVector[2]); + ViewDistance = length(ViewVector); - ViewVector = normalize(ViewVector); + ViewVector = normalize(ViewVector); } - diff --git a/intern/cycles/kernel/shaders/node_checker_texture.osl b/intern/cycles/kernel/shaders/node_checker_texture.osl index e745cfaee06..e068f7952ed 100644 --- a/intern/cycles/kernel/shaders/node_checker_texture.osl +++ b/intern/cycles/kernel/shaders/node_checker_texture.osl @@ -21,44 +21,43 @@ float checker(point ip) { - point p; - p[0] = (ip[0] + 0.000001) * 0.999999; - p[1] = (ip[1] + 0.000001) * 0.999999; - p[2] = (ip[2] + 0.000001) * 0.999999; - - int xi = (int)fabs(floor(p[0])); - int yi = (int)fabs(floor(p[1])); - int zi = (int)fabs(floor(p[2])); - - if ((xi % 2 == yi % 2) == (zi % 2)) { - return 1.0; - } - else { - return 0.0; - } + point p; + p[0] = 
(ip[0] + 0.000001) * 0.999999; + p[1] = (ip[1] + 0.000001) * 0.999999; + p[2] = (ip[2] + 0.000001) * 0.999999; + + int xi = (int)fabs(floor(p[0])); + int yi = (int)fabs(floor(p[1])); + int zi = (int)fabs(floor(p[2])); + + if ((xi % 2 == yi % 2) == (zi % 2)) { + return 1.0; + } + else { + return 0.0; + } } shader node_checker_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - float Scale = 5.0, - point Vector = P, - color Color1 = 0.8, - color Color2 = 0.2, - output float Fac = 0.0, - output color Color = 0.0) + int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + float Scale = 5.0, + point Vector = P, + color Color1 = 0.8, + color Color2 = 0.2, + output float Fac = 0.0, + output color Color = 0.0) { - point p = Vector; - - if (use_mapping) - p = transform(mapping, p); - - Fac = checker(p * Scale); - if (Fac == 1.0) { - Color = Color1; - } - else { - Color = Color2; - } + point p = Vector; + + if (use_mapping) + p = transform(mapping, p); + + Fac = checker(p * Scale); + if (Fac == 1.0) { + Color = Color1; + } + else { + Color = Color2; + } } - diff --git a/intern/cycles/kernel/shaders/node_color.h b/intern/cycles/kernel/shaders/node_color.h index fc758bef1fa..276c91843e8 100644 --- a/intern/cycles/kernel/shaders/node_color.h +++ b/intern/cycles/kernel/shaders/node_color.h @@ -18,135 +18,146 @@ float color_srgb_to_scene_linear(float c) { - if (c < 0.04045) - return (c < 0.0) ? 0.0 : c * (1.0 / 12.92); - else - return pow((c + 0.055) * (1.0 / 1.055), 2.4); + if (c < 0.04045) + return (c < 0.0) ? 0.0 : c * (1.0 / 12.92); + else + return pow((c + 0.055) * (1.0 / 1.055), 2.4); } float color_scene_linear_to_srgb(float c) { - if (c < 0.0031308) - return (c < 0.0) ? 0.0 : c * 12.92; - else - return 1.055 * pow(c, 1.0 / 2.4) - 0.055; + if (c < 0.0031308) + return (c < 0.0) ? 
0.0 : c * 12.92; + else + return 1.055 * pow(c, 1.0 / 2.4) - 0.055; } color color_srgb_to_scene_linear(color c) { - return color( - color_srgb_to_scene_linear(c[0]), - color_srgb_to_scene_linear(c[1]), - color_srgb_to_scene_linear(c[2])); + return color(color_srgb_to_scene_linear(c[0]), + color_srgb_to_scene_linear(c[1]), + color_srgb_to_scene_linear(c[2])); } color color_scene_linear_to_srgb(color c) { - return color( - color_scene_linear_to_srgb(c[0]), - color_scene_linear_to_srgb(c[1]), - color_scene_linear_to_srgb(c[2])); + return color(color_scene_linear_to_srgb(c[0]), + color_scene_linear_to_srgb(c[1]), + color_scene_linear_to_srgb(c[2])); } color color_unpremultiply(color c, float alpha) { - if (alpha != 1.0 && alpha != 0.0) - return c / alpha; + if (alpha != 1.0 && alpha != 0.0) + return c / alpha; - return c; + return c; } /* Color Operations */ color xyY_to_xyz(float x, float y, float Y) { - float X, Z; + float X, Z; - if (y != 0.0) X = (x / y) * Y; - else X = 0.0; + if (y != 0.0) + X = (x / y) * Y; + else + X = 0.0; - if (y != 0.0 && Y != 0.0) Z = ((1.0 - x - y) / y) * Y; - else Z = 0.0; + if (y != 0.0 && Y != 0.0) + Z = ((1.0 - x - y) / y) * Y; + else + Z = 0.0; - return color(X, Y, Z); + return color(X, Y, Z); } color xyz_to_rgb(float x, float y, float z) { - return color( 3.240479 * x + -1.537150 * y + -0.498535 * z, - -0.969256 * x + 1.875991 * y + 0.041556 * z, - 0.055648 * x + -0.204043 * y + 1.057311 * z); + return color(3.240479 * x + -1.537150 * y + -0.498535 * z, + -0.969256 * x + 1.875991 * y + 0.041556 * z, + 0.055648 * x + -0.204043 * y + 1.057311 * z); } color rgb_to_hsv(color rgb) { - float cmax, cmin, h, s, v, cdelta; - color c; - - cmax = max(rgb[0], max(rgb[1], rgb[2])); - cmin = min(rgb[0], min(rgb[1], rgb[2])); - cdelta = cmax - cmin; - - v = cmax; - - if (cmax != 0.0) { - s = cdelta / cmax; - } - else { - s = 0.0; - h = 0.0; - } - - if (s == 0.0) { - h = 0.0; - } - else { - c = (color(cmax, cmax, cmax) - rgb) / cdelta; - - if (rgb[0] 
== cmax) h = c[2] - c[1]; - else if (rgb[1] == cmax) h = 2.0 + c[0] - c[2]; - else h = 4.0 + c[1] - c[0]; - - h /= 6.0; - - if (h < 0.0) - h += 1.0; - } - - return color(h, s, v); + float cmax, cmin, h, s, v, cdelta; + color c; + + cmax = max(rgb[0], max(rgb[1], rgb[2])); + cmin = min(rgb[0], min(rgb[1], rgb[2])); + cdelta = cmax - cmin; + + v = cmax; + + if (cmax != 0.0) { + s = cdelta / cmax; + } + else { + s = 0.0; + h = 0.0; + } + + if (s == 0.0) { + h = 0.0; + } + else { + c = (color(cmax, cmax, cmax) - rgb) / cdelta; + + if (rgb[0] == cmax) + h = c[2] - c[1]; + else if (rgb[1] == cmax) + h = 2.0 + c[0] - c[2]; + else + h = 4.0 + c[1] - c[0]; + + h /= 6.0; + + if (h < 0.0) + h += 1.0; + } + + return color(h, s, v); } color hsv_to_rgb(color hsv) { - float i, f, p, q, t, h, s, v; - color rgb; - - h = hsv[0]; - s = hsv[1]; - v = hsv[2]; - - if (s == 0.0) { - rgb = color(v, v, v); - } - else { - if (h == 1.0) - h = 0.0; - - h *= 6.0; - i = floor(h); - f = h - i; - rgb = color(f, f, f); - p = v * (1.0 - s); - q = v * (1.0 - (s * f)); - t = v * (1.0 - (s * (1.0 - f))); - - if (i == 0.0) rgb = color(v, t, p); - else if (i == 1.0) rgb = color(q, v, p); - else if (i == 2.0) rgb = color(p, v, t); - else if (i == 3.0) rgb = color(p, q, v); - else if (i == 4.0) rgb = color(t, p, v); - else rgb = color(v, p, q); - } - - return rgb; + float i, f, p, q, t, h, s, v; + color rgb; + + h = hsv[0]; + s = hsv[1]; + v = hsv[2]; + + if (s == 0.0) { + rgb = color(v, v, v); + } + else { + if (h == 1.0) + h = 0.0; + + h *= 6.0; + i = floor(h); + f = h - i; + rgb = color(f, f, f); + p = v * (1.0 - s); + q = v * (1.0 - (s * f)); + t = v * (1.0 - (s * (1.0 - f))); + + if (i == 0.0) + rgb = color(v, t, p); + else if (i == 1.0) + rgb = color(q, v, p); + else if (i == 2.0) + rgb = color(p, v, t); + else if (i == 3.0) + rgb = color(p, q, v); + else if (i == 4.0) + rgb = color(t, p, v); + else + rgb = color(v, p, q); + } + + return rgb; } diff --git 
a/intern/cycles/kernel/shaders/node_combine_hsv.osl b/intern/cycles/kernel/shaders/node_combine_hsv.osl index 6b922bf4e6b..1658cf3d774 100644 --- a/intern/cycles/kernel/shaders/node_combine_hsv.osl +++ b/intern/cycles/kernel/shaders/node_combine_hsv.osl @@ -16,12 +16,7 @@ #include "stdosl.h" -shader node_combine_hsv( - float H = 0.0, - float S = 0.0, - float V = 0.0, - output color Color = 0.8) +shader node_combine_hsv(float H = 0.0, float S = 0.0, float V = 0.0, output color Color = 0.8) { - Color = color("hsv", H, S, V); + Color = color("hsv", H, S, V); } - diff --git a/intern/cycles/kernel/shaders/node_combine_rgb.osl b/intern/cycles/kernel/shaders/node_combine_rgb.osl index f343fdefd84..aaa95e9c5af 100644 --- a/intern/cycles/kernel/shaders/node_combine_rgb.osl +++ b/intern/cycles/kernel/shaders/node_combine_rgb.osl @@ -16,12 +16,7 @@ #include "stdosl.h" -shader node_combine_rgb( - float R = 0.0, - float G = 0.0, - float B = 0.0, - output color Image = 0.8) +shader node_combine_rgb(float R = 0.0, float G = 0.0, float B = 0.0, output color Image = 0.8) { - Image = color(R, G, B); + Image = color(R, G, B); } - diff --git a/intern/cycles/kernel/shaders/node_combine_xyz.osl b/intern/cycles/kernel/shaders/node_combine_xyz.osl index 86182056b09..4ab49168704 100644 --- a/intern/cycles/kernel/shaders/node_combine_xyz.osl +++ b/intern/cycles/kernel/shaders/node_combine_xyz.osl @@ -16,12 +16,7 @@ #include "stdosl.h" -shader node_combine_xyz( - float X = 0.0, - float Y = 0.0, - float Z = 0.0, - output vector Vector = 0.8) +shader node_combine_xyz(float X = 0.0, float Y = 0.0, float Z = 0.0, output vector Vector = 0.8) { - Vector = vector(X, Y, Z); + Vector = vector(X, Y, Z); } - diff --git a/intern/cycles/kernel/shaders/node_convert_from_color.osl b/intern/cycles/kernel/shaders/node_convert_from_color.osl index e95a17f6fa1..7ea9a1e4fb3 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_color.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_color.osl @@ 
-16,19 +16,17 @@ #include "stdosl.h" -shader node_convert_from_color( - color value_color = 0.0, - output string value_string = "", - output float value_float = 0.0, - output int value_int = 0, - output vector value_vector = vector(0.0, 0.0, 0.0), - output point value_point = point(0.0, 0.0, 0.0), - output normal value_normal = normal(0.0, 0.0, 0.0)) +shader node_convert_from_color(color value_color = 0.0, + output string value_string = "", + output float value_float = 0.0, + output int value_int = 0, + output vector value_vector = vector(0.0, 0.0, 0.0), + output point value_point = point(0.0, 0.0, 0.0), + output normal value_normal = normal(0.0, 0.0, 0.0)) { - value_float = value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722; - value_int = (int)(value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722); - value_vector = vector(value_color[0], value_color[1], value_color[2]); - value_point = point(value_color[0], value_color[1], value_color[2]); - value_normal = normal(value_color[0], value_color[1], value_color[2]); + value_float = value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722; + value_int = (int)(value_color[0] * 0.2126 + value_color[1] * 0.7152 + value_color[2] * 0.0722); + value_vector = vector(value_color[0], value_color[1], value_color[2]); + value_point = point(value_color[0], value_color[1], value_color[2]); + value_normal = normal(value_color[0], value_color[1], value_color[2]); } - diff --git a/intern/cycles/kernel/shaders/node_convert_from_float.osl b/intern/cycles/kernel/shaders/node_convert_from_float.osl index a5c2e3b26ad..13b5dea0838 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_float.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_float.osl @@ -16,19 +16,17 @@ #include "stdosl.h" -shader node_convert_from_float( - float value_float = 0.0, - output string value_string = "", - output int value_int = 0, - output color value_color = 0.0, - output vector 
value_vector = vector(0.0, 0.0, 0.0), - output point value_point = point(0.0, 0.0, 0.0), - output normal value_normal = normal(0.0, 0.0, 0.0)) +shader node_convert_from_float(float value_float = 0.0, + output string value_string = "", + output int value_int = 0, + output color value_color = 0.0, + output vector value_vector = vector(0.0, 0.0, 0.0), + output point value_point = point(0.0, 0.0, 0.0), + output normal value_normal = normal(0.0, 0.0, 0.0)) { - value_int = (int)value_float; - value_color = color(value_float, value_float, value_float); - value_vector = vector(value_float, value_float, value_float); - value_point = point(value_float, value_float, value_float); - value_normal = normal(value_float, value_float, value_float); + value_int = (int)value_float; + value_color = color(value_float, value_float, value_float); + value_vector = vector(value_float, value_float, value_float); + value_point = point(value_float, value_float, value_float); + value_normal = normal(value_float, value_float, value_float); } - diff --git a/intern/cycles/kernel/shaders/node_convert_from_int.osl b/intern/cycles/kernel/shaders/node_convert_from_int.osl index 0e6ae711210..a59e025d822 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_int.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_int.osl @@ -16,20 +16,18 @@ #include "stdosl.h" -shader node_convert_from_int( - int value_int = 0, - output string value_string = "", - output float value_float = 0.0, - output color value_color = 0.0, - output vector value_vector = vector(0.0, 0.0, 0.0), - output point value_point = point(0.0, 0.0, 0.0), - output normal value_normal = normal(0.0, 0.0, 0.0)) +shader node_convert_from_int(int value_int = 0, + output string value_string = "", + output float value_float = 0.0, + output color value_color = 0.0, + output vector value_vector = vector(0.0, 0.0, 0.0), + output point value_point = point(0.0, 0.0, 0.0), + output normal value_normal = normal(0.0, 0.0, 0.0)) { - float f = 
(float)value_int; - value_float = f; - value_color = color(f, f, f); - value_vector = vector(f, f, f); - value_point = point(f, f, f); - value_normal = normal(f, f, f); + float f = (float)value_int; + value_float = f; + value_color = color(f, f, f); + value_vector = vector(f, f, f); + value_point = point(f, f, f); + value_normal = normal(f, f, f); } - diff --git a/intern/cycles/kernel/shaders/node_convert_from_normal.osl b/intern/cycles/kernel/shaders/node_convert_from_normal.osl index 7fffa7f6169..7bdd94d1941 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_normal.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_normal.osl @@ -16,19 +16,17 @@ #include "stdosl.h" -shader node_convert_from_normal( - normal value_normal = normal(0.0, 0.0, 0.0), - output string value_string = "", - output float value_float = 0.0, - output int value_int = 0, - output vector value_vector = vector(0.0, 0.0, 0.0), - output color value_color = 0.0, - output point value_point = point(0.0, 0.0, 0.0)) +shader node_convert_from_normal(normal value_normal = normal(0.0, 0.0, 0.0), + output string value_string = "", + output float value_float = 0.0, + output int value_int = 0, + output vector value_vector = vector(0.0, 0.0, 0.0), + output color value_color = 0.0, + output point value_point = point(0.0, 0.0, 0.0)) { - value_float = (value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0); - value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0)); - value_vector = vector(value_normal[0], value_normal[1], value_normal[2]); - value_color = color(value_normal[0], value_normal[1], value_normal[2]); - value_point = point(value_normal[0], value_normal[1], value_normal[2]); + value_float = (value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0); + value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0)); + value_vector = vector(value_normal[0], value_normal[1], value_normal[2]); + value_color = 
color(value_normal[0], value_normal[1], value_normal[2]); + value_point = point(value_normal[0], value_normal[1], value_normal[2]); } - diff --git a/intern/cycles/kernel/shaders/node_convert_from_point.osl b/intern/cycles/kernel/shaders/node_convert_from_point.osl index 9e4930296bb..79c1719e7a7 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_point.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_point.osl @@ -16,19 +16,17 @@ #include "stdosl.h" -shader node_convert_from_point( - point value_point = point(0.0, 0.0, 0.0), - output string value_string = "", - output float value_float = 0.0, - output int value_int = 0, - output vector value_vector = vector(0.0, 0.0, 0.0), - output color value_color = 0.0, - output normal value_normal = normal(0.0, 0.0, 0.0)) +shader node_convert_from_point(point value_point = point(0.0, 0.0, 0.0), + output string value_string = "", + output float value_float = 0.0, + output int value_int = 0, + output vector value_vector = vector(0.0, 0.0, 0.0), + output color value_color = 0.0, + output normal value_normal = normal(0.0, 0.0, 0.0)) { - value_float = (value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0); - value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0)); - value_vector = vector(value_point[0], value_point[1], value_point[2]); - value_color = color(value_point[0], value_point[1], value_point[2]); - value_normal = normal(value_point[0], value_point[1], value_point[2]); + value_float = (value_point[0] + value_point[1] + value_point[2]) * (1.0 / 3.0); + value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0)); + value_vector = vector(value_point[0], value_point[1], value_point[2]); + value_color = color(value_point[0], value_point[1], value_point[2]); + value_normal = normal(value_point[0], value_point[1], value_point[2]); } - diff --git a/intern/cycles/kernel/shaders/node_convert_from_string.osl 
b/intern/cycles/kernel/shaders/node_convert_from_string.osl index cbc6653eada..48d894a6b3e 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_string.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_string.osl @@ -16,14 +16,12 @@ #include "stdosl.h" -shader node_convert_from_string( - string value_string = "", - output color value_color = color(0.0, 0.0, 0.0), - output float value_float = 0.0, - output int value_int = 0, - output vector value_vector = vector(0.0, 0.0, 0.0), - output point value_point = point(0.0, 0.0, 0.0), - output normal value_normal = normal(0.0, 0.0, 0.0)) +shader node_convert_from_string(string value_string = "", + output color value_color = color(0.0, 0.0, 0.0), + output float value_float = 0.0, + output int value_int = 0, + output vector value_vector = vector(0.0, 0.0, 0.0), + output point value_point = point(0.0, 0.0, 0.0), + output normal value_normal = normal(0.0, 0.0, 0.0)) { } - diff --git a/intern/cycles/kernel/shaders/node_convert_from_vector.osl b/intern/cycles/kernel/shaders/node_convert_from_vector.osl index 8bdca469b90..92ab2313bcb 100644 --- a/intern/cycles/kernel/shaders/node_convert_from_vector.osl +++ b/intern/cycles/kernel/shaders/node_convert_from_vector.osl @@ -16,19 +16,17 @@ #include "stdosl.h" -shader node_convert_from_vector( - vector value_vector = vector(0.0, 0.0, 0.0), - output string value_string = "", - output float value_float = 0.0, - output int value_int = 0, - output color value_color = color(0.0, 0.0, 0.0), - output point value_point = point(0.0, 0.0, 0.0), - output normal value_normal = normal(0.0, 0.0, 0.0)) +shader node_convert_from_vector(vector value_vector = vector(0.0, 0.0, 0.0), + output string value_string = "", + output float value_float = 0.0, + output int value_int = 0, + output color value_color = color(0.0, 0.0, 0.0), + output point value_point = point(0.0, 0.0, 0.0), + output normal value_normal = normal(0.0, 0.0, 0.0)) { - value_float = (value_vector[0] + value_vector[1] + 
value_vector[2]) * (1.0 / 3.0); - value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0)); - value_color = color(value_vector[0], value_vector[1], value_vector[2]); - value_point = point(value_vector[0], value_vector[1], value_vector[2]); - value_normal = normal(value_vector[0], value_vector[1], value_vector[2]); + value_float = (value_vector[0] + value_vector[1] + value_vector[2]) * (1.0 / 3.0); + value_int = (int)((value_normal[0] + value_normal[1] + value_normal[2]) * (1.0 / 3.0)); + value_color = color(value_vector[0], value_vector[1], value_vector[2]); + value_point = point(value_vector[0], value_vector[1], value_vector[2]); + value_normal = normal(value_vector[0], value_vector[1], value_vector[2]); } - diff --git a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl index 2bef2d65baa..bd5554b838a 100644 --- a/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_diffuse_bsdf.osl @@ -16,15 +16,13 @@ #include "stdosl.h" -shader node_diffuse_bsdf( - color Color = 0.8, - float Roughness = 0.0, - normal Normal = N, - output closure color BSDF = 0) +shader node_diffuse_bsdf(color Color = 0.8, + float Roughness = 0.0, + normal Normal = N, + output closure color BSDF = 0) { - if (Roughness == 0.0) - BSDF = Color * diffuse(Normal); - else - BSDF = Color * oren_nayar(Normal, Roughness); + if (Roughness == 0.0) + BSDF = Color * diffuse(Normal); + else + BSDF = Color * oren_nayar(Normal, Roughness); } - diff --git a/intern/cycles/kernel/shaders/node_displacement.osl b/intern/cycles/kernel/shaders/node_displacement.osl index 89f35841527..a1f3b7b7737 100644 --- a/intern/cycles/kernel/shaders/node_displacement.osl +++ b/intern/cycles/kernel/shaders/node_displacement.osl @@ -16,23 +16,21 @@ #include "stdosl.h" -shader node_displacement( - string space = "object", - float Height = 0.0, - float Midlevel = 0.5, - float Scale = 1.0, - normal Normal = N, - 
output vector Displacement = vector(0.0, 0.0, 0.0)) +shader node_displacement(string space = "object", + float Height = 0.0, + float Midlevel = 0.5, + float Scale = 1.0, + normal Normal = N, + output vector Displacement = vector(0.0, 0.0, 0.0)) { - Displacement = Normal; - if(space == "object") { - Displacement = transform("object", Displacement); - } + Displacement = Normal; + if (space == "object") { + Displacement = transform("object", Displacement); + } - Displacement = normalize(Displacement) * (Height - Midlevel) * Scale; + Displacement = normalize(Displacement) * (Height - Midlevel) * Scale; - if(space == "object") { - Displacement = transform("object", "world", Displacement); - } + if (space == "object") { + Displacement = transform("object", "world", Displacement); + } } - diff --git a/intern/cycles/kernel/shaders/node_emission.osl b/intern/cycles/kernel/shaders/node_emission.osl index c36e2a4c0f3..57973f57ac6 100644 --- a/intern/cycles/kernel/shaders/node_emission.osl +++ b/intern/cycles/kernel/shaders/node_emission.osl @@ -16,11 +16,7 @@ #include "stdosl.h" -shader node_emission( - color Color = 0.8, - float Strength = 1.0, - output closure color Emission = 0) +shader node_emission(color Color = 0.8, float Strength = 1.0, output closure color Emission = 0) { - Emission = (Strength * Color) * emission(); + Emission = (Strength * Color) * emission(); } - diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl index 95d9d813969..eb32dad392f 100644 --- a/intern/cycles/kernel/shaders/node_environment_texture.osl +++ b/intern/cycles/kernel/shaders/node_environment_texture.osl @@ -19,63 +19,63 @@ vector environment_texture_direction_to_equirectangular(vector dir) { - float u = -atan2(dir[1], dir[0]) / (M_2PI) + 0.5; - float v = atan2(dir[2], hypot(dir[0], dir[1])) / M_PI + 0.5; + float u = -atan2(dir[1], dir[0]) / (M_2PI) + 0.5; + float v = atan2(dir[2], hypot(dir[0], dir[1])) / M_PI + 
0.5; - return vector(u, v, 0.0); + return vector(u, v, 0.0); } vector environment_texture_direction_to_mirrorball(vector idir) { - vector dir = idir; - dir[1] -= 1.0; + vector dir = idir; + dir[1] -= 1.0; - float div = 2.0 * sqrt(max(-0.5 * dir[1], 0.0)); - if (div > 0.0) - dir /= div; + float div = 2.0 * sqrt(max(-0.5 * dir[1], 0.0)); + if (div > 0.0) + dir /= div; - float u = 0.5 * (dir[0] + 1.0); - float v = 0.5 * (dir[2] + 1.0); + float u = 0.5 * (dir[0] + 1.0); + float v = 0.5 * (dir[2] + 1.0); - return vector(u, v, 0.0); + return vector(u, v, 0.0); } shader node_environment_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - vector Vector = P, - string filename = "", - string projection = "equirectangular", - string interpolation = "linear", - string color_space = "sRGB", - int is_float = 1, - int use_alpha = 1, - output color Color = 0.0, - output float Alpha = 1.0) + int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + vector Vector = P, + string filename = "", + string projection = "equirectangular", + string interpolation = "linear", + string color_space = "sRGB", + int is_float = 1, + int use_alpha = 1, + output color Color = 0.0, + output float Alpha = 1.0) { - vector p = Vector; + vector p = Vector; - if (use_mapping) - p = transform(mapping, p); - - p = normalize(p); + if (use_mapping) + p = transform(mapping, p); - if (projection == "equirectangular") - p = environment_texture_direction_to_equirectangular(p); - else - p = environment_texture_direction_to_mirrorball(p); + p = normalize(p); - /* todo: use environment for better texture filtering of equirectangular */ - Color = (color)texture(filename, p[0], 1.0 - p[1], "wrap", "periodic", "interp", interpolation, "alpha", Alpha); + if (projection == "equirectangular") + p = environment_texture_direction_to_equirectangular(p); + else + p = environment_texture_direction_to_mirrorball(p); - if (use_alpha) 
{ - Color = color_unpremultiply(Color, Alpha); + /* todo: use environment for better texture filtering of equirectangular */ + Color = (color)texture( + filename, p[0], 1.0 - p[1], "wrap", "periodic", "interp", interpolation, "alpha", Alpha); - if (!is_float) - Color = min(Color, 1.0); - } + if (use_alpha) { + Color = color_unpremultiply(Color, Alpha); - if (color_space == "sRGB") - Color = color_srgb_to_scene_linear(Color); -} + if (!is_float) + Color = min(Color, 1.0); + } + if (color_space == "sRGB") + Color = color_srgb_to_scene_linear(Color); +} diff --git a/intern/cycles/kernel/shaders/node_fresnel.h b/intern/cycles/kernel/shaders/node_fresnel.h index 40793479d8a..ade1d4c6207 100644 --- a/intern/cycles/kernel/shaders/node_fresnel.h +++ b/intern/cycles/kernel/shaders/node_fresnel.h @@ -32,33 +32,31 @@ float fresnel_dielectric_cos(float cosi, float eta) { - /* compute fresnel reflectance without explicitly computing - * the refracted direction */ - float c = fabs(cosi); - float g = eta * eta - 1 + c * c; - float result; + /* compute fresnel reflectance without explicitly computing + * the refracted direction */ + float c = fabs(cosi); + float g = eta * eta - 1 + c * c; + float result; - if (g > 0) { - g = sqrt(g); - float A = (g - c) / (g + c); - float B = (c * (g + c) - 1) / (c * (g - c) + 1); - result = 0.5 * A * A * (1 + B * B); - } - else - result = 1.0; /* TIR (no refracted component) */ + if (g > 0) { + g = sqrt(g); + float A = (g - c) / (g + c); + float B = (c * (g + c) - 1) / (c * (g - c) + 1); + result = 0.5 * A * A * (1 + B * B); + } + else + result = 1.0; /* TIR (no refracted component) */ - return result; + return result; } color fresnel_conductor(float cosi, color eta, color k) { - color cosi2 = color(cosi * cosi); - color one = color(1, 1, 1); - color tmp_f = eta * eta + k * k; - color tmp = tmp_f * cosi2; - color Rparl2 = (tmp - (2.0 * eta * cosi) + one) / - (tmp + (2.0 * eta * cosi) + one); - color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) / 
- (tmp_f + (2.0 * eta * cosi) + cosi2); - return (Rparl2 + Rperp2) * 0.5; + color cosi2 = color(cosi * cosi); + color one = color(1, 1, 1); + color tmp_f = eta * eta + k * k; + color tmp = tmp_f * cosi2; + color Rparl2 = (tmp - (2.0 * eta * cosi) + one) / (tmp + (2.0 * eta * cosi) + one); + color Rperp2 = (tmp_f - (2.0 * eta * cosi) + cosi2) / (tmp_f + (2.0 * eta * cosi) + cosi2); + return (Rparl2 + Rperp2) * 0.5; } diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl index 8bec7b432f5..89250db40f3 100644 --- a/intern/cycles/kernel/shaders/node_fresnel.osl +++ b/intern/cycles/kernel/shaders/node_fresnel.osl @@ -17,14 +17,10 @@ #include "stdosl.h" #include "node_fresnel.h" -shader node_fresnel( - float IOR = 1.45, - normal Normal = N, - output float Fac = 0.0) +shader node_fresnel(float IOR = 1.45, normal Normal = N, output float Fac = 0.0) { - float f = max(IOR, 1e-5); - float eta = backfacing() ? 1.0 / f : f; - float cosi = dot(I, Normal); - Fac = fresnel_dielectric_cos(cosi, eta); + float f = max(IOR, 1e-5); + float eta = backfacing() ? 
1.0 / f : f; + float cosi = dot(I, Normal); + Fac = fresnel_dielectric_cos(cosi, eta); } - diff --git a/intern/cycles/kernel/shaders/node_gamma.osl b/intern/cycles/kernel/shaders/node_gamma.osl index bc4c1b34266..9b9c17dc8af 100644 --- a/intern/cycles/kernel/shaders/node_gamma.osl +++ b/intern/cycles/kernel/shaders/node_gamma.osl @@ -16,10 +16,7 @@ #include "stdosl.h" -shader node_gamma( - color ColorIn = 0.8, - float Gamma = 1.0, - output color ColorOut = 0.0) +shader node_gamma(color ColorIn = 0.8, float Gamma = 1.0, output color ColorOut = 0.0) { - ColorOut = pow(ColorIn, Gamma); + ColorOut = pow(ColorIn, Gamma); } diff --git a/intern/cycles/kernel/shaders/node_geometry.osl b/intern/cycles/kernel/shaders/node_geometry.osl index b0bd7692489..b5c1c6611c1 100644 --- a/intern/cycles/kernel/shaders/node_geometry.osl +++ b/intern/cycles/kernel/shaders/node_geometry.osl @@ -16,55 +16,53 @@ #include "stdosl.h" -shader node_geometry( - normal NormalIn = N, - string bump_offset = "center", +shader node_geometry(normal NormalIn = N, + string bump_offset = "center", - output point Position = point(0.0, 0.0, 0.0), - output normal Normal = normal(0.0, 0.0, 0.0), - output normal Tangent = normal(0.0, 0.0, 0.0), - output normal TrueNormal = normal(0.0, 0.0, 0.0), - output vector Incoming = vector(0.0, 0.0, 0.0), - output point Parametric = point(0.0, 0.0, 0.0), - output float Backfacing = 0.0, - output float Pointiness = 0.0) + output point Position = point(0.0, 0.0, 0.0), + output normal Normal = normal(0.0, 0.0, 0.0), + output normal Tangent = normal(0.0, 0.0, 0.0), + output normal TrueNormal = normal(0.0, 0.0, 0.0), + output vector Incoming = vector(0.0, 0.0, 0.0), + output point Parametric = point(0.0, 0.0, 0.0), + output float Backfacing = 0.0, + output float Pointiness = 0.0) { - Position = P; - Normal = NormalIn; - TrueNormal = Ng; - Incoming = I; - Parametric = point(u, v, 0.0); - Backfacing = backfacing(); + Position = P; + Normal = NormalIn; + TrueNormal = Ng; + 
Incoming = I; + Parametric = point(u, v, 0.0); + Backfacing = backfacing(); - if (bump_offset == "dx") { - Position += Dx(Position); - Parametric += Dx(Parametric); - } - else if (bump_offset == "dy") { - Position += Dy(Position); - Parametric += Dy(Parametric); - } + if (bump_offset == "dx") { + Position += Dx(Position); + Parametric += Dx(Parametric); + } + else if (bump_offset == "dy") { + Position += Dy(Position); + Parametric += Dy(Parametric); + } - /* first try to get tangent attribute */ - point generated; + /* first try to get tangent attribute */ + point generated; - /* try to create spherical tangent from generated coordinates */ - if (getattribute("geom:generated", generated)) { - normal data = normal(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0); - vector T = transform("object", "world", data); - Tangent = cross(Normal, normalize(cross(T, Normal))); - } - else { - /* otherwise use surface derivatives */ - Tangent = normalize(dPdu); - } + /* try to create spherical tangent from generated coordinates */ + if (getattribute("geom:generated", generated)) { + normal data = normal(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0); + vector T = transform("object", "world", data); + Tangent = cross(Normal, normalize(cross(T, Normal))); + } + else { + /* otherwise use surface derivatives */ + Tangent = normalize(dPdu); + } - getattribute("geom:pointiness", Pointiness); - if (bump_offset == "dx") { - Pointiness += Dx(Pointiness); - } - else if (bump_offset == "dy") { - Pointiness += Dy(Pointiness); - } + getattribute("geom:pointiness", Pointiness); + if (bump_offset == "dx") { + Pointiness += Dx(Pointiness); + } + else if (bump_offset == "dy") { + Pointiness += Dy(Pointiness); + } } - diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl index 2e713861c58..c0b8a002536 100644 --- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl @@ -17,29 +17,27 
@@ #include "stdosl.h" #include "node_fresnel.h" -shader node_glass_bsdf( - color Color = 0.8, - string distribution = "sharp", - float Roughness = 0.2, - float IOR = 1.45, - normal Normal = N, - output closure color BSDF = 0) +shader node_glass_bsdf(color Color = 0.8, + string distribution = "sharp", + float Roughness = 0.2, + float IOR = 1.45, + normal Normal = N, + output closure color BSDF = 0) { - float f = max(IOR, 1e-5); - float eta = backfacing() ? 1.0 / f : f; - float cosi = dot(I, Normal); - float Fr = fresnel_dielectric_cos(cosi, eta); - float roughness = Roughness * Roughness; + float f = max(IOR, 1e-5); + float eta = backfacing() ? 1.0 / f : f; + float cosi = dot(I, Normal); + float Fr = fresnel_dielectric_cos(cosi, eta); + float roughness = Roughness * Roughness; - if (distribution == "sharp") - BSDF = Color * (Fr * reflection(Normal) + (1.0 - Fr) * refraction(Normal, eta)); - else if (distribution == "beckmann") - BSDF = Color * (Fr * microfacet_beckmann(Normal, roughness) + - (1.0 - Fr) * microfacet_beckmann_refraction(Normal, roughness, eta)); - else if (distribution == "Multiscatter GGX") - BSDF = Color * microfacet_multi_ggx_glass(Normal, roughness, eta, Color); - else if (distribution == "GGX") - BSDF = Color * (Fr * microfacet_ggx(Normal, roughness) + - (1.0 - Fr) * microfacet_ggx_refraction(Normal, roughness, eta)); + if (distribution == "sharp") + BSDF = Color * (Fr * reflection(Normal) + (1.0 - Fr) * refraction(Normal, eta)); + else if (distribution == "beckmann") + BSDF = Color * (Fr * microfacet_beckmann(Normal, roughness) + + (1.0 - Fr) * microfacet_beckmann_refraction(Normal, roughness, eta)); + else if (distribution == "Multiscatter GGX") + BSDF = Color * microfacet_multi_ggx_glass(Normal, roughness, eta, Color); + else if (distribution == "GGX") + BSDF = Color * (Fr * microfacet_ggx(Normal, roughness) + + (1.0 - Fr) * microfacet_ggx_refraction(Normal, roughness, eta)); } - diff --git a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl 
b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl index 7415211b56d..2d40ee8d3f6 100644 --- a/intern/cycles/kernel/shaders/node_glossy_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_glossy_bsdf.osl @@ -17,25 +17,22 @@ #include "stdosl.h" #include "node_fresnel.h" -shader node_glossy_bsdf( - color Color = 0.8, - string distribution = "GGX", - float Roughness = 0.2, - normal Normal = N, - output closure color BSDF = 0) +shader node_glossy_bsdf(color Color = 0.8, + string distribution = "GGX", + float Roughness = 0.2, + normal Normal = N, + output closure color BSDF = 0) { - float roughness = Roughness * Roughness; - - if (distribution == "sharp") - BSDF = Color * reflection(Normal); - else if (distribution == "beckmann") - BSDF = Color * microfacet_beckmann(Normal, roughness); - else if (distribution == "GGX") - BSDF = Color * microfacet_ggx(Normal, roughness); - else if (distribution == "Multiscatter GGX") - BSDF = Color * microfacet_multi_ggx(Normal, roughness, Color); - else - BSDF = Color * ashikhmin_shirley(Normal, vector(0, 0, 0), roughness, roughness); + float roughness = Roughness * Roughness; + if (distribution == "sharp") + BSDF = Color * reflection(Normal); + else if (distribution == "beckmann") + BSDF = Color * microfacet_beckmann(Normal, roughness); + else if (distribution == "GGX") + BSDF = Color * microfacet_ggx(Normal, roughness); + else if (distribution == "Multiscatter GGX") + BSDF = Color * microfacet_multi_ggx(Normal, roughness, Color); + else + BSDF = Color * ashikhmin_shirley(Normal, vector(0, 0, 0), roughness, roughness); } - diff --git a/intern/cycles/kernel/shaders/node_gradient_texture.osl b/intern/cycles/kernel/shaders/node_gradient_texture.osl index f458937a18f..52bf466673d 100644 --- a/intern/cycles/kernel/shaders/node_gradient_texture.osl +++ b/intern/cycles/kernel/shaders/node_gradient_texture.osl @@ -21,59 +21,58 @@ float gradient(point p, string type) { - float x, y, z; - - x = p[0]; - y = p[1]; - z = p[2]; + float x, y, z; - 
float result = 0.0; + x = p[0]; + y = p[1]; + z = p[2]; - if (type == "linear") { - result = x; - } - else if (type == "quadratic") { - float r = max(x, 0.0); - result = r * r; - } - else if (type == "easing") { - float r = min(max(x, 0.0), 1.0); - float t = r * r; - - result = (3.0 * t - 2.0 * t * r); - } - else if (type == "diagonal") { - result = (x + y) * 0.5; - } - else if (type == "radial") { - result = atan2(y, x) / M_2PI + 0.5; - } - else { - float r = max(1.0 - sqrt(x * x + y * y + z * z), 0.0); + float result = 0.0; - if (type == "quadratic_sphere") - result = r * r; - else if (type == "spherical") - result = r; - } + if (type == "linear") { + result = x; + } + else if (type == "quadratic") { + float r = max(x, 0.0); + result = r * r; + } + else if (type == "easing") { + float r = min(max(x, 0.0), 1.0); + float t = r * r; - return result; + result = (3.0 * t - 2.0 * t * r); + } + else if (type == "diagonal") { + result = (x + y) * 0.5; + } + else if (type == "radial") { + result = atan2(y, x) / M_2PI + 0.5; + } + else { + float r = max(1.0 - sqrt(x * x + y * y + z * z), 0.0); + + if (type == "quadratic_sphere") + result = r * r; + else if (type == "spherical") + result = r; + } + + return result; } shader node_gradient_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - string type = "linear", - point Vector = P, - output float Fac = 0.0, - output color Color = 0.0) + int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + string type = "linear", + point Vector = P, + output float Fac = 0.0, + output color Color = 0.0) { - point p = Vector; + point p = Vector; - if (use_mapping) - p = transform(mapping, p); + if (use_mapping) + p = transform(mapping, p); - Fac = gradient(p, type); - Color = color(Fac, Fac, Fac); + Fac = gradient(p, type); + Color = color(Fac, Fac, Fac); } - diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl 
b/intern/cycles/kernel/shaders/node_hair_bsdf.osl index ef8f2fae894..bc912087666 100644 --- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl @@ -18,41 +18,40 @@ #include "stdosl.h" -shader node_hair_bsdf( - color Color = 0.8, - string component = "reflection", - float Offset = 0.0, - float RoughnessU = 0.1, - float RoughnessV = 1.0, - normal Tangent = normal(0, 0, 0), - output closure color BSDF = 0) +shader node_hair_bsdf(color Color = 0.8, + string component = "reflection", + float Offset = 0.0, + float RoughnessU = 0.1, + float RoughnessV = 1.0, + normal Tangent = normal(0, 0, 0), + output closure color BSDF = 0) { - float roughnessh = clamp(RoughnessU, 0.001, 1.0); - float roughnessv = clamp(RoughnessV, 0.001, 1.0); - float offset = -Offset; + float roughnessh = clamp(RoughnessU, 0.001, 1.0); + float roughnessv = clamp(RoughnessV, 0.001, 1.0); + float offset = -Offset; - normal T; - float IsCurve = 0; - getattribute("geom:is_curve", IsCurve); + normal T; + float IsCurve = 0; + getattribute("geom:is_curve", IsCurve); - if (isconnected(Tangent)) { - T = Tangent; - } - else if(!IsCurve) { - T = normalize(dPdv); - offset = 0.0; - } - else { - T = normalize(dPdu); - } + if (isconnected(Tangent)) { + T = Tangent; + } + else if (!IsCurve) { + T = normalize(dPdv); + offset = 0.0; + } + else { + T = normalize(dPdu); + } - if (backfacing() && IsCurve) { - BSDF = transparent(); - } - else { - if (component == "reflection") - BSDF = Color * hair_reflection(Ng, roughnessh, roughnessv, T, offset); - else - BSDF = Color * hair_transmission(Ng, roughnessh, roughnessv, T, offset); - } + if (backfacing() && IsCurve) { + BSDF = transparent(); + } + else { + if (component == "reflection") + BSDF = Color * hair_reflection(Ng, roughnessh, roughnessv, T, offset); + else + BSDF = Color * hair_transmission(Ng, roughnessh, roughnessv, T, offset); + } } diff --git a/intern/cycles/kernel/shaders/node_hair_info.osl 
b/intern/cycles/kernel/shaders/node_hair_info.osl index 19216f67579..991a27c4103 100644 --- a/intern/cycles/kernel/shaders/node_hair_info.osl +++ b/intern/cycles/kernel/shaders/node_hair_info.osl @@ -16,17 +16,15 @@ #include "stdosl.h" -shader node_hair_info( - output float IsStrand = 0.0, - output float Intercept = 0.0, - output float Thickness = 0.0, - output normal TangentNormal = N, - output float Random = 0) +shader node_hair_info(output float IsStrand = 0.0, + output float Intercept = 0.0, + output float Thickness = 0.0, + output normal TangentNormal = N, + output float Random = 0) { - getattribute("geom:is_curve", IsStrand); - getattribute("geom:curve_intercept", Intercept); - getattribute("geom:curve_thickness", Thickness); - getattribute("geom:curve_tangent_normal", TangentNormal); - getattribute("geom:curve_random", Random); + getattribute("geom:is_curve", IsStrand); + getattribute("geom:curve_intercept", Intercept); + getattribute("geom:curve_thickness", Thickness); + getattribute("geom:curve_tangent_normal", TangentNormal); + getattribute("geom:curve_random", Random); } - diff --git a/intern/cycles/kernel/shaders/node_holdout.osl b/intern/cycles/kernel/shaders/node_holdout.osl index 78a9f46fd15..b51bc0543a5 100644 --- a/intern/cycles/kernel/shaders/node_holdout.osl +++ b/intern/cycles/kernel/shaders/node_holdout.osl @@ -16,9 +16,6 @@ #include "stdosl.h" -shader node_holdout( - output closure color Holdout = holdout()) +shader node_holdout(output closure color Holdout = holdout()) { - } - diff --git a/intern/cycles/kernel/shaders/node_hsv.osl b/intern/cycles/kernel/shaders/node_hsv.osl index d72a87a951f..30c56a20a92 100644 --- a/intern/cycles/kernel/shaders/node_hsv.osl +++ b/intern/cycles/kernel/shaders/node_hsv.osl @@ -17,28 +17,26 @@ #include "stdosl.h" #include "node_color.h" -shader node_hsv( - float Hue = 0.5, - float Saturation = 1.0, - float Value = 1.0, - float Fac = 0.5, - color ColorIn = 0.0, - output color ColorOut = 0.0) +shader 
node_hsv(float Hue = 0.5, + float Saturation = 1.0, + float Value = 1.0, + float Fac = 0.5, + color ColorIn = 0.0, + output color ColorOut = 0.0) { - color Color = rgb_to_hsv(ColorIn); + color Color = rgb_to_hsv(ColorIn); - // remember: fmod doesn't work for negative numbers - Color[0] = fmod(Color[0] + Hue + 0.5, 1.0); - Color[1] = clamp(Color[1] * Saturation, 0.0, 1.0); - Color[2] *= Value; + // remember: fmod doesn't work for negative numbers + Color[0] = fmod(Color[0] + Hue + 0.5, 1.0); + Color[1] = clamp(Color[1] * Saturation, 0.0, 1.0); + Color[2] *= Value; - Color = hsv_to_rgb(Color); + Color = hsv_to_rgb(Color); - // Clamp color to prevent negative values cauzed by oversaturation. - Color[0] = max(Color[0], 0.0); - Color[1] = max(Color[1], 0.0); - Color[2] = max(Color[2], 0.0); + // Clamp color to prevent negative values cauzed by oversaturation. + Color[0] = max(Color[0], 0.0); + Color[1] = max(Color[1], 0.0); + Color[2] = max(Color[2], 0.0); - ColorOut = mix(ColorIn, Color, Fac); + ColorOut = mix(ColorIn, Color, Fac); } - diff --git a/intern/cycles/kernel/shaders/node_ies_light.osl b/intern/cycles/kernel/shaders/node_ies_light.osl index a0954e3a444..ea8c44e09de 100644 --- a/intern/cycles/kernel/shaders/node_ies_light.osl +++ b/intern/cycles/kernel/shaders/node_ies_light.osl @@ -19,24 +19,23 @@ /* IES Light */ -shader node_ies_light( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - int slot = 0, - float Strength = 1.0, - point Vector = I, - output float Fac = 0.0) +shader node_ies_light(int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + int slot = 0, + float Strength = 1.0, + point Vector = I, + output float Fac = 0.0) { - point p = Vector; + point p = Vector; - if (use_mapping) { - p = transform(mapping, p); - } + if (use_mapping) { + p = transform(mapping, p); + } - p = normalize(p); + p = normalize(p); - float v_angle = acos(-p[2]); - float h_angle = 
atan2(p[0], p[1]) + M_PI; + float v_angle = acos(-p[2]); + float h_angle = atan2(p[0], p[1]) + M_PI; - Fac = Strength * texture(format("@l%d", slot), h_angle, v_angle); + Fac = Strength * texture(format("@l%d", slot), h_angle, v_angle); } diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl index 7cd2922dd4f..df5eda39985 100644 --- a/intern/cycles/kernel/shaders/node_image_texture.osl +++ b/intern/cycles/kernel/shaders/node_image_texture.osl @@ -19,217 +19,217 @@ point texco_remap_square(point co) { - return (co - point(0.5, 0.5, 0.5)) * 2.0; + return (co - point(0.5, 0.5, 0.5)) * 2.0; } point map_to_tube(vector dir) { - float u, v; - v = (dir[2] + 1.0) * 0.5; - float len = sqrt(dir[0] * dir[0] + dir[1] * dir[1]); - if (len > 0.0) { - u = (1.0 - (atan2(dir[0] / len, dir[1] / len) / M_PI)) * 0.5; - } - else { - v = u = 0.0; /* To avoid un-initialized variables. */ - } - return point(u, v, 0.0); + float u, v; + v = (dir[2] + 1.0) * 0.5; + float len = sqrt(dir[0] * dir[0] + dir[1] * dir[1]); + if (len > 0.0) { + u = (1.0 - (atan2(dir[0] / len, dir[1] / len) / M_PI)) * 0.5; + } + else { + v = u = 0.0; /* To avoid un-initialized variables. */ + } + return point(u, v, 0.0); } point map_to_sphere(vector dir) { - float len = length(dir); - float v, u; - if (len > 0.0) { - if (dir[0] == 0.0 && dir[1] == 0.0) { - u = 0.0; /* Othwise domain error. */ - } - else { - u = (1.0 - atan2(dir[0], dir[1]) / M_PI) / 2.0; - } - v = 1.0 - acos(dir[2] / len) / M_PI; - } - else { - v = u = 0.0; /* To avoid un-initialized variables. */ - } - return point(u, v, 0.0); + float len = length(dir); + float v, u; + if (len > 0.0) { + if (dir[0] == 0.0 && dir[1] == 0.0) { + u = 0.0; /* Othwise domain error. */ + } + else { + u = (1.0 - atan2(dir[0], dir[1]) / M_PI) / 2.0; + } + v = 1.0 - acos(dir[2] / len) / M_PI; + } + else { + v = u = 0.0; /* To avoid un-initialized variables. 
*/ + } + return point(u, v, 0.0); } color image_texture_lookup(string filename, string color_space, - float u, float v, + float u, + float v, output float Alpha, int use_alpha, int is_float, string interpolation, string extension) { - color rgb = (color)texture(filename, u, 1.0 - v, "wrap", extension, "interp", interpolation, "alpha", Alpha); + color rgb = (color)texture( + filename, u, 1.0 - v, "wrap", extension, "interp", interpolation, "alpha", Alpha); - if (use_alpha) { - rgb = color_unpremultiply(rgb, Alpha); - - if (!is_float) - rgb = min(rgb, 1.0); - } + if (use_alpha) { + rgb = color_unpremultiply(rgb, Alpha); - if (color_space == "sRGB") { - rgb = color_srgb_to_scene_linear(rgb); - } + if (!is_float) + rgb = min(rgb, 1.0); + } - return rgb; + if (color_space == "sRGB") { + rgb = color_srgb_to_scene_linear(rgb); + } + + return rgb; } -shader node_image_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - point Vector = P, - string filename = "", - string color_space = "sRGB", - string projection = "flat", - string interpolation = "smartcubic", - string extension = "periodic", - float projection_blend = 0.0, - int is_float = 1, - int use_alpha = 1, - output color Color = 0.0, - output float Alpha = 1.0) +shader node_image_texture(int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + point Vector = P, + string filename = "", + string color_space = "sRGB", + string projection = "flat", + string interpolation = "smartcubic", + string extension = "periodic", + float projection_blend = 0.0, + int is_float = 1, + int use_alpha = 1, + output color Color = 0.0, + output float Alpha = 1.0) { - point p = Vector; - - if (use_mapping) - p = transform(mapping, p); - - if (projection == "flat") { - Color = image_texture_lookup(filename, - color_space, - p[0], p[1], - Alpha, - use_alpha, - is_float, - interpolation, - extension); - } - else if (projection == "box") { - /* 
object space normal */ - vector Nob = transform("world", "object", N); - - /* project from direction vector to barycentric coordinates in triangles */ - Nob = vector(fabs(Nob[0]), fabs(Nob[1]), fabs(Nob[2])); - Nob /= (Nob[0] + Nob[1] + Nob[2]); - - /* basic idea is to think of this as a triangle, each corner representing - * one of the 3 faces of the cube. in the corners we have single textures, - * in between we blend between two textures, and in the middle we a blend - * between three textures. - * - * the Nxyz values are the barycentric coordinates in an equilateral - * triangle, which in case of blending, in the middle has a smaller - * equilateral triangle where 3 textures blend. this divides things into - * 7 zones, with an if () test for each zone */ - - vector weight = vector(0.0, 0.0, 0.0); - float blend = projection_blend; - float limit = 0.5 * (1.0 + blend); - - /* first test for corners with single texture */ - if (Nob[0] > limit * (Nob[0] + Nob[1]) && Nob[0] > limit * (Nob[0] + Nob[2])) { - weight[0] = 1.0; - } - else if (Nob[1] > limit * (Nob[0] + Nob[1]) && Nob[1] > limit * (Nob[1] + Nob[2])) { - weight[1] = 1.0; - } - else if (Nob[2] > limit * (Nob[0] + Nob[2]) && Nob[2] > limit * (Nob[1] + Nob[2])) { - weight[2] = 1.0; - } - else if (blend > 0.0) { - /* in case of blending, test for mixes between two textures */ - if (Nob[2] < (1.0 - limit) * (Nob[1] + Nob[0])) { - weight[0] = Nob[0] / (Nob[0] + Nob[1]); - weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0); - weight[1] = 1.0 - weight[0]; - } - else if (Nob[0] < (1.0 - limit) * (Nob[1] + Nob[2])) { - weight[1] = Nob[1] / (Nob[1] + Nob[2]); - weight[1] = clamp((weight[1] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0); - weight[2] = 1.0 - weight[1]; - } - else if (Nob[1] < (1.0 - limit) * (Nob[0] + Nob[2])) { - weight[0] = Nob[0] / (Nob[0] + Nob[2]); - weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0); - weight[2] = 1.0 - weight[0]; - } - else { - /* last case, we 
have a mix between three */ - weight[0] = ((2.0 - limit) * Nob[0] + (limit - 1.0)) / (2.0 * limit - 1.0); - weight[1] = ((2.0 - limit) * Nob[1] + (limit - 1.0)) / (2.0 * limit - 1.0); - weight[2] = ((2.0 - limit) * Nob[2] + (limit - 1.0)) / (2.0 * limit - 1.0); - } - } - else { - /* Desperate mode, no valid choice anyway, fallback to one side.*/ - weight[0] = 1.0; - } - - Color = color(0.0, 0.0, 0.0); - Alpha = 0.0; - - float tmp_alpha; - - if (weight[0] > 0.0) { - Color += weight[0] * image_texture_lookup(filename, - color_space, - p[1], p[2], - tmp_alpha, - use_alpha, - is_float, - interpolation, - extension); - Alpha += weight[0] * tmp_alpha; - } - if (weight[1] > 0.0) { - Color += weight[1] * image_texture_lookup(filename, - color_space, - p[0], p[2], - tmp_alpha, - use_alpha, - is_float, - interpolation, - extension); - Alpha += weight[1] * tmp_alpha; - } - if (weight[2] > 0.0) { - Color += weight[2] * image_texture_lookup(filename, - color_space, - p[1], p[0], - tmp_alpha, - use_alpha, - is_float, - interpolation, - extension); - Alpha += weight[2] * tmp_alpha; - } - } - else if (projection == "sphere") { - point projected = map_to_sphere(texco_remap_square(p)); - Color = image_texture_lookup(filename, - color_space, - projected[0], projected[1], - Alpha, - use_alpha, - is_float, - interpolation, - extension); - } - else if (projection == "tube") { - point projected = map_to_tube(texco_remap_square(p)); - Color = image_texture_lookup(filename, - color_space, - projected[0], projected[1], - Alpha, - use_alpha, - is_float, - interpolation, - extension); - } + point p = Vector; + + if (use_mapping) + p = transform(mapping, p); + + if (projection == "flat") { + Color = image_texture_lookup( + filename, color_space, p[0], p[1], Alpha, use_alpha, is_float, interpolation, extension); + } + else if (projection == "box") { + /* object space normal */ + vector Nob = transform("world", "object", N); + + /* project from direction vector to barycentric coordinates in 
triangles */ + Nob = vector(fabs(Nob[0]), fabs(Nob[1]), fabs(Nob[2])); + Nob /= (Nob[0] + Nob[1] + Nob[2]); + + /* basic idea is to think of this as a triangle, each corner representing + * one of the 3 faces of the cube. in the corners we have single textures, + * in between we blend between two textures, and in the middle we a blend + * between three textures. + * + * the Nxyz values are the barycentric coordinates in an equilateral + * triangle, which in case of blending, in the middle has a smaller + * equilateral triangle where 3 textures blend. this divides things into + * 7 zones, with an if () test for each zone */ + + vector weight = vector(0.0, 0.0, 0.0); + float blend = projection_blend; + float limit = 0.5 * (1.0 + blend); + + /* first test for corners with single texture */ + if (Nob[0] > limit * (Nob[0] + Nob[1]) && Nob[0] > limit * (Nob[0] + Nob[2])) { + weight[0] = 1.0; + } + else if (Nob[1] > limit * (Nob[0] + Nob[1]) && Nob[1] > limit * (Nob[1] + Nob[2])) { + weight[1] = 1.0; + } + else if (Nob[2] > limit * (Nob[0] + Nob[2]) && Nob[2] > limit * (Nob[1] + Nob[2])) { + weight[2] = 1.0; + } + else if (blend > 0.0) { + /* in case of blending, test for mixes between two textures */ + if (Nob[2] < (1.0 - limit) * (Nob[1] + Nob[0])) { + weight[0] = Nob[0] / (Nob[0] + Nob[1]); + weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0); + weight[1] = 1.0 - weight[0]; + } + else if (Nob[0] < (1.0 - limit) * (Nob[1] + Nob[2])) { + weight[1] = Nob[1] / (Nob[1] + Nob[2]); + weight[1] = clamp((weight[1] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0); + weight[2] = 1.0 - weight[1]; + } + else if (Nob[1] < (1.0 - limit) * (Nob[0] + Nob[2])) { + weight[0] = Nob[0] / (Nob[0] + Nob[2]); + weight[0] = clamp((weight[0] - 0.5 * (1.0 - blend)) / blend, 0.0, 1.0); + weight[2] = 1.0 - weight[0]; + } + else { + /* last case, we have a mix between three */ + weight[0] = ((2.0 - limit) * Nob[0] + (limit - 1.0)) / (2.0 * limit - 1.0); + weight[1] = ((2.0 - limit) * 
Nob[1] + (limit - 1.0)) / (2.0 * limit - 1.0); + weight[2] = ((2.0 - limit) * Nob[2] + (limit - 1.0)) / (2.0 * limit - 1.0); + } + } + else { + /* Desperate mode, no valid choice anyway, fallback to one side.*/ + weight[0] = 1.0; + } + + Color = color(0.0, 0.0, 0.0); + Alpha = 0.0; + + float tmp_alpha; + + if (weight[0] > 0.0) { + Color += weight[0] * image_texture_lookup(filename, + color_space, + p[1], + p[2], + tmp_alpha, + use_alpha, + is_float, + interpolation, + extension); + Alpha += weight[0] * tmp_alpha; + } + if (weight[1] > 0.0) { + Color += weight[1] * image_texture_lookup(filename, + color_space, + p[0], + p[2], + tmp_alpha, + use_alpha, + is_float, + interpolation, + extension); + Alpha += weight[1] * tmp_alpha; + } + if (weight[2] > 0.0) { + Color += weight[2] * image_texture_lookup(filename, + color_space, + p[1], + p[0], + tmp_alpha, + use_alpha, + is_float, + interpolation, + extension); + Alpha += weight[2] * tmp_alpha; + } + } + else if (projection == "sphere") { + point projected = map_to_sphere(texco_remap_square(p)); + Color = image_texture_lookup(filename, + color_space, + projected[0], + projected[1], + Alpha, + use_alpha, + is_float, + interpolation, + extension); + } + else if (projection == "tube") { + point projected = map_to_tube(texco_remap_square(p)); + Color = image_texture_lookup(filename, + color_space, + projected[0], + projected[1], + Alpha, + use_alpha, + is_float, + interpolation, + extension); + } } diff --git a/intern/cycles/kernel/shaders/node_invert.osl b/intern/cycles/kernel/shaders/node_invert.osl index b33b0a43d63..c7d41e4e129 100644 --- a/intern/cycles/kernel/shaders/node_invert.osl +++ b/intern/cycles/kernel/shaders/node_invert.osl @@ -16,12 +16,8 @@ #include "stdosl.h" -shader node_invert( - float Fac = 1.0, - color ColorIn = 0.8, - output color ColorOut = 0.8) +shader node_invert(float Fac = 1.0, color ColorIn = 0.8, output color ColorOut = 0.8) { - color ColorInv = color(1.0) - ColorIn; - ColorOut = mix(ColorIn, 
ColorInv, Fac); + color ColorInv = color(1.0) - ColorIn; + ColorOut = mix(ColorIn, ColorInv, Fac); } - diff --git a/intern/cycles/kernel/shaders/node_layer_weight.osl b/intern/cycles/kernel/shaders/node_layer_weight.osl index f583df25773..7c46f28b41b 100644 --- a/intern/cycles/kernel/shaders/node_layer_weight.osl +++ b/intern/cycles/kernel/shaders/node_layer_weight.osl @@ -17,29 +17,28 @@ #include "stdosl.h" #include "node_fresnel.h" -shader node_layer_weight( - float Blend = 0.5, - normal Normal = N, - output float Fresnel = 0.0, - output float Facing = 0.0) +shader node_layer_weight(float Blend = 0.5, + normal Normal = N, + output float Fresnel = 0.0, + output float Facing = 0.0) { - float blend = Blend; - float cosi = dot(I, Normal); + float blend = Blend; + float cosi = dot(I, Normal); - /* Fresnel */ - float eta = max(1.0 - Blend, 1e-5); - eta = backfacing() ? eta : 1.0 / eta; - Fresnel = fresnel_dielectric_cos(cosi, eta); + /* Fresnel */ + float eta = max(1.0 - Blend, 1e-5); + eta = backfacing() ? eta : 1.0 / eta; + Fresnel = fresnel_dielectric_cos(cosi, eta); - /* Facing */ - Facing = fabs(cosi); + /* Facing */ + Facing = fabs(cosi); - if (blend != 0.5) { - blend = clamp(blend, 0.0, 1.0 - 1e-5); - blend = (blend < 0.5) ? 2.0 * blend : 0.5 / (1.0 - blend); + if (blend != 0.5) { + blend = clamp(blend, 0.0, 1.0 - 1e-5); + blend = (blend < 0.5) ? 
2.0 * blend : 0.5 / (1.0 - blend); - Facing = pow(Facing, blend); - } + Facing = pow(Facing, blend); + } - Facing = 1.0 - Facing; + Facing = 1.0 - Facing; } diff --git a/intern/cycles/kernel/shaders/node_light_falloff.osl b/intern/cycles/kernel/shaders/node_light_falloff.osl index a594e33d643..d0d7dd9c5aa 100644 --- a/intern/cycles/kernel/shaders/node_light_falloff.osl +++ b/intern/cycles/kernel/shaders/node_light_falloff.osl @@ -16,29 +16,27 @@ #include "stdosl.h" -shader node_light_falloff( - float Strength = 0.0, - float Smooth = 0.0, - output float Quadratic = 0.0, - output float Linear = 0.0, - output float Constant = 0.0) +shader node_light_falloff(float Strength = 0.0, + float Smooth = 0.0, + output float Quadratic = 0.0, + output float Linear = 0.0, + output float Constant = 0.0) { - float ray_length = 0.0; - float strength = Strength; - getattribute("path:ray_length", ray_length); + float ray_length = 0.0; + float strength = Strength; + getattribute("path:ray_length", ray_length); - if (Smooth > 0.0) { - float squared = ray_length * ray_length; - strength *= squared / (Smooth + squared); - } + if (Smooth > 0.0) { + float squared = ray_length * ray_length; + strength *= squared / (Smooth + squared); + } - /* Quadratic */ - Quadratic = strength; - - /* Linear */ - Linear = (strength * ray_length); + /* Quadratic */ + Quadratic = strength; - /* Constant */ - Constant = (strength * ray_length * ray_length); -} + /* Linear */ + Linear = (strength * ray_length); + /* Constant */ + Constant = (strength * ray_length * ray_length); +} diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl index 64fe4c20132..c4a3624a67f 100644 --- a/intern/cycles/kernel/shaders/node_light_path.osl +++ b/intern/cycles/kernel/shaders/node_light_path.osl @@ -16,51 +16,49 @@ #include "stdosl.h" -shader node_light_path( - output float IsCameraRay = 0.0, - output float IsShadowRay = 0.0, - output float IsDiffuseRay = 0.0, - output 
float IsGlossyRay = 0.0, - output float IsSingularRay = 0.0, - output float IsReflectionRay = 0.0, - output float IsTransmissionRay = 0.0, - output float IsVolumeScatterRay = 0.0, - output float RayLength = 0.0, - output float RayDepth = 0.0, - output float DiffuseDepth = 0.0, - output float GlossyDepth = 0.0, - output float TransparentDepth = 0.0, - output float TransmissionDepth = 0.0) +shader node_light_path(output float IsCameraRay = 0.0, + output float IsShadowRay = 0.0, + output float IsDiffuseRay = 0.0, + output float IsGlossyRay = 0.0, + output float IsSingularRay = 0.0, + output float IsReflectionRay = 0.0, + output float IsTransmissionRay = 0.0, + output float IsVolumeScatterRay = 0.0, + output float RayLength = 0.0, + output float RayDepth = 0.0, + output float DiffuseDepth = 0.0, + output float GlossyDepth = 0.0, + output float TransparentDepth = 0.0, + output float TransmissionDepth = 0.0) { - IsCameraRay = raytype("camera"); - IsShadowRay = raytype("shadow"); - IsDiffuseRay = raytype("diffuse"); - IsGlossyRay = raytype("glossy"); - IsSingularRay = raytype("singular"); - IsReflectionRay = raytype("reflection"); - IsTransmissionRay = raytype("refraction"); - IsVolumeScatterRay = raytype("volume_scatter"); + IsCameraRay = raytype("camera"); + IsShadowRay = raytype("shadow"); + IsDiffuseRay = raytype("diffuse"); + IsGlossyRay = raytype("glossy"); + IsSingularRay = raytype("singular"); + IsReflectionRay = raytype("reflection"); + IsTransmissionRay = raytype("refraction"); + IsVolumeScatterRay = raytype("volume_scatter"); - getattribute("path:ray_length", RayLength); + getattribute("path:ray_length", RayLength); - int ray_depth; - getattribute("path:ray_depth", ray_depth); - RayDepth = (float)ray_depth; + int ray_depth; + getattribute("path:ray_depth", ray_depth); + RayDepth = (float)ray_depth; - int diffuse_depth; - getattribute("path:diffuse_depth", diffuse_depth); - DiffuseDepth = (float)diffuse_depth; + int diffuse_depth; + 
getattribute("path:diffuse_depth", diffuse_depth); + DiffuseDepth = (float)diffuse_depth; - int glossy_depth; - getattribute("path:glossy_depth", glossy_depth); - GlossyDepth = (float)glossy_depth; + int glossy_depth; + getattribute("path:glossy_depth", glossy_depth); + GlossyDepth = (float)glossy_depth; - int transparent_depth; - getattribute("path:transparent_depth", transparent_depth); - TransparentDepth = (float)transparent_depth; + int transparent_depth; + getattribute("path:transparent_depth", transparent_depth); + TransparentDepth = (float)transparent_depth; - int transmission_depth; - getattribute("path:transmission_depth", transmission_depth); - TransmissionDepth = (float)transmission_depth; + int transmission_depth; + getattribute("path:transmission_depth", transmission_depth); + TransmissionDepth = (float)transmission_depth; } - diff --git a/intern/cycles/kernel/shaders/node_magic_texture.osl b/intern/cycles/kernel/shaders/node_magic_texture.osl index 8d6af391e04..aa700e575ef 100644 --- a/intern/cycles/kernel/shaders/node_magic_texture.osl +++ b/intern/cycles/kernel/shaders/node_magic_texture.osl @@ -21,91 +21,89 @@ color magic(point p, int n, float distortion) { - float dist = distortion; - - float x = sin(( p[0] + p[1] + p[2]) * 5.0); - float y = cos((-p[0] + p[1] - p[2]) * 5.0); - float z = -cos((-p[0] - p[1] + p[2]) * 5.0); - - if (n > 0) { - x *= dist; - y *= dist; - z *= dist; - y = -cos(x - y + z); - y *= dist; - - if (n > 1) { - x = cos(x - y - z); - x *= dist; - - if (n > 2) { - z = sin(-x - y - z); - z *= dist; - - if (n > 3) { - x = -cos(-x + y - z); - x *= dist; - - if (n > 4) { - y = -sin(-x + y + z); - y *= dist; - - if (n > 5) { - y = -cos(-x + y + z); - y *= dist; - - if (n > 6) { - x = cos(x + y + z); - x *= dist; - - if (n > 7) { - z = sin(x + y - z); - z *= dist; - - if (n > 8) { - x = -cos(-x - y + z); - x *= dist; - - if (n > 9) { - y = -sin(x - y + z); - y *= dist; - } - } - } - } - } - } - } - } - } - } - - if (dist != 0.0) { - 
dist *= 2.0; - x /= dist; - y /= dist; - z /= dist; - } - - return color(0.5 - x, 0.5 - y, 0.5 - z); + float dist = distortion; + + float x = sin((p[0] + p[1] + p[2]) * 5.0); + float y = cos((-p[0] + p[1] - p[2]) * 5.0); + float z = -cos((-p[0] - p[1] + p[2]) * 5.0); + + if (n > 0) { + x *= dist; + y *= dist; + z *= dist; + y = -cos(x - y + z); + y *= dist; + + if (n > 1) { + x = cos(x - y - z); + x *= dist; + + if (n > 2) { + z = sin(-x - y - z); + z *= dist; + + if (n > 3) { + x = -cos(-x + y - z); + x *= dist; + + if (n > 4) { + y = -sin(-x + y + z); + y *= dist; + + if (n > 5) { + y = -cos(-x + y + z); + y *= dist; + + if (n > 6) { + x = cos(x + y + z); + x *= dist; + + if (n > 7) { + z = sin(x + y - z); + z *= dist; + + if (n > 8) { + x = -cos(-x - y + z); + x *= dist; + + if (n > 9) { + y = -sin(x - y + z); + y *= dist; + } + } + } + } + } + } + } + } + } + } + + if (dist != 0.0) { + dist *= 2.0; + x /= dist; + y /= dist; + z /= dist; + } + + return color(0.5 - x, 0.5 - y, 0.5 - z); } -shader node_magic_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - int depth = 2, - float Distortion = 5.0, - float Scale = 5.0, - point Vector = P, - output float Fac = 0.0, - output color Color = 0.0) +shader node_magic_texture(int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + int depth = 2, + float Distortion = 5.0, + float Scale = 5.0, + point Vector = P, + output float Fac = 0.0, + output color Color = 0.0) { - point p = Vector; + point p = Vector; - if (use_mapping) - p = transform(mapping, p); + if (use_mapping) + p = transform(mapping, p); - Color = magic(p * Scale, depth, Distortion); - Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0); + Color = magic(p * Scale, depth, Distortion); + Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0); } - diff --git a/intern/cycles/kernel/shaders/node_mapping.osl b/intern/cycles/kernel/shaders/node_mapping.osl index 
69106957ee4..f5cc2d1c5dd 100644 --- a/intern/cycles/kernel/shaders/node_mapping.osl +++ b/intern/cycles/kernel/shaders/node_mapping.osl @@ -16,18 +16,17 @@ #include "stdosl.h" -shader node_mapping( - matrix Matrix = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - point mapping_min = point(0.0, 0.0, 0.0), - point mapping_max = point(0.0, 0.0, 0.0), - int use_minmax = 0, - point VectorIn = point(0.0, 0.0, 0.0), - output point VectorOut = point(0.0, 0.0, 0.0)) +shader node_mapping(matrix Matrix = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + point mapping_min = point(0.0, 0.0, 0.0), + point mapping_max = point(0.0, 0.0, 0.0), + int use_minmax = 0, + point VectorIn = point(0.0, 0.0, 0.0), + output point VectorOut = point(0.0, 0.0, 0.0)) { - point p = transform(Matrix, VectorIn); + point p = transform(Matrix, VectorIn); - if (use_minmax) - p = min(max(mapping_min, p), mapping_max); - - VectorOut = p; + if (use_minmax) + p = min(max(mapping_min, p), mapping_max); + + VectorOut = p; } diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl index aa9f6e671c3..8830339e05f 100644 --- a/intern/cycles/kernel/shaders/node_math.osl +++ b/intern/cycles/kernel/shaders/node_math.osl @@ -18,107 +18,105 @@ float safe_divide(float a, float b) { - float result; + float result; - if (b == 0.0) - result = 0.0; - else - result = a / b; - - return result; + if (b == 0.0) + result = 0.0; + else + result = a / b; + + return result; } float safe_modulo(float a, float b) { - float result; + float result; + + if (b == 0.0) + result = 0.0; + else + result = fmod(a, b); - if (b == 0.0) - result = 0.0; - else - result = fmod(a, b); - - return result; + return result; } float safe_sqrt(float a) { - float result; + float result; - if (a > 0.0) - result = sqrt(a); - else - result = 0.0; + if (a > 0.0) + result = sqrt(a); + else + result = 0.0; - return result; + return result; } float safe_log(float a, float b) { - if (a < 0.0 || b < 
0.0) - return 0.0; - - return log(a) / log(b); + if (a < 0.0 || b < 0.0) + return 0.0; + + return log(a) / log(b); } -shader node_math( - string type = "add", - int use_clamp = 0, - float Value1 = 0.0, - float Value2 = 0.0, - output float Value = 0.0) +shader node_math(string type = "add", + int use_clamp = 0, + float Value1 = 0.0, + float Value2 = 0.0, + output float Value = 0.0) { - /* OSL asin, acos, pow check for values that could give rise to nan */ + /* OSL asin, acos, pow check for values that could give rise to nan */ - if (type == "add") - Value = Value1 + Value2; - else if (type == "subtract") - Value = Value1 - Value2; - else if (type == "multiply") - Value = Value1 * Value2; - else if (type == "divide") - Value = safe_divide(Value1, Value2); - else if (type == "sine") - Value = sin(Value1); - else if (type == "cosine") - Value = cos(Value1); - else if (type == "tangent") - Value = tan(Value1); - else if (type == "arcsine") - Value = asin(Value1); - else if (type == "arccosine") - Value = acos(Value1); - else if (type == "arctangent") - Value = atan(Value1); - else if (type == "power") - Value = pow(Value1, Value2); - else if (type == "logarithm") - Value = safe_log(Value1, Value2); - else if (type == "minimum") - Value = min(Value1, Value2); - else if (type == "maximum") - Value = max(Value1, Value2); - else if (type == "round") - Value = floor(Value1 + 0.5); - else if (type == "less_than") - Value = Value1 < Value2; - else if (type == "greater_than") - Value = Value1 > Value2; - else if (type == "modulo") - Value = safe_modulo(Value1, Value2); - else if (type == "absolute") - Value = fabs(Value1); - else if (type == "arctan2") - Value = atan2(Value1, Value2); - else if (type == "floor") - Value = floor(Value1); - else if (type == "ceil") - Value = ceil(Value1); - else if (type == "fract") - Value = Value1 - floor(Value1); - else if (type == "sqrt") - Value = safe_sqrt(Value1); + if (type == "add") + Value = Value1 + Value2; + else if (type == 
"subtract") + Value = Value1 - Value2; + else if (type == "multiply") + Value = Value1 * Value2; + else if (type == "divide") + Value = safe_divide(Value1, Value2); + else if (type == "sine") + Value = sin(Value1); + else if (type == "cosine") + Value = cos(Value1); + else if (type == "tangent") + Value = tan(Value1); + else if (type == "arcsine") + Value = asin(Value1); + else if (type == "arccosine") + Value = acos(Value1); + else if (type == "arctangent") + Value = atan(Value1); + else if (type == "power") + Value = pow(Value1, Value2); + else if (type == "logarithm") + Value = safe_log(Value1, Value2); + else if (type == "minimum") + Value = min(Value1, Value2); + else if (type == "maximum") + Value = max(Value1, Value2); + else if (type == "round") + Value = floor(Value1 + 0.5); + else if (type == "less_than") + Value = Value1 < Value2; + else if (type == "greater_than") + Value = Value1 > Value2; + else if (type == "modulo") + Value = safe_modulo(Value1, Value2); + else if (type == "absolute") + Value = fabs(Value1); + else if (type == "arctan2") + Value = atan2(Value1, Value2); + else if (type == "floor") + Value = floor(Value1); + else if (type == "ceil") + Value = ceil(Value1); + else if (type == "fract") + Value = Value1 - floor(Value1); + else if (type == "sqrt") + Value = safe_sqrt(Value1); - if (use_clamp) - Value = clamp(Value, 0.0, 1.0); + if (use_clamp) + Value = clamp(Value, 0.0, 1.0); } - diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl index 0862c34b6e1..8caea6803ed 100644 --- a/intern/cycles/kernel/shaders/node_mix.osl +++ b/intern/cycles/kernel/shaders/node_mix.osl @@ -19,311 +19,312 @@ color node_mix_blend(float t, color col1, color col2) { - return mix(col1, col2, t); + return mix(col1, col2, t); } color node_mix_add(float t, color col1, color col2) { - return mix(col1, col1 + col2, t); + return mix(col1, col1 + col2, t); } color node_mix_mul(float t, color col1, color col2) { - return 
mix(col1, col1 * col2, t); + return mix(col1, col1 * col2, t); } color node_mix_screen(float t, color col1, color col2) { - float tm = 1.0 - t; + float tm = 1.0 - t; - return color(1.0) - (color(tm) + t * (color(1.0) - col2)) * (color(1.0) - col1); + return color(1.0) - (color(tm) + t * (color(1.0) - col2)) * (color(1.0) - col1); } color node_mix_overlay(float t, color col1, color col2) { - float tm = 1.0 - t; - - color outcol = col1; - - if (outcol[0] < 0.5) - outcol[0] *= tm + 2.0 * t * col2[0]; - else - outcol[0] = 1.0 - (tm + 2.0 * t * (1.0 - col2[0])) * (1.0 - outcol[0]); - - if (outcol[1] < 0.5) - outcol[1] *= tm + 2.0 * t * col2[1]; - else - outcol[1] = 1.0 - (tm + 2.0 * t * (1.0 - col2[1])) * (1.0 - outcol[1]); - - if (outcol[2] < 0.5) - outcol[2] *= tm + 2.0 * t * col2[2]; - else - outcol[2] = 1.0 - (tm + 2.0 * t * (1.0 - col2[2])) * (1.0 - outcol[2]); - - return outcol; + float tm = 1.0 - t; + + color outcol = col1; + + if (outcol[0] < 0.5) + outcol[0] *= tm + 2.0 * t * col2[0]; + else + outcol[0] = 1.0 - (tm + 2.0 * t * (1.0 - col2[0])) * (1.0 - outcol[0]); + + if (outcol[1] < 0.5) + outcol[1] *= tm + 2.0 * t * col2[1]; + else + outcol[1] = 1.0 - (tm + 2.0 * t * (1.0 - col2[1])) * (1.0 - outcol[1]); + + if (outcol[2] < 0.5) + outcol[2] *= tm + 2.0 * t * col2[2]; + else + outcol[2] = 1.0 - (tm + 2.0 * t * (1.0 - col2[2])) * (1.0 - outcol[2]); + + return outcol; } color node_mix_sub(float t, color col1, color col2) { - return mix(col1, col1 - col2, t); + return mix(col1, col1 - col2, t); } color node_mix_div(float t, color col1, color col2) { - float tm = 1.0 - t; + float tm = 1.0 - t; - color outcol = col1; + color outcol = col1; - if (col2[0] != 0.0) outcol[0] = tm * outcol[0] + t * outcol[0] / col2[0]; - if (col2[1] != 0.0) outcol[1] = tm * outcol[1] + t * outcol[1] / col2[1]; - if (col2[2] != 0.0) outcol[2] = tm * outcol[2] + t * outcol[2] / col2[2]; + if (col2[0] != 0.0) + outcol[0] = tm * outcol[0] + t * outcol[0] / col2[0]; + if (col2[1] != 0.0) + 
outcol[1] = tm * outcol[1] + t * outcol[1] / col2[1]; + if (col2[2] != 0.0) + outcol[2] = tm * outcol[2] + t * outcol[2] / col2[2]; - return outcol; + return outcol; } color node_mix_diff(float t, color col1, color col2) { - return mix(col1, abs(col1 - col2), t); + return mix(col1, abs(col1 - col2), t); } color node_mix_dark(float t, color col1, color col2) { - return min(col1, col2) * t + col1 * (1.0 - t); + return min(col1, col2) * t + col1 * (1.0 - t); } color node_mix_light(float t, color col1, color col2) { - return max(col1, col2 * t); + return max(col1, col2 * t); } color node_mix_dodge(float t, color col1, color col2) { - color outcol = col1; - - if (outcol[0] != 0.0) { - float tmp = 1.0 - t * col2[0]; - if (tmp <= 0.0) - outcol[0] = 1.0; - else if ((tmp = outcol[0] / tmp) > 1.0) - outcol[0] = 1.0; - else - outcol[0] = tmp; - } - if (outcol[1] != 0.0) { - float tmp = 1.0 - t * col2[1]; - if (tmp <= 0.0) - outcol[1] = 1.0; - else if ((tmp = outcol[1] / tmp) > 1.0) - outcol[1] = 1.0; - else - outcol[1] = tmp; - } - if (outcol[2] != 0.0) { - float tmp = 1.0 - t * col2[2]; - if (tmp <= 0.0) - outcol[2] = 1.0; - else if ((tmp = outcol[2] / tmp) > 1.0) - outcol[2] = 1.0; - else - outcol[2] = tmp; - } - - return outcol; + color outcol = col1; + + if (outcol[0] != 0.0) { + float tmp = 1.0 - t * col2[0]; + if (tmp <= 0.0) + outcol[0] = 1.0; + else if ((tmp = outcol[0] / tmp) > 1.0) + outcol[0] = 1.0; + else + outcol[0] = tmp; + } + if (outcol[1] != 0.0) { + float tmp = 1.0 - t * col2[1]; + if (tmp <= 0.0) + outcol[1] = 1.0; + else if ((tmp = outcol[1] / tmp) > 1.0) + outcol[1] = 1.0; + else + outcol[1] = tmp; + } + if (outcol[2] != 0.0) { + float tmp = 1.0 - t * col2[2]; + if (tmp <= 0.0) + outcol[2] = 1.0; + else if ((tmp = outcol[2] / tmp) > 1.0) + outcol[2] = 1.0; + else + outcol[2] = tmp; + } + + return outcol; } color node_mix_burn(float t, color col1, color col2) { - float tmp, tm = 1.0 - t; - - color outcol = col1; - - tmp = tm + t * col2[0]; - if (tmp <= 
0.0) - outcol[0] = 0.0; - else if ((tmp = (1.0 - (1.0 - outcol[0]) / tmp)) < 0.0) - outcol[0] = 0.0; - else if (tmp > 1.0) - outcol[0] = 1.0; - else - outcol[0] = tmp; - - tmp = tm + t * col2[1]; - if (tmp <= 0.0) - outcol[1] = 0.0; - else if ((tmp = (1.0 - (1.0 - outcol[1]) / tmp)) < 0.0) - outcol[1] = 0.0; - else if (tmp > 1.0) - outcol[1] = 1.0; - else - outcol[1] = tmp; - - tmp = tm + t * col2[2]; - if (tmp <= 0.0) - outcol[2] = 0.0; - else if ((tmp = (1.0 - (1.0 - outcol[2]) / tmp)) < 0.0) - outcol[2] = 0.0; - else if (tmp > 1.0) - outcol[2] = 1.0; - else - outcol[2] = tmp; - - return outcol; + float tmp, tm = 1.0 - t; + + color outcol = col1; + + tmp = tm + t * col2[0]; + if (tmp <= 0.0) + outcol[0] = 0.0; + else if ((tmp = (1.0 - (1.0 - outcol[0]) / tmp)) < 0.0) + outcol[0] = 0.0; + else if (tmp > 1.0) + outcol[0] = 1.0; + else + outcol[0] = tmp; + + tmp = tm + t * col2[1]; + if (tmp <= 0.0) + outcol[1] = 0.0; + else if ((tmp = (1.0 - (1.0 - outcol[1]) / tmp)) < 0.0) + outcol[1] = 0.0; + else if (tmp > 1.0) + outcol[1] = 1.0; + else + outcol[1] = tmp; + + tmp = tm + t * col2[2]; + if (tmp <= 0.0) + outcol[2] = 0.0; + else if ((tmp = (1.0 - (1.0 - outcol[2]) / tmp)) < 0.0) + outcol[2] = 0.0; + else if (tmp > 1.0) + outcol[2] = 1.0; + else + outcol[2] = tmp; + + return outcol; } color node_mix_hue(float t, color col1, color col2) { - color outcol = col1; - color hsv2 = rgb_to_hsv(col2); + color outcol = col1; + color hsv2 = rgb_to_hsv(col2); - if (hsv2[1] != 0.0) { - color hsv = rgb_to_hsv(outcol); - hsv[0] = hsv2[0]; - color tmp = hsv_to_rgb(hsv); + if (hsv2[1] != 0.0) { + color hsv = rgb_to_hsv(outcol); + hsv[0] = hsv2[0]; + color tmp = hsv_to_rgb(hsv); - outcol = mix(outcol, tmp, t); - } + outcol = mix(outcol, tmp, t); + } - return outcol; + return outcol; } color node_mix_sat(float t, color col1, color col2) { - float tm = 1.0 - t; + float tm = 1.0 - t; - color outcol = col1; + color outcol = col1; - color hsv = rgb_to_hsv(outcol); + color hsv = 
rgb_to_hsv(outcol); - if (hsv[1] != 0.0) { - color hsv2 = rgb_to_hsv(col2); + if (hsv[1] != 0.0) { + color hsv2 = rgb_to_hsv(col2); - hsv[1] = tm * hsv[1] + t * hsv2[1]; - outcol = hsv_to_rgb(hsv); - } + hsv[1] = tm * hsv[1] + t * hsv2[1]; + outcol = hsv_to_rgb(hsv); + } - return outcol; + return outcol; } color node_mix_val(float t, color col1, color col2) { - float tm = 1.0 - t; + float tm = 1.0 - t; - color hsv = rgb_to_hsv(col1); - color hsv2 = rgb_to_hsv(col2); + color hsv = rgb_to_hsv(col1); + color hsv2 = rgb_to_hsv(col2); - hsv[2] = tm * hsv[2] + t * hsv2[2]; + hsv[2] = tm * hsv[2] + t * hsv2[2]; - return hsv_to_rgb(hsv); + return hsv_to_rgb(hsv); } color node_mix_color(float t, color col1, color col2) { - color outcol = col1; - color hsv2 = rgb_to_hsv(col2); + color outcol = col1; + color hsv2 = rgb_to_hsv(col2); - if (hsv2[1] != 0.0) { - color hsv = rgb_to_hsv(outcol); - hsv[0] = hsv2[0]; - hsv[1] = hsv2[1]; - color tmp = hsv_to_rgb(hsv); + if (hsv2[1] != 0.0) { + color hsv = rgb_to_hsv(outcol); + hsv[0] = hsv2[0]; + hsv[1] = hsv2[1]; + color tmp = hsv_to_rgb(hsv); - outcol = mix(outcol, tmp, t); - } + outcol = mix(outcol, tmp, t); + } - return outcol; + return outcol; } color node_mix_soft(float t, color col1, color col2) { - float tm = 1.0 - t; + float tm = 1.0 - t; - color one = color(1.0); - color scr = one - (one - col2) * (one - col1); + color one = color(1.0); + color scr = one - (one - col2) * (one - col1); - return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr); + return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr); } color node_mix_linear(float t, color col1, color col2) { - color outcol = col1; - - if (col2[0] > 0.5) - outcol[0] = col1[0] + t * (2.0 * (col2[0] - 0.5)); - else - outcol[0] = col1[0] + t * (2.0 * (col2[0]) - 1.0); - - if (col2[1] > 0.5) - outcol[1] = col1[1] + t * (2.0 * (col2[1] - 0.5)); - else - outcol[1] = col1[1] + t * (2.0 * (col2[1]) - 1.0); - - if (col2[2] > 0.5) - outcol[2] = col1[2] + t * (2.0 * 
(col2[2] - 0.5)); - else - outcol[2] = col1[2] + t * (2.0 * (col2[2]) - 1.0); - - return outcol; + color outcol = col1; + + if (col2[0] > 0.5) + outcol[0] = col1[0] + t * (2.0 * (col2[0] - 0.5)); + else + outcol[0] = col1[0] + t * (2.0 * (col2[0]) - 1.0); + + if (col2[1] > 0.5) + outcol[1] = col1[1] + t * (2.0 * (col2[1] - 0.5)); + else + outcol[1] = col1[1] + t * (2.0 * (col2[1]) - 1.0); + + if (col2[2] > 0.5) + outcol[2] = col1[2] + t * (2.0 * (col2[2] - 0.5)); + else + outcol[2] = col1[2] + t * (2.0 * (col2[2]) - 1.0); + + return outcol; } color node_mix_clamp(color col) { - color outcol = col; + color outcol = col; - outcol[0] = clamp(col[0], 0.0, 1.0); - outcol[1] = clamp(col[1], 0.0, 1.0); - outcol[2] = clamp(col[2], 0.0, 1.0); + outcol[0] = clamp(col[0], 0.0, 1.0); + outcol[1] = clamp(col[1], 0.0, 1.0); + outcol[2] = clamp(col[2], 0.0, 1.0); - return outcol; + return outcol; } -shader node_mix( - string type = "mix", - int use_clamp = 0, - float Fac = 0.5, - color Color1 = 0.0, - color Color2 = 0.0, - output color Color = 0.0) +shader node_mix(string type = "mix", + int use_clamp = 0, + float Fac = 0.5, + color Color1 = 0.0, + color Color2 = 0.0, + output color Color = 0.0) { - float t = clamp(Fac, 0.0, 1.0); - - if (type == "mix") - Color = node_mix_blend(t, Color1, Color2); - if (type == "add") - Color = node_mix_add(t, Color1, Color2); - if (type == "multiply") - Color = node_mix_mul(t, Color1, Color2); - if (type == "screen") - Color = node_mix_screen(t, Color1, Color2); - if (type == "overlay") - Color = node_mix_overlay(t, Color1, Color2); - if (type == "subtract") - Color = node_mix_sub(t, Color1, Color2); - if (type == "divide") - Color = node_mix_div(t, Color1, Color2); - if (type == "difference") - Color = node_mix_diff(t, Color1, Color2); - if (type == "darken") - Color = node_mix_dark(t, Color1, Color2); - if (type == "lighten") - Color = node_mix_light(t, Color1, Color2); - if (type == "dodge") - Color = node_mix_dodge(t, Color1, Color2); - if 
(type == "burn") - Color = node_mix_burn(t, Color1, Color2); - if (type == "hue") - Color = node_mix_hue(t, Color1, Color2); - if (type == "saturation") - Color = node_mix_sat(t, Color1, Color2); - if (type == "value") - Color = node_mix_val (t, Color1, Color2); - if (type == "color") - Color = node_mix_color(t, Color1, Color2); - if (type == "soft_light") - Color = node_mix_soft(t, Color1, Color2); - if (type == "linear_light") - Color = node_mix_linear(t, Color1, Color2); - - if (use_clamp) - Color = node_mix_clamp(Color); + float t = clamp(Fac, 0.0, 1.0); + + if (type == "mix") + Color = node_mix_blend(t, Color1, Color2); + if (type == "add") + Color = node_mix_add(t, Color1, Color2); + if (type == "multiply") + Color = node_mix_mul(t, Color1, Color2); + if (type == "screen") + Color = node_mix_screen(t, Color1, Color2); + if (type == "overlay") + Color = node_mix_overlay(t, Color1, Color2); + if (type == "subtract") + Color = node_mix_sub(t, Color1, Color2); + if (type == "divide") + Color = node_mix_div(t, Color1, Color2); + if (type == "difference") + Color = node_mix_diff(t, Color1, Color2); + if (type == "darken") + Color = node_mix_dark(t, Color1, Color2); + if (type == "lighten") + Color = node_mix_light(t, Color1, Color2); + if (type == "dodge") + Color = node_mix_dodge(t, Color1, Color2); + if (type == "burn") + Color = node_mix_burn(t, Color1, Color2); + if (type == "hue") + Color = node_mix_hue(t, Color1, Color2); + if (type == "saturation") + Color = node_mix_sat(t, Color1, Color2); + if (type == "value") + Color = node_mix_val(t, Color1, Color2); + if (type == "color") + Color = node_mix_color(t, Color1, Color2); + if (type == "soft_light") + Color = node_mix_soft(t, Color1, Color2); + if (type == "linear_light") + Color = node_mix_linear(t, Color1, Color2); + + if (use_clamp) + Color = node_mix_clamp(Color); } - diff --git a/intern/cycles/kernel/shaders/node_mix_closure.osl b/intern/cycles/kernel/shaders/node_mix_closure.osl index 
5946dfdaaba..517c59c8786 100644 --- a/intern/cycles/kernel/shaders/node_mix_closure.osl +++ b/intern/cycles/kernel/shaders/node_mix_closure.osl @@ -16,13 +16,11 @@ #include "stdosl.h" -shader node_mix_closure( - float Fac = 0.5, - closure color Closure1 = 0, - closure color Closure2 = 0, - output closure color Closure = 0) +shader node_mix_closure(float Fac = 0.5, + closure color Closure1 = 0, + closure color Closure2 = 0, + output closure color Closure = 0) { - float t = clamp(Fac, 0.0, 1.0); - Closure = (1.0 - t) * Closure1 + t * Closure2; + float t = clamp(Fac, 0.0, 1.0); + Closure = (1.0 - t) * Closure1 + t * Closure2; } - diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl index 454b3834081..a7877c43d46 100644 --- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl +++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl @@ -28,24 +28,24 @@ float noise_musgrave_fBm(point ip, float H, float lacunarity, float octaves) { - float rmd; - float value = 0.0; - float pwr = 1.0; - float pwHL = pow(lacunarity, -H); - int i; - point p = ip; - - for (i = 0; i < (int)octaves; i++) { - value += safe_noise(p, "signed") * pwr; - pwr *= pwHL; - p *= lacunarity; - } - - rmd = octaves - floor(octaves); - if (rmd != 0.0) - value += rmd * safe_noise(p, "signed") * pwr; - - return value; + float rmd; + float value = 0.0; + float pwr = 1.0; + float pwHL = pow(lacunarity, -H); + int i; + point p = ip; + + for (i = 0; i < (int)octaves; i++) { + value += safe_noise(p, "signed") * pwr; + pwr *= pwHL; + p *= lacunarity; + } + + rmd = octaves - floor(octaves); + if (rmd != 0.0) + value += rmd * safe_noise(p, "signed") * pwr; + + return value; } /* Musgrave Multifractal @@ -57,24 +57,24 @@ float noise_musgrave_fBm(point ip, float H, float lacunarity, float octaves) float noise_musgrave_multi_fractal(point ip, float H, float lacunarity, float octaves) { - float rmd; - float value = 1.0; - float pwr = 1.0; - 
float pwHL = pow(lacunarity, -H); - int i; - point p = ip; - - for (i = 0; i < (int)octaves; i++) { - value *= (pwr * safe_noise(p, "signed") + 1.0); - pwr *= pwHL; - p *= lacunarity; - } - - rmd = octaves - floor(octaves); - if (rmd != 0.0) - value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */ - - return value; + float rmd; + float value = 1.0; + float pwr = 1.0; + float pwHL = pow(lacunarity, -H); + int i; + point p = ip; + + for (i = 0; i < (int)octaves; i++) { + value *= (pwr * safe_noise(p, "signed") + 1.0); + pwr *= pwHL; + p *= lacunarity; + } + + rmd = octaves - floor(octaves); + if (rmd != 0.0) + value *= (rmd * pwr * safe_noise(p, "signed") + 1.0); /* correct? */ + + return value; } /* Musgrave Heterogeneous Terrain @@ -85,32 +85,33 @@ float noise_musgrave_multi_fractal(point ip, float H, float lacunarity, float oc * offset: raises the terrain from `sea level' */ -float noise_musgrave_hetero_terrain(point ip, float H, float lacunarity, float octaves, float offset) +float noise_musgrave_hetero_terrain( + point ip, float H, float lacunarity, float octaves, float offset) { - float value, increment, rmd; - float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - int i; - point p = ip; - - /* first unscaled octave of function; later octaves are scaled */ - value = offset + safe_noise(p, "signed"); - p *= lacunarity; - - for (i = 1; i < (int)octaves; i++) { - increment = (safe_noise(p, "signed") + offset) * pwr * value; - value += increment; - pwr *= pwHL; - p *= lacunarity; - } - - rmd = octaves - floor(octaves); - if (rmd != 0.0) { - increment = (safe_noise(p, "signed") + offset) * pwr * value; - value += rmd * increment; - } - - return value; + float value, increment, rmd; + float pwHL = pow(lacunarity, -H); + float pwr = pwHL; + int i; + point p = ip; + + /* first unscaled octave of function; later octaves are scaled */ + value = offset + safe_noise(p, "signed"); + p *= lacunarity; + + for (i = 1; i < (int)octaves; i++) { + increment = 
(safe_noise(p, "signed") + offset) * pwr * value; + value += increment; + pwr *= pwHL; + p *= lacunarity; + } + + rmd = octaves - floor(octaves); + if (rmd != 0.0) { + increment = (safe_noise(p, "signed") + offset) * pwr * value; + value += rmd * increment; + } + + return value; } /* Hybrid Additive/Multiplicative Multifractal Terrain @@ -121,35 +122,35 @@ float noise_musgrave_hetero_terrain(point ip, float H, float lacunarity, float o * offset: raises the terrain from `sea level' */ -float noise_musgrave_hybrid_multi_fractal(point ip, float H, float lacunarity, - float octaves, float offset, float gain) +float noise_musgrave_hybrid_multi_fractal( + point ip, float H, float lacunarity, float octaves, float offset, float gain) { - float result, signal, weight, rmd; - float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - int i; - point p = ip; - - result = safe_noise(p, "signed") + offset; - weight = gain * result; - p *= lacunarity; - - for (i = 1; (weight > 0.001) && (i < (int)octaves); i++) { - if (weight > 1.0) - weight = 1.0; - - signal = (safe_noise(p, "signed") + offset) * pwr; - pwr *= pwHL; - result += weight * signal; - weight *= gain * signal; - p *= lacunarity; - } - - rmd = octaves - floor(octaves); - if (rmd != 0.0) - result += rmd * ((safe_noise(p, "signed") + offset) * pwr); - - return result; + float result, signal, weight, rmd; + float pwHL = pow(lacunarity, -H); + float pwr = pwHL; + int i; + point p = ip; + + result = safe_noise(p, "signed") + offset; + weight = gain * result; + p *= lacunarity; + + for (i = 1; (weight > 0.001) && (i < (int)octaves); i++) { + if (weight > 1.0) + weight = 1.0; + + signal = (safe_noise(p, "signed") + offset) * pwr; + pwr *= pwHL; + result += weight * signal; + weight *= gain * signal; + p *= lacunarity; + } + + rmd = octaves - floor(octaves); + if (rmd != 0.0) + result += rmd * ((safe_noise(p, "signed") + offset) * pwr); + + return result; } /* Ridged Multifractal Terrain @@ -160,72 +161,73 @@ float 
noise_musgrave_hybrid_multi_fractal(point ip, float H, float lacunarity, * offset: raises the terrain from `sea level' */ -float noise_musgrave_ridged_multi_fractal(point ip, float H, float lacunarity, - float octaves, float offset, float gain) +float noise_musgrave_ridged_multi_fractal( + point ip, float H, float lacunarity, float octaves, float offset, float gain) { - float result, signal, weight; - float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - int i; - point p = ip; - - signal = offset - fabs(safe_noise(p, "signed")); - signal *= signal; - result = signal; - weight = 1.0; - - for (i = 1; i < (int)octaves; i++) { - p *= lacunarity; - weight = clamp(signal * gain, 0.0, 1.0); - signal = offset - fabs(safe_noise(p, "signed")); - signal *= signal; - signal *= weight; - result += signal * pwr; - pwr *= pwHL; - } - - return result; + float result, signal, weight; + float pwHL = pow(lacunarity, -H); + float pwr = pwHL; + int i; + point p = ip; + + signal = offset - fabs(safe_noise(p, "signed")); + signal *= signal; + result = signal; + weight = 1.0; + + for (i = 1; i < (int)octaves; i++) { + p *= lacunarity; + weight = clamp(signal * gain, 0.0, 1.0); + signal = offset - fabs(safe_noise(p, "signed")); + signal *= signal; + signal *= weight; + result += signal * pwr; + pwr *= pwHL; + } + + return result; } /* Shader */ shader node_musgrave_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - string type = "fBM", - float Dimension = 2.0, - float Lacunarity = 1.0, - float Detail = 2.0, - float Offset = 0.0, - float Gain = 1.0, - float Scale = 5.0, - point Vector = P, - output float Fac = 0.0, - output color Color = 0.0) + int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + string type = "fBM", + float Dimension = 2.0, + float Lacunarity = 1.0, + float Detail = 2.0, + float Offset = 0.0, + float Gain = 1.0, + float Scale = 5.0, + point Vector = P, + output float 
Fac = 0.0, + output color Color = 0.0) { - float dimension = max(Dimension, 1e-5); - float octaves = clamp(Detail, 0.0, 16.0); - float lacunarity = max(Lacunarity, 1e-5); - float intensity = 1.0; - - point p = Vector; - - if (use_mapping) - p = transform(mapping, p); - - p = p * Scale; - - if (type == "multifractal") - Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves); - else if (type == "fBM") - Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves); - else if (type == "hybrid_multifractal") - Fac = intensity * noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain); - else if (type == "ridged_multifractal") - Fac = intensity * noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain); - else if (type == "hetero_terrain") - Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset); - - Color = color(Fac, Fac, Fac); + float dimension = max(Dimension, 1e-5); + float octaves = clamp(Detail, 0.0, 16.0); + float lacunarity = max(Lacunarity, 1e-5); + float intensity = 1.0; + + point p = Vector; + + if (use_mapping) + p = transform(mapping, p); + + p = p * Scale; + + if (type == "multifractal") + Fac = intensity * noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves); + else if (type == "fBM") + Fac = intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves); + else if (type == "hybrid_multifractal") + Fac = intensity * + noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain); + else if (type == "ridged_multifractal") + Fac = intensity * + noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, Offset, Gain); + else if (type == "hetero_terrain") + Fac = intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, Offset); + + Color = color(Fac, Fac, Fac); } - diff --git a/intern/cycles/kernel/shaders/node_noise_texture.osl 
b/intern/cycles/kernel/shaders/node_noise_texture.osl index 42a30897341..2cbd571e206 100644 --- a/intern/cycles/kernel/shaders/node_noise_texture.osl +++ b/intern/cycles/kernel/shaders/node_noise_texture.osl @@ -21,41 +21,40 @@ float noise(point ip, float distortion, float detail, output color Color) { - point r; - point p = ip; - int hard = 0; - - if (distortion != 0.0) { - r[0] = safe_noise(p + point(13.5), "unsigned") * distortion; - r[1] = safe_noise(p, "unsigned") * distortion; - r[2] = safe_noise(p - point(13.5), "unsigned") * distortion; - - p += r; - } - - float fac = noise_turbulence(p, detail, hard); - - Color = color(fac, noise_turbulence(point(p[1], p[0], p[2]), detail, hard), - noise_turbulence(point(p[1], p[2], p[0]), detail, hard)); - - return fac; + point r; + point p = ip; + int hard = 0; + + if (distortion != 0.0) { + r[0] = safe_noise(p + point(13.5), "unsigned") * distortion; + r[1] = safe_noise(p, "unsigned") * distortion; + r[2] = safe_noise(p - point(13.5), "unsigned") * distortion; + + p += r; + } + + float fac = noise_turbulence(p, detail, hard); + + Color = color(fac, + noise_turbulence(point(p[1], p[0], p[2]), detail, hard), + noise_turbulence(point(p[1], p[2], p[0]), detail, hard)); + + return fac; } -shader node_noise_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - float Distortion = 0.0, - float Scale = 5.0, - float Detail = 2.0, - point Vector = P, - output float Fac = 0.0, - output color Color = 0.0) +shader node_noise_texture(int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + float Distortion = 0.0, + float Scale = 5.0, + float Detail = 2.0, + point Vector = P, + output float Fac = 0.0, + output color Color = 0.0) { - point p = Vector; + point p = Vector; - if (use_mapping) - p = transform(mapping, p); + if (use_mapping) + p = transform(mapping, p); - Fac = noise(p * Scale, Distortion, Detail, Color); + Fac = noise(p * Scale, 
Distortion, Detail, Color); } - diff --git a/intern/cycles/kernel/shaders/node_normal.osl b/intern/cycles/kernel/shaders/node_normal.osl index 7307971eddd..1d20c3e7cac 100644 --- a/intern/cycles/kernel/shaders/node_normal.osl +++ b/intern/cycles/kernel/shaders/node_normal.osl @@ -16,13 +16,11 @@ #include "stdosl.h" -shader node_normal( - normal direction = normal(0.0, 0.0, 0.0), - normal NormalIn = normal(0.0, 0.0, 0.0), - output normal NormalOut = normal(0.0, 0.0, 0.0), - output float Dot = 1.0) +shader node_normal(normal direction = normal(0.0, 0.0, 0.0), + normal NormalIn = normal(0.0, 0.0, 0.0), + output normal NormalOut = normal(0.0, 0.0, 0.0), + output float Dot = 1.0) { - NormalOut = normalize(direction); - Dot = dot(NormalOut, normalize(NormalIn)); + NormalOut = normalize(direction); + Dot = dot(NormalOut, normalize(NormalIn)); } - diff --git a/intern/cycles/kernel/shaders/node_normal_map.osl b/intern/cycles/kernel/shaders/node_normal_map.osl index fda6f12a5da..90b593d00bc 100644 --- a/intern/cycles/kernel/shaders/node_normal_map.osl +++ b/intern/cycles/kernel/shaders/node_normal_map.osl @@ -16,79 +16,75 @@ #include "stdosl.h" -shader node_normal_map( - normal NormalIn = N, - float Strength = 1.0, - color Color = color(0.5, 0.5, 1.0), - string space = "tangent", - string attr_name = "geom:tangent", - string attr_sign_name = "geom:tangent_sign", - output normal Normal = NormalIn) +shader node_normal_map(normal NormalIn = N, + float Strength = 1.0, + color Color = color(0.5, 0.5, 1.0), + string space = "tangent", + string attr_name = "geom:tangent", + string attr_sign_name = "geom:tangent_sign", + output normal Normal = NormalIn) { - color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5); - int is_backfacing = backfacing(); + color mcolor = 2.0 * color(Color[0] - 0.5, Color[1] - 0.5, Color[2] - 0.5); + int is_backfacing = backfacing(); - if (space == "tangent") { - vector tangent; - vector ninterp; - float tangent_sign; - float is_smooth; 
+ if (space == "tangent") { + vector tangent; + vector ninterp; + float tangent_sign; + float is_smooth; - getattribute("geom:is_smooth", is_smooth); - if (!is_smooth) { - ninterp = normalize(transform("world", "object", Ng)); + getattribute("geom:is_smooth", is_smooth); + if (!is_smooth) { + ninterp = normalize(transform("world", "object", Ng)); - /* the normal is already inverted, which is too soon for the math here */ - if (is_backfacing) { - ninterp = -ninterp; - } - } + /* the normal is already inverted, which is too soon for the math here */ + if (is_backfacing) { + ninterp = -ninterp; + } + } - // get _unnormalized_ interpolated normal and tangent - if (getattribute(attr_name, tangent) && - getattribute(attr_sign_name, tangent_sign) && - (!is_smooth || getattribute("geom:N", ninterp))) - { - // apply normal map - vector B = tangent_sign * cross(ninterp, tangent); - Normal = normalize(mcolor[0] * tangent + mcolor[1] * B + mcolor[2] * ninterp); + // get _unnormalized_ interpolated normal and tangent + if (getattribute(attr_name, tangent) && getattribute(attr_sign_name, tangent_sign) && + (!is_smooth || getattribute("geom:N", ninterp))) { + // apply normal map + vector B = tangent_sign * cross(ninterp, tangent); + Normal = normalize(mcolor[0] * tangent + mcolor[1] * B + mcolor[2] * ninterp); - // transform to world space - Normal = normalize(transform("object", "world", Normal)); - } - else { - Normal = normal(0, 0, 0); - } - } - else if (space == "object") { - Normal = normalize(transform("object", "world", vector(mcolor))); - } - else if (space == "world") { - Normal = normalize(vector(mcolor)); - } - else if (space == "blender_object") { - /* strange blender convention */ - mcolor[1] = -mcolor[1]; - mcolor[2] = -mcolor[2]; - - Normal = normalize(transform("object", "world", vector(mcolor))); - } - else if (space == "blender_world") { - /* strange blender convention */ - mcolor[1] = -mcolor[1]; - mcolor[2] = -mcolor[2]; - - Normal = normalize(vector(mcolor)); 
- } + // transform to world space + Normal = normalize(transform("object", "world", Normal)); + } + else { + Normal = normal(0, 0, 0); + } + } + else if (space == "object") { + Normal = normalize(transform("object", "world", vector(mcolor))); + } + else if (space == "world") { + Normal = normalize(vector(mcolor)); + } + else if (space == "blender_object") { + /* strange blender convention */ + mcolor[1] = -mcolor[1]; + mcolor[2] = -mcolor[2]; - /* invert normal for backfacing polygons */ - if (is_backfacing) { - Normal = -Normal; - } + Normal = normalize(transform("object", "world", vector(mcolor))); + } + else if (space == "blender_world") { + /* strange blender convention */ + mcolor[1] = -mcolor[1]; + mcolor[2] = -mcolor[2]; - if (Strength != 1.0) - Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0)); + Normal = normalize(vector(mcolor)); + } - Normal = ensure_valid_reflection(Ng, I, Normal); -} + /* invert normal for backfacing polygons */ + if (is_backfacing) { + Normal = -Normal; + } + + if (Strength != 1.0) + Normal = normalize(NormalIn + (Normal - NormalIn) * max(Strength, 0.0)); + Normal = ensure_valid_reflection(Ng, I, Normal); +} diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl index dd7c663b8d8..0904a30a53f 100644 --- a/intern/cycles/kernel/shaders/node_object_info.osl +++ b/intern/cycles/kernel/shaders/node_object_info.osl @@ -16,15 +16,13 @@ #include "stdosl.h" -shader node_object_info( - output point Location = point(0.0, 0.0, 0.0), - output float ObjectIndex = 0.0, - output float MaterialIndex = 0.0, - output float Random = 0.0) +shader node_object_info(output point Location = point(0.0, 0.0, 0.0), + output float ObjectIndex = 0.0, + output float MaterialIndex = 0.0, + output float Random = 0.0) { - getattribute("object:location", Location); - getattribute("object:index", ObjectIndex); - getattribute("material:index", MaterialIndex); - getattribute("object:random", 
Random); + getattribute("object:location", Location); + getattribute("object:index", ObjectIndex); + getattribute("material:index", MaterialIndex); + getattribute("object:random", Random); } - diff --git a/intern/cycles/kernel/shaders/node_output_displacement.osl b/intern/cycles/kernel/shaders/node_output_displacement.osl index 5dbef0244fe..fa7f603980b 100644 --- a/intern/cycles/kernel/shaders/node_output_displacement.osl +++ b/intern/cycles/kernel/shaders/node_output_displacement.osl @@ -18,6 +18,5 @@ displacement node_output_displacement(vector Displacement = 0.0) { - P += Displacement; + P += Displacement; } - diff --git a/intern/cycles/kernel/shaders/node_output_surface.osl b/intern/cycles/kernel/shaders/node_output_surface.osl index 2cc4575a8c8..013666145da 100644 --- a/intern/cycles/kernel/shaders/node_output_surface.osl +++ b/intern/cycles/kernel/shaders/node_output_surface.osl @@ -18,6 +18,5 @@ surface node_output_surface(closure color Surface = 0) { - Ci = Surface; + Ci = Surface; } - diff --git a/intern/cycles/kernel/shaders/node_output_volume.osl b/intern/cycles/kernel/shaders/node_output_volume.osl index f220ba866e3..dd479e751b3 100644 --- a/intern/cycles/kernel/shaders/node_output_volume.osl +++ b/intern/cycles/kernel/shaders/node_output_volume.osl @@ -18,6 +18,5 @@ volume node_output_volume(closure color Volume = 0) { - Ci = Volume; + Ci = Volume; } - diff --git a/intern/cycles/kernel/shaders/node_particle_info.osl b/intern/cycles/kernel/shaders/node_particle_info.osl index 2a0252d5e45..e286c33a1ff 100644 --- a/intern/cycles/kernel/shaders/node_particle_info.osl +++ b/intern/cycles/kernel/shaders/node_particle_info.osl @@ -16,23 +16,21 @@ #include "stdosl.h" -shader node_particle_info( - output float Index = 0.0, - output float Random = 0.0, - output float Age = 0.0, - output float Lifetime = 0.0, - output point Location = point(0.0, 0.0, 0.0), - output float Size = 0.0, - output vector Velocity = point(0.0, 0.0, 0.0), - output vector AngularVelocity 
= point(0.0, 0.0, 0.0)) +shader node_particle_info(output float Index = 0.0, + output float Random = 0.0, + output float Age = 0.0, + output float Lifetime = 0.0, + output point Location = point(0.0, 0.0, 0.0), + output float Size = 0.0, + output vector Velocity = point(0.0, 0.0, 0.0), + output vector AngularVelocity = point(0.0, 0.0, 0.0)) { - getattribute("particle:index", Index); - getattribute("particle:random", Random); - getattribute("particle:age", Age); - getattribute("particle:lifetime", Lifetime); - getattribute("particle:location", Location); - getattribute("particle:size", Size); - getattribute("particle:velocity", Velocity); - getattribute("particle:angular_velocity", AngularVelocity); + getattribute("particle:index", Index); + getattribute("particle:random", Random); + getattribute("particle:age", Age); + getattribute("particle:lifetime", Lifetime); + getattribute("particle:location", Location); + getattribute("particle:size", Size); + getattribute("particle:velocity", Velocity); + getattribute("particle:angular_velocity", AngularVelocity); } - diff --git a/intern/cycles/kernel/shaders/node_principled_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_bsdf.osl index 6f54ba3a462..657ced9b6e6 100644 --- a/intern/cycles/kernel/shaders/node_principled_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_principled_bsdf.osl @@ -17,111 +17,144 @@ #include "stdosl.h" #include "node_fresnel.h" -shader node_principled_bsdf( - string distribution = "Multiscatter GGX", - string subsurface_method = "burley", - color BaseColor = color(0.8, 0.8, 0.8), - float Subsurface = 0.0, - vector SubsurfaceRadius = vector(1.0, 1.0, 1.0), - color SubsurfaceColor = color(0.7, 0.1, 0.1), - float Metallic = 0.0, - float Specular = 0.5, - float SpecularTint = 0.0, - float Roughness = 0.5, - float Anisotropic = 0.0, - float AnisotropicRotation = 0.0, - float Sheen = 0.0, - float SheenTint = 0.5, - float Clearcoat = 0.0, - float ClearcoatRoughness = 0.03, - float IOR = 1.45, - 
float Transmission = 0.0, - float TransmissionRoughness = 0.0, - normal Normal = N, - normal ClearcoatNormal = N, - normal Tangent = normalize(dPdu), - output closure color BSDF = 0) +shader node_principled_bsdf(string distribution = "Multiscatter GGX", + string subsurface_method = "burley", + color BaseColor = color(0.8, 0.8, 0.8), + float Subsurface = 0.0, + vector SubsurfaceRadius = vector(1.0, 1.0, 1.0), + color SubsurfaceColor = color(0.7, 0.1, 0.1), + float Metallic = 0.0, + float Specular = 0.5, + float SpecularTint = 0.0, + float Roughness = 0.5, + float Anisotropic = 0.0, + float AnisotropicRotation = 0.0, + float Sheen = 0.0, + float SheenTint = 0.5, + float Clearcoat = 0.0, + float ClearcoatRoughness = 0.03, + float IOR = 1.45, + float Transmission = 0.0, + float TransmissionRoughness = 0.0, + normal Normal = N, + normal ClearcoatNormal = N, + normal Tangent = normalize(dPdu), + output closure color BSDF = 0) { - float f = max(IOR, 1e-5); - float diffuse_weight = (1.0 - clamp(Metallic, 0.0, 1.0)) * (1.0 - clamp(Transmission, 0.0, 1.0)); - float final_transmission = clamp(Transmission, 0.0, 1.0) * (1.0 - clamp(Metallic, 0.0, 1.0)); - float specular_weight = (1.0 - final_transmission); - - vector T = Tangent; - - float m_cdlum = luminance(BaseColor); - color m_ctint = m_cdlum > 0.0 ? BaseColor / m_cdlum : color(0.0, 0.0, 0.0); // normalize lum. 
to isolate hue+sat - - /* rotate tangent */ - if (AnisotropicRotation != 0.0) - T = rotate(T, AnisotropicRotation * M_2PI, point(0.0, 0.0, 0.0), Normal); - - if (diffuse_weight > 1e-5) { - if (Subsurface > 1e-5) { - color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface); - if (subsurface_method == "burley") { - BSDF = mixed_ss_base_color * bssrdf("principled", Normal, Subsurface * SubsurfaceRadius, SubsurfaceColor, "roughness", Roughness); - } - else { - BSDF = mixed_ss_base_color * bssrdf("principled_random_walk", Normal, Subsurface * SubsurfaceRadius, mixed_ss_base_color, "roughness", Roughness); - } - } - else { - BSDF = BaseColor * principled_diffuse(Normal, Roughness); - } - - if (Sheen > 1e-5) { - color sheen_color = color(1.0, 1.0, 1.0) * (1.0 - SheenTint) + m_ctint * SheenTint; - - BSDF = BSDF + sheen_color * Sheen * principled_sheen(Normal); - } - - BSDF = BSDF * diffuse_weight; - } - - if (specular_weight > 1e-5) { - float aspect = sqrt(1.0 - Anisotropic * 0.9); - float r2 = Roughness * Roughness; - - float alpha_x = r2 / aspect; - float alpha_y = r2 * aspect; - - color tmp_col = color(1.0, 1.0, 1.0) * (1.0 - SpecularTint) + m_ctint * SpecularTint; - - color Cspec0 = (Specular * 0.08 * tmp_col) * (1.0 - Metallic) + BaseColor * Metallic; - - if (distribution == "GGX" || Roughness <= 0.075) { - BSDF = BSDF + specular_weight * microfacet_ggx_aniso_fresnel(Normal, T, alpha_x, alpha_y, (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0, BaseColor, Cspec0); - } else { - BSDF = BSDF + specular_weight * microfacet_multi_ggx_aniso_fresnel(Normal, T, alpha_x, alpha_y, (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0, BaseColor, Cspec0); - } - } - - if (final_transmission > 1e-5) { - color Cspec0 = BaseColor * SpecularTint + color(1.0, 1.0, 1.0) * (1.0 - SpecularTint); - float eta = backfacing() ? 
1.0 / f : f; - - if (distribution == "GGX" || Roughness <= 5e-2) { - float cosNO = dot(Normal, I); - float Fr = fresnel_dielectric_cos(cosNO, eta); - - float refl_roughness = Roughness; - if (Roughness <= 1e-2) - refl_roughness = 0.0; - - float transmission_roughness = refl_roughness; - if (distribution == "GGX") - transmission_roughness = 1.0 - (1.0 - refl_roughness) * (1.0 - TransmissionRoughness); - - BSDF = BSDF + final_transmission * (Fr * microfacet_ggx_fresnel(Normal, refl_roughness * refl_roughness, eta, BaseColor, Cspec0) + - (1.0 - Fr) * BaseColor * microfacet_ggx_refraction(Normal, transmission_roughness * transmission_roughness, eta)); - } else { - BSDF = BSDF + final_transmission * microfacet_multi_ggx_glass_fresnel(Normal, Roughness * Roughness, eta, BaseColor, Cspec0); - } - } - - if (Clearcoat > 1e-5) { - BSDF = BSDF + principled_clearcoat(ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness); - } + float f = max(IOR, 1e-5); + float diffuse_weight = (1.0 - clamp(Metallic, 0.0, 1.0)) * (1.0 - clamp(Transmission, 0.0, 1.0)); + float final_transmission = clamp(Transmission, 0.0, 1.0) * (1.0 - clamp(Metallic, 0.0, 1.0)); + float specular_weight = (1.0 - final_transmission); + + vector T = Tangent; + + float m_cdlum = luminance(BaseColor); + color m_ctint = m_cdlum > 0.0 ? BaseColor / m_cdlum : + color(0.0, 0.0, 0.0); // normalize lum. 
to isolate hue+sat + + /* rotate tangent */ + if (AnisotropicRotation != 0.0) + T = rotate(T, AnisotropicRotation * M_2PI, point(0.0, 0.0, 0.0), Normal); + + if (diffuse_weight > 1e-5) { + if (Subsurface > 1e-5) { + color mixed_ss_base_color = SubsurfaceColor * Subsurface + BaseColor * (1.0 - Subsurface); + if (subsurface_method == "burley") { + BSDF = mixed_ss_base_color * bssrdf("principled", + Normal, + Subsurface * SubsurfaceRadius, + SubsurfaceColor, + "roughness", + Roughness); + } + else { + BSDF = mixed_ss_base_color * bssrdf("principled_random_walk", + Normal, + Subsurface * SubsurfaceRadius, + mixed_ss_base_color, + "roughness", + Roughness); + } + } + else { + BSDF = BaseColor * principled_diffuse(Normal, Roughness); + } + + if (Sheen > 1e-5) { + color sheen_color = color(1.0, 1.0, 1.0) * (1.0 - SheenTint) + m_ctint * SheenTint; + + BSDF = BSDF + sheen_color * Sheen * principled_sheen(Normal); + } + + BSDF = BSDF * diffuse_weight; + } + + if (specular_weight > 1e-5) { + float aspect = sqrt(1.0 - Anisotropic * 0.9); + float r2 = Roughness * Roughness; + + float alpha_x = r2 / aspect; + float alpha_y = r2 * aspect; + + color tmp_col = color(1.0, 1.0, 1.0) * (1.0 - SpecularTint) + m_ctint * SpecularTint; + + color Cspec0 = (Specular * 0.08 * tmp_col) * (1.0 - Metallic) + BaseColor * Metallic; + + if (distribution == "GGX" || Roughness <= 0.075) { + BSDF = BSDF + specular_weight * + microfacet_ggx_aniso_fresnel(Normal, + T, + alpha_x, + alpha_y, + (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0, + BaseColor, + Cspec0); + } + else { + BSDF = BSDF + specular_weight * microfacet_multi_ggx_aniso_fresnel( + Normal, + T, + alpha_x, + alpha_y, + (2.0 / (1.0 - sqrt(0.08 * Specular))) - 1.0, + BaseColor, + Cspec0); + } + } + + if (final_transmission > 1e-5) { + color Cspec0 = BaseColor * SpecularTint + color(1.0, 1.0, 1.0) * (1.0 - SpecularTint); + float eta = backfacing() ? 
1.0 / f : f; + + if (distribution == "GGX" || Roughness <= 5e-2) { + float cosNO = dot(Normal, I); + float Fr = fresnel_dielectric_cos(cosNO, eta); + + float refl_roughness = Roughness; + if (Roughness <= 1e-2) + refl_roughness = 0.0; + + float transmission_roughness = refl_roughness; + if (distribution == "GGX") + transmission_roughness = 1.0 - (1.0 - refl_roughness) * (1.0 - TransmissionRoughness); + + BSDF = BSDF + + final_transmission * + (Fr * microfacet_ggx_fresnel( + Normal, refl_roughness * refl_roughness, eta, BaseColor, Cspec0) + + (1.0 - Fr) * BaseColor * + microfacet_ggx_refraction( + Normal, transmission_roughness * transmission_roughness, eta)); + } + else { + BSDF = BSDF + + final_transmission * microfacet_multi_ggx_glass_fresnel( + Normal, Roughness * Roughness, eta, BaseColor, Cspec0); + } + } + + if (Clearcoat > 1e-5) { + BSDF = BSDF + principled_clearcoat( + ClearcoatNormal, Clearcoat, ClearcoatRoughness * ClearcoatRoughness); + } } - diff --git a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl index 757a88f8ece..bf986438fca 100644 --- a/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_principled_hair_bsdf.osl @@ -18,88 +18,88 @@ color log3(color a) { - return color(log(a[0]), log(a[1]), log(a[2])); + return color(log(a[0]), log(a[1]), log(a[2])); } color sigma_from_concentration(float eumelanin, float pheomelanin) { - return eumelanin*color(0.506, 0.841, 1.653) + pheomelanin*color(0.343, 0.733, 1.924); + return eumelanin * color(0.506, 0.841, 1.653) + pheomelanin * color(0.343, 0.733, 1.924); } color sigma_from_reflectance(color c, float azimuthal_roughness) { - float x = azimuthal_roughness; - float roughness_fac = (((((0.245*x) + 5.574)*x - 10.73)*x + 2.532)*x - 0.215)*x + 5.969; - color sigma = log3(c) / roughness_fac; - return sigma * sigma; + float x = azimuthal_roughness; + float roughness_fac = (((((0.245 * x) + 5.574) 
* x - 10.73) * x + 2.532) * x - 0.215) * x + + 5.969; + color sigma = log3(c) / roughness_fac; + return sigma * sigma; } -shader node_principled_hair_bsdf( - color Color = color(0.017513, 0.005763, 0.002059), - float Melanin = 0.8, - float MelaninRedness = 1.0, - float RandomColor = 0.0, - color Tint = 1.0, - color AbsorptionCoefficient = color(0.245531, 0.52, 1.365), - normal Normal = Ng, - string parametrization = "Absorption coefficient", - float Offset = radians(2), - float Roughness = 0.3, - float RadialRoughness = 0.3, - float RandomRoughness = 0.0, - float Coat = 0.0, - float IOR = 1.55, - string AttrRandom = "geom:curve_random", - float Random = 0.0, +shader node_principled_hair_bsdf(color Color = color(0.017513, 0.005763, 0.002059), + float Melanin = 0.8, + float MelaninRedness = 1.0, + float RandomColor = 0.0, + color Tint = 1.0, + color AbsorptionCoefficient = color(0.245531, 0.52, 1.365), + normal Normal = Ng, + string parametrization = "Absorption coefficient", + float Offset = radians(2), + float Roughness = 0.3, + float RadialRoughness = 0.3, + float RandomRoughness = 0.0, + float Coat = 0.0, + float IOR = 1.55, + string AttrRandom = "geom:curve_random", + float Random = 0.0, - output closure color BSDF = 0) + output closure color BSDF = 0) { - /* Get random value from curve in none is specified. */ - float random_value = 0.0; + /* Get random value from curve in none is specified. */ + float random_value = 0.0; - if (isconnected(Random)) { - random_value = Random; - } - else { - getattribute(AttrRandom, random_value); - } + if (isconnected(Random)) { + random_value = Random; + } + else { + getattribute(AttrRandom, random_value); + } - /* Compute roughness. */ - float factor_random_roughness = 1.0 + 2.0*(random_value - 0.5)*RandomRoughness; - float m0_roughness = 1.0 - clamp(Coat, 0.0, 1.0); - float roughness = Roughness*factor_random_roughness; - float radial_roughness = RadialRoughness*factor_random_roughness; + /* Compute roughness. 
*/ + float factor_random_roughness = 1.0 + 2.0 * (random_value - 0.5) * RandomRoughness; + float m0_roughness = 1.0 - clamp(Coat, 0.0, 1.0); + float roughness = Roughness * factor_random_roughness; + float radial_roughness = RadialRoughness * factor_random_roughness; - /* Compute absorption. */ - color sigma; + /* Compute absorption. */ + color sigma; - if (parametrization == "Absorption coefficient") { - sigma = AbsorptionCoefficient; - } - else if (parametrization == "Melanin concentration") { - /* Randomize melanin. */ - float factor_random_color = 1.0 + 2.0*(random_value - 0.5) * RandomColor; - float melanin = Melanin * factor_random_color; + if (parametrization == "Absorption coefficient") { + sigma = AbsorptionCoefficient; + } + else if (parametrization == "Melanin concentration") { + /* Randomize melanin. */ + float factor_random_color = 1.0 + 2.0 * (random_value - 0.5) * RandomColor; + float melanin = Melanin * factor_random_color; - /* Map melanin 0..inf from more perceptually linear 0..1. */ - melanin = -log(max(1.0 - melanin, 0.0001)); + /* Map melanin 0..inf from more perceptually linear 0..1. */ + melanin = -log(max(1.0 - melanin, 0.0001)); - /* Benedikt Bitterli's melanin ratio remapping. */ - float eumelanin = melanin * (1.0 - MelaninRedness); - float pheomelanin = melanin * MelaninRedness; - color melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin); + /* Benedikt Bitterli's melanin ratio remapping. */ + float eumelanin = melanin * (1.0 - MelaninRedness); + float pheomelanin = melanin * MelaninRedness; + color melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin); - /* Optional tint. */ - color tint_sigma = sigma_from_reflectance(Tint, radial_roughness); - sigma = melanin_sigma + tint_sigma; - } - else if (parametrization == "Direct coloring"){ - sigma = sigma_from_reflectance(Color, radial_roughness); - } - else { - /* Fallback to brownish hair, same as defaults for melanin. 
*/ - sigma = sigma_from_concentration(0.0, 0.8054375); - } + /* Optional tint. */ + color tint_sigma = sigma_from_reflectance(Tint, radial_roughness); + sigma = melanin_sigma + tint_sigma; + } + else if (parametrization == "Direct coloring") { + sigma = sigma_from_reflectance(Color, radial_roughness); + } + else { + /* Fallback to brownish hair, same as defaults for melanin. */ + sigma = sigma_from_concentration(0.0, 0.8054375); + } - BSDF = principled_hair(Normal, sigma, roughness, radial_roughness, m0_roughness, Offset, IOR); + BSDF = principled_hair(Normal, sigma, roughness, radial_roughness, m0_roughness, Offset, IOR); } diff --git a/intern/cycles/kernel/shaders/node_principled_volume.osl b/intern/cycles/kernel/shaders/node_principled_volume.osl index ea8d6ab12c5..39cf6837eb2 100644 --- a/intern/cycles/kernel/shaders/node_principled_volume.osl +++ b/intern/cycles/kernel/shaders/node_principled_volume.osl @@ -16,80 +16,78 @@ #include "stdosl.h" -shader node_principled_volume( - color Color = color(0.5, 0.5, 0.5), - float Density = 1.0, - float Anisotropy = 0.0, - color AbsorptionColor = color(0.0, 0.0, 0.0), - float EmissionStrength = 0.0, - color EmissionColor = color(1.0, 1.0, 1.0), - float BlackbodyIntensity = 0.0, - color BlackbodyTint = color(1.0, 1.0, 1.0), - float Temperature = 1500.0, - string DensityAttribute = "geom:density", - string ColorAttribute = "geom:color", - string TemperatureAttribute = "geom:temperature", - output closure color Volume = 0) +shader node_principled_volume(color Color = color(0.5, 0.5, 0.5), + float Density = 1.0, + float Anisotropy = 0.0, + color AbsorptionColor = color(0.0, 0.0, 0.0), + float EmissionStrength = 0.0, + color EmissionColor = color(1.0, 1.0, 1.0), + float BlackbodyIntensity = 0.0, + color BlackbodyTint = color(1.0, 1.0, 1.0), + float Temperature = 1500.0, + string DensityAttribute = "geom:density", + string ColorAttribute = "geom:color", + string TemperatureAttribute = "geom:temperature", + output closure color 
Volume = 0) { - /* Compute density. */ - float primitive_density = 1.0; - float density = max(Density, 0.0); + /* Compute density. */ + float primitive_density = 1.0; + float density = max(Density, 0.0); - if(density > 1e-5) { - if(getattribute(DensityAttribute, primitive_density)) { - density = max(density * primitive_density, 0.0); - } - } + if (density > 1e-5) { + if (getattribute(DensityAttribute, primitive_density)) { + density = max(density * primitive_density, 0.0); + } + } - if(density > 1e-5) { - /* Compute scattering color. */ - color scatter_color = Color; - color primitive_color; - if(getattribute(ColorAttribute, primitive_color)) { - scatter_color *= primitive_color; - } + if (density > 1e-5) { + /* Compute scattering color. */ + color scatter_color = Color; + color primitive_color; + if (getattribute(ColorAttribute, primitive_color)) { + scatter_color *= primitive_color; + } - /* Add scattering and absorption closures. */ - color scatter_coeff = scatter_color; - color absorption_color = sqrt(max(AbsorptionColor, 0.0)); - color absorption_coeff = max(1.0 - scatter_color, 0.0) * max(1.0 - absorption_color, 0.0); - Volume = scatter_coeff * density * henyey_greenstein(Anisotropy) + - absorption_coeff * density * absorption(); - } + /* Add scattering and absorption closures. */ + color scatter_coeff = scatter_color; + color absorption_color = sqrt(max(AbsorptionColor, 0.0)); + color absorption_coeff = max(1.0 - scatter_color, 0.0) * max(1.0 - absorption_color, 0.0); + Volume = scatter_coeff * density * henyey_greenstein(Anisotropy) + + absorption_coeff * density * absorption(); + } - /* Compute emission. */ - float emission_strength = max(EmissionStrength, 0.0); - float blackbody_intensity = BlackbodyIntensity; + /* Compute emission. 
*/ + float emission_strength = max(EmissionStrength, 0.0); + float blackbody_intensity = BlackbodyIntensity; - if(emission_strength > 1e-5) { - Volume += emission_strength * EmissionColor * emission(); - } + if (emission_strength > 1e-5) { + Volume += emission_strength * EmissionColor * emission(); + } - if(blackbody_intensity > 1e-3) { - float T = Temperature; + if (blackbody_intensity > 1e-3) { + float T = Temperature; - /* Add temperature from attribute if available. */ - float temperature; - if(getattribute(TemperatureAttribute, temperature)) { - T *= max(temperature, 0.0); - } + /* Add temperature from attribute if available. */ + float temperature; + if (getattribute(TemperatureAttribute, temperature)) { + T *= max(temperature, 0.0); + } - T = max(T, 0.0); + T = max(T, 0.0); - /* Stefan-Boltzman law. */ - float T4 = (T * T) * (T * T); - float sigma = 5.670373e-8 * 1e-6 / M_PI; - float intensity = sigma * mix(1.0, T4, blackbody_intensity); + /* Stefan-Boltzman law. */ + float T4 = (T * T) * (T * T); + float sigma = 5.670373e-8 * 1e-6 / M_PI; + float intensity = sigma * mix(1.0, T4, blackbody_intensity); - if(intensity > 1e-5) { - color bb = blackbody(T); - float l = luminance(bb); + if (intensity > 1e-5) { + color bb = blackbody(T); + float l = luminance(bb); - if(l != 0.0) { - bb *= BlackbodyTint * intensity / l; - Volume += bb * emission(); - } - } - } + if (l != 0.0) { + bb *= BlackbodyTint * intensity / l; + Volume += bb * emission(); + } + } + } } - diff --git a/intern/cycles/kernel/shaders/node_ramp_util.h b/intern/cycles/kernel/shaders/node_ramp_util.h index d07d5a98316..f7fb07b257d 100644 --- a/intern/cycles/kernel/shaders/node_ramp_util.h +++ b/intern/cycles/kernel/shaders/node_ramp_util.h @@ -18,72 +18,76 @@ color rgb_ramp_lookup(color ramp[], float at, int interpolate, int extrapolate) { - float f = at; - int table_size = arraylength(ramp); + float f = at; + int table_size = arraylength(ramp); - if ((f < 0.0 || f > 1.0) && extrapolate) { - color t0, 
dy; - if (f < 0.0) { - t0 = ramp[0]; - dy = t0 - ramp[1]; - f = -f; - } - else { - t0 = ramp[table_size - 1]; - dy = t0 - ramp[table_size - 2]; - f = f - 1.0; - } - return t0 + dy * f * (table_size - 1); - } + if ((f < 0.0 || f > 1.0) && extrapolate) { + color t0, dy; + if (f < 0.0) { + t0 = ramp[0]; + dy = t0 - ramp[1]; + f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0; + } + return t0 + dy * f * (table_size - 1); + } - f = clamp(at, 0.0, 1.0) * (table_size - 1); + f = clamp(at, 0.0, 1.0) * (table_size - 1); - /* clamp int as well in case of NaN */ - int i = (int)f; - if (i < 0) i = 0; - if (i >= table_size) i = table_size - 1; - float t = f - (float)i; + /* clamp int as well in case of NaN */ + int i = (int)f; + if (i < 0) + i = 0; + if (i >= table_size) + i = table_size - 1; + float t = f - (float)i; - color result = ramp[i]; + color result = ramp[i]; - if (interpolate && t > 0.0) - result = (1.0 - t) * result + t * ramp[i + 1]; + if (interpolate && t > 0.0) + result = (1.0 - t) * result + t * ramp[i + 1]; - return result; + return result; } float rgb_ramp_lookup(float ramp[], float at, int interpolate, int extrapolate) { - float f = at; - int table_size = arraylength(ramp); + float f = at; + int table_size = arraylength(ramp); - if ((f < 0.0 || f > 1.0) && extrapolate) { - float t0, dy; - if (f < 0.0) { - t0 = ramp[0]; - dy = t0 - ramp[1]; - f = -f; - } - else { - t0 = ramp[table_size - 1]; - dy = t0 - ramp[table_size - 2]; - f = f - 1.0; - } - return t0 + dy * f * (table_size - 1); - } + if ((f < 0.0 || f > 1.0) && extrapolate) { + float t0, dy; + if (f < 0.0) { + t0 = ramp[0]; + dy = t0 - ramp[1]; + f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0; + } + return t0 + dy * f * (table_size - 1); + } - f = clamp(at, 0.0, 1.0) * (table_size - 1); + f = clamp(at, 0.0, 1.0) * (table_size - 1); - /* clamp int as well in case of NaN */ - int i = (int)f; - if (i < 0) i = 
0; - if (i >= table_size) i = table_size - 1; - float t = f - (float)i; + /* clamp int as well in case of NaN */ + int i = (int)f; + if (i < 0) + i = 0; + if (i >= table_size) + i = table_size - 1; + float t = f - (float)i; - float result = ramp[i]; + float result = ramp[i]; - if (interpolate && t > 0.0) - result = (1.0 - t) * result + t * ramp[i + 1]; + if (interpolate && t > 0.0) + result = (1.0 - t) * result + t * ramp[i + 1]; - return result; + return result; } diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl index eaab7282243..941d99dd44d 100644 --- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl @@ -16,23 +16,21 @@ #include "stdosl.h" -shader node_refraction_bsdf( - color Color = 0.8, - string distribution = "sharp", - float Roughness = 0.2, - float IOR = 1.45, - normal Normal = N, - output closure color BSDF = 0) +shader node_refraction_bsdf(color Color = 0.8, + string distribution = "sharp", + float Roughness = 0.2, + float IOR = 1.45, + normal Normal = N, + output closure color BSDF = 0) { - float f = max(IOR, 1e-5); - float eta = backfacing() ? 1.0 / f : f; - float roughness = Roughness * Roughness; + float f = max(IOR, 1e-5); + float eta = backfacing() ? 
1.0 / f : f; + float roughness = Roughness * Roughness; - if (distribution == "sharp") - BSDF = Color * refraction(Normal, eta); - else if (distribution == "beckmann") - BSDF = Color * microfacet_beckmann_refraction(Normal, roughness, eta); - else if (distribution == "GGX") - BSDF = Color * microfacet_ggx_refraction(Normal, roughness, eta); + if (distribution == "sharp") + BSDF = Color * refraction(Normal, eta); + else if (distribution == "beckmann") + BSDF = Color * microfacet_beckmann_refraction(Normal, roughness, eta); + else if (distribution == "GGX") + BSDF = Color * microfacet_ggx_refraction(Normal, roughness, eta); } - diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl index 0d5eeea5c43..e34eb027cc3 100644 --- a/intern/cycles/kernel/shaders/node_rgb_curves.osl +++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl @@ -17,25 +17,23 @@ #include "stdosl.h" #include "node_ramp_util.h" -shader node_rgb_curves( - color ramp[] = {0.0}, - float min_x = 0.0, - float max_x = 1.0, +shader node_rgb_curves(color ramp[] = {0.0}, + float min_x = 0.0, + float max_x = 1.0, - color ColorIn = 0.0, - float Fac = 0.0, - output color ColorOut = 0.0) + color ColorIn = 0.0, + float Fac = 0.0, + output color ColorOut = 0.0) { - color c = (ColorIn - color(min_x, min_x, min_x)) / (max_x - min_x); + color c = (ColorIn - color(min_x, min_x, min_x)) / (max_x - min_x); - color r = rgb_ramp_lookup(ramp, c[0], 1, 1); - color g = rgb_ramp_lookup(ramp, c[1], 1, 1); - color b = rgb_ramp_lookup(ramp, c[2], 1, 1); + color r = rgb_ramp_lookup(ramp, c[0], 1, 1); + color g = rgb_ramp_lookup(ramp, c[1], 1, 1); + color b = rgb_ramp_lookup(ramp, c[2], 1, 1); - ColorOut[0] = r[0]; - ColorOut[1] = g[1]; - ColorOut[2] = b[2]; + ColorOut[0] = r[0]; + ColorOut[1] = g[1]; + ColorOut[2] = b[2]; - ColorOut = mix(ColorIn, ColorOut, Fac); + ColorOut = mix(ColorIn, ColorOut, Fac); } - diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl 
b/intern/cycles/kernel/shaders/node_rgb_ramp.osl index 4e7d8fdcf65..c9f9746a4fb 100644 --- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl +++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl @@ -17,16 +17,14 @@ #include "stdosl.h" #include "node_ramp_util.h" -shader node_rgb_ramp( - color ramp_color[] = {0.0}, - float ramp_alpha[] = {0.0}, - int interpolate = 1, +shader node_rgb_ramp(color ramp_color[] = {0.0}, + float ramp_alpha[] = {0.0}, + int interpolate = 1, - float Fac = 0.0, - output color Color = 0.0, - output float Alpha = 1.0) + float Fac = 0.0, + output color Color = 0.0, + output float Alpha = 1.0) { - Color = rgb_ramp_lookup(ramp_color, Fac, interpolate, 0); - Alpha = rgb_ramp_lookup(ramp_alpha, Fac, interpolate, 0); + Color = rgb_ramp_lookup(ramp_color, Fac, interpolate, 0); + Alpha = rgb_ramp_lookup(ramp_alpha, Fac, interpolate, 0); } - diff --git a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl index 903dfcdc881..837d6caf5fc 100644 --- a/intern/cycles/kernel/shaders/node_rgb_to_bw.osl +++ b/intern/cycles/kernel/shaders/node_rgb_to_bw.osl @@ -16,10 +16,7 @@ #include "stdosl.h" -shader node_rgb_to_bw( - color Color = 0.0, - output float Val = 0.0) +shader node_rgb_to_bw(color Color = 0.0, output float Val = 0.0) { - Val = Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722; + Val = Color[0] * 0.2126 + Color[1] * 0.7152 + Color[2] * 0.0722; } - diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl index 002e2750fca..fce5716f372 100644 --- a/intern/cycles/kernel/shaders/node_scatter_volume.osl +++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl @@ -16,12 +16,10 @@ #include "stdosl.h" -shader node_scatter_volume( - color Color = color(0.8, 0.8, 0.8), - float Density = 1.0, - float Anisotropy = 0.0, - output closure color Volume = 0) +shader node_scatter_volume(color Color = color(0.8, 0.8, 0.8), + float Density = 1.0, + 
float Anisotropy = 0.0, + output closure color Volume = 0) { - Volume = (Color * max(Density, 0.0)) * henyey_greenstein(Anisotropy); + Volume = (Color * max(Density, 0.0)) * henyey_greenstein(Anisotropy); } - diff --git a/intern/cycles/kernel/shaders/node_separate_hsv.osl b/intern/cycles/kernel/shaders/node_separate_hsv.osl index 2a804040294..c77ed1f3755 100644 --- a/intern/cycles/kernel/shaders/node_separate_hsv.osl +++ b/intern/cycles/kernel/shaders/node_separate_hsv.osl @@ -17,15 +17,14 @@ #include "stdosl.h" #include "node_color.h" -shader node_separate_hsv( - color Color = 0.8, - output float H = 0.0, - output float S = 0.0, - output float V = 0.0) +shader node_separate_hsv(color Color = 0.8, + output float H = 0.0, + output float S = 0.0, + output float V = 0.0) { - color col = rgb_to_hsv(Color); - - H = col[0]; - S = col[1]; - V = col[2]; + color col = rgb_to_hsv(Color); + + H = col[0]; + S = col[1]; + V = col[2]; } diff --git a/intern/cycles/kernel/shaders/node_separate_rgb.osl b/intern/cycles/kernel/shaders/node_separate_rgb.osl index 43d9e3aa4b1..ee64add27e2 100644 --- a/intern/cycles/kernel/shaders/node_separate_rgb.osl +++ b/intern/cycles/kernel/shaders/node_separate_rgb.osl @@ -16,13 +16,12 @@ #include "stdosl.h" -shader node_separate_rgb( - color Image = 0.8, - output float R = 0.0, - output float G = 0.0, - output float B = 0.0) +shader node_separate_rgb(color Image = 0.8, + output float R = 0.0, + output float G = 0.0, + output float B = 0.0) { - R = Image[0]; - G = Image[1]; - B = Image[2]; + R = Image[0]; + G = Image[1]; + B = Image[2]; } diff --git a/intern/cycles/kernel/shaders/node_separate_xyz.osl b/intern/cycles/kernel/shaders/node_separate_xyz.osl index e1963a1902f..8a563f5e920 100644 --- a/intern/cycles/kernel/shaders/node_separate_xyz.osl +++ b/intern/cycles/kernel/shaders/node_separate_xyz.osl @@ -16,13 +16,12 @@ #include "stdosl.h" -shader node_separate_xyz( - vector Vector = 0.8, - output float X = 0.0, - output float Y = 0.0, - output 
float Z = 0.0) +shader node_separate_xyz(vector Vector = 0.8, + output float X = 0.0, + output float Y = 0.0, + output float Z = 0.0) { - X = Vector[0]; - Y = Vector[1]; - Z = Vector[2]; + X = Vector[0]; + Y = Vector[1]; + Z = Vector[2]; } diff --git a/intern/cycles/kernel/shaders/node_set_normal.osl b/intern/cycles/kernel/shaders/node_set_normal.osl index 7ca7ac9350c..9541b829ef7 100644 --- a/intern/cycles/kernel/shaders/node_set_normal.osl +++ b/intern/cycles/kernel/shaders/node_set_normal.osl @@ -16,11 +16,8 @@ #include "stdosl.h" -surface node_set_normal( - normal Direction = N, - output normal Normal = N) +surface node_set_normal(normal Direction = N, output normal Normal = N) { - N = Direction; - Normal = Direction; + N = Direction; + Normal = Direction; } - diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl index a6c187d15f2..9b29e5489c2 100644 --- a/intern/cycles/kernel/shaders/node_sky_texture.osl +++ b/intern/cycles/kernel/shaders/node_sky_texture.osl @@ -19,115 +19,122 @@ float sky_angle_between(float thetav, float phiv, float theta, float phi) { - float cospsi = sin(thetav) * sin(theta) * cos(phi - phiv) + cos(thetav) * cos(theta); + float cospsi = sin(thetav) * sin(theta) * cos(phi - phiv) + cos(thetav) * cos(theta); - if (cospsi > 1.0) - return 0.0; - if (cospsi < -1.0) - return M_PI; + if (cospsi > 1.0) + return 0.0; + if (cospsi < -1.0) + return M_PI; - return acos(cospsi); + return acos(cospsi); } vector sky_spherical_coordinates(vector dir) { - return vector(acos(dir[2]), atan2(dir[0], dir[1]), 0); + return vector(acos(dir[2]), atan2(dir[0], dir[1]), 0); } /* Preetham */ float sky_perez_function(float lam[9], float theta, float gamma) { - float ctheta = cos(theta); - float cgamma = cos(gamma); + float ctheta = cos(theta); + float cgamma = cos(gamma); - return (1.0 + lam[0] * exp(lam[1] / ctheta)) * (1.0 + lam[2] * exp(lam[3] * gamma) + lam[4] * cgamma * cgamma); + return (1.0 + lam[0] 
* exp(lam[1] / ctheta)) * + (1.0 + lam[2] * exp(lam[3] * gamma) + lam[4] * cgamma * cgamma); } color sky_radiance_old(normal dir, - float sunphi, float suntheta, color radiance, - float config_x[9], float config_y[9], float config_z[9]) + float sunphi, + float suntheta, + color radiance, + float config_x[9], + float config_y[9], + float config_z[9]) { - /* convert vector to spherical coordinates */ - vector spherical = sky_spherical_coordinates(dir); - float theta = spherical[0]; - float phi = spherical[1]; + /* convert vector to spherical coordinates */ + vector spherical = sky_spherical_coordinates(dir); + float theta = spherical[0]; + float phi = spherical[1]; - /* angle between sun direction and dir */ - float gamma = sky_angle_between(theta, phi, suntheta, sunphi); + /* angle between sun direction and dir */ + float gamma = sky_angle_between(theta, phi, suntheta, sunphi); - /* clamp theta to horizon */ - theta = min(theta, M_PI_2 - 0.001); + /* clamp theta to horizon */ + theta = min(theta, M_PI_2 - 0.001); - /* compute xyY color space values */ - float x = radiance[1] * sky_perez_function(config_y, theta, gamma); - float y = radiance[2] * sky_perez_function(config_z, theta, gamma); - float Y = radiance[0] * sky_perez_function(config_x, theta, gamma); + /* compute xyY color space values */ + float x = radiance[1] * sky_perez_function(config_y, theta, gamma); + float y = radiance[2] * sky_perez_function(config_z, theta, gamma); + float Y = radiance[0] * sky_perez_function(config_x, theta, gamma); - /* convert to RGB */ - color xyz = xyY_to_xyz(x, y, Y); - return xyz_to_rgb(xyz[0], xyz[1], xyz[2]); + /* convert to RGB */ + color xyz = xyY_to_xyz(x, y, Y); + return xyz_to_rgb(xyz[0], xyz[1], xyz[2]); } /* Hosek / Wilkie */ float sky_radiance_internal(float config[9], float theta, float gamma) { - float ctheta = cos(theta); - float cgamma = cos(gamma); - - float expM = exp(config[4] * gamma); - float rayM = cgamma * cgamma; - float mieM = (1.0 + rayM) / pow((1.0 + 
config[8] * config[8] - 2.0 * config[8] * cgamma), 1.5); - float zenith = sqrt(ctheta); - - return (1.0 + config[0] * exp(config[1] / (ctheta + 0.01))) * - (config[2] + config[3] * expM + config[5] * rayM + config[6] * mieM + config[7] * zenith); + float ctheta = cos(theta); + float cgamma = cos(gamma); + + float expM = exp(config[4] * gamma); + float rayM = cgamma * cgamma; + float mieM = (1.0 + rayM) / pow((1.0 + config[8] * config[8] - 2.0 * config[8] * cgamma), 1.5); + float zenith = sqrt(ctheta); + + return (1.0 + config[0] * exp(config[1] / (ctheta + 0.01))) * + (config[2] + config[3] * expM + config[5] * rayM + config[6] * mieM + config[7] * zenith); } color sky_radiance_new(normal dir, - float sunphi, float suntheta, color radiance, - float config_x[9], float config_y[9], float config_z[9]) + float sunphi, + float suntheta, + color radiance, + float config_x[9], + float config_y[9], + float config_z[9]) { - /* convert vector to spherical coordinates */ - vector spherical = sky_spherical_coordinates(dir); - float theta = spherical[0]; - float phi = spherical[1]; + /* convert vector to spherical coordinates */ + vector spherical = sky_spherical_coordinates(dir); + float theta = spherical[0]; + float phi = spherical[1]; - /* angle between sun direction and dir */ - float gamma = sky_angle_between(theta, phi, suntheta, sunphi); + /* angle between sun direction and dir */ + float gamma = sky_angle_between(theta, phi, suntheta, sunphi); - /* clamp theta to horizon */ - theta = min(theta, M_PI_2 - 0.001); + /* clamp theta to horizon */ + theta = min(theta, M_PI_2 - 0.001); - /* compute xyz color space values */ - float x = sky_radiance_internal(config_x, theta, gamma) * radiance[0]; - float y = sky_radiance_internal(config_y, theta, gamma) * radiance[1]; - float z = sky_radiance_internal(config_z, theta, gamma) * radiance[2]; + /* compute xyz color space values */ + float x = sky_radiance_internal(config_x, theta, gamma) * radiance[0]; + float y = 
sky_radiance_internal(config_y, theta, gamma) * radiance[1]; + float z = sky_radiance_internal(config_z, theta, gamma) * radiance[2]; - /* convert to RGB and adjust strength */ - return xyz_to_rgb(x, y, z) * (M_2PI / 683); + /* convert to RGB and adjust strength */ + return xyz_to_rgb(x, y, z) * (M_2PI / 683); } -shader node_sky_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - vector Vector = P, - string type = "hosek_wilkie", - float theta = 0.0, - float phi = 0.0, - color radiance = color(0.0, 0.0, 0.0), - float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - output color Color = color(0.0, 0.0, 0.0)) +shader node_sky_texture(int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + vector Vector = P, + string type = "hosek_wilkie", + float theta = 0.0, + float phi = 0.0, + color radiance = color(0.0, 0.0, 0.0), + float config_x[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + float config_y[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + float config_z[9] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + output color Color = color(0.0, 0.0, 0.0)) { - vector p = Vector; - - if (use_mapping) - p = transform(mapping, p); - - if (type == "hosek_wilkie") - Color = sky_radiance_new(p, phi, theta, radiance, config_x, config_y, config_z); - else - Color = sky_radiance_old(p, phi, theta, radiance, config_x, config_y, config_z); -} + vector p = Vector; + if (use_mapping) + p = transform(mapping, p); + + if (type == "hosek_wilkie") + Color = sky_radiance_new(p, phi, theta, radiance, config_x, config_y, config_z); + else + Color = sky_radiance_old(p, phi, theta, radiance, config_x, config_y, config_z); +} diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl 
b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl index 0df3256e1fd..e12199d8c3d 100644 --- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl +++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl @@ -16,23 +16,30 @@ #include "stdosl.h" -shader node_subsurface_scattering( - color Color = 0.8, - float Scale = 1.0, - vector Radius = vector(0.1, 0.1, 0.1), - float TextureBlur = 0.0, - float Sharpness = 0.0, - string falloff = "cubic", - normal Normal = N, - output closure color BSSRDF = 0) +shader node_subsurface_scattering(color Color = 0.8, + float Scale = 1.0, + vector Radius = vector(0.1, 0.1, 0.1), + float TextureBlur = 0.0, + float Sharpness = 0.0, + string falloff = "cubic", + normal Normal = N, + output closure color BSSRDF = 0) { - if (falloff == "gaussian") - BSSRDF = Color * bssrdf("gaussian", Normal, Scale * Radius, Color, "texture_blur", TextureBlur); - else if (falloff == "cubic") - BSSRDF = Color * bssrdf("cubic", Normal, Scale * Radius, Color, "texture_blur", TextureBlur, "sharpness", Sharpness); - else if (falloff == "burley") - BSSRDF = Color * bssrdf("burley", Normal, Scale * Radius, Color, "texture_blur", TextureBlur); - else - BSSRDF = Color * bssrdf("random_walk", Normal, Scale * Radius, Color, "texture_blur", TextureBlur); + if (falloff == "gaussian") + BSSRDF = Color * + bssrdf("gaussian", Normal, Scale * Radius, Color, "texture_blur", TextureBlur); + else if (falloff == "cubic") + BSSRDF = Color * bssrdf("cubic", + Normal, + Scale * Radius, + Color, + "texture_blur", + TextureBlur, + "sharpness", + Sharpness); + else if (falloff == "burley") + BSSRDF = Color * bssrdf("burley", Normal, Scale * Radius, Color, "texture_blur", TextureBlur); + else + BSSRDF = Color * + bssrdf("random_walk", Normal, Scale * Radius, Color, "texture_blur", TextureBlur); } - diff --git a/intern/cycles/kernel/shaders/node_tangent.osl b/intern/cycles/kernel/shaders/node_tangent.osl index c527070a2c8..44eb9973f3d 100644 --- 
a/intern/cycles/kernel/shaders/node_tangent.osl +++ b/intern/cycles/kernel/shaders/node_tangent.osl @@ -16,33 +16,31 @@ #include "stdosl.h" -shader node_tangent( - normal NormalIn = N, - string attr_name = "geom:tangent", - string direction_type = "radial", - string axis = "z", - output normal Tangent = normalize(dPdu)) +shader node_tangent(normal NormalIn = N, + string attr_name = "geom:tangent", + string direction_type = "radial", + string axis = "z", + output normal Tangent = normalize(dPdu)) { - vector T; + vector T; - if (direction_type == "uv_map") { - getattribute(attr_name, T); - } - else if (direction_type == "radial") { - point generated; + if (direction_type == "uv_map") { + getattribute(attr_name, T); + } + else if (direction_type == "radial") { + point generated; - if (!getattribute("geom:generated", generated)) - generated = P; + if (!getattribute("geom:generated", generated)) + generated = P; - if (axis == "x") - T = vector(0.0, -(generated[2] - 0.5), (generated[1] - 0.5)); - else if (axis == "y") - T = vector(-(generated[2] - 0.5), 0.0, (generated[0] - 0.5)); - else - T = vector(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0); - } + if (axis == "x") + T = vector(0.0, -(generated[2] - 0.5), (generated[1] - 0.5)); + else if (axis == "y") + T = vector(-(generated[2] - 0.5), 0.0, (generated[0] - 0.5)); + else + T = vector(-(generated[1] - 0.5), (generated[0] - 0.5), 0.0); + } - T = transform("object", "world", T); - Tangent = cross(NormalIn, normalize(cross(T, NormalIn))); + T = transform("object", "world", T); + Tangent = cross(NormalIn, normalize(cross(T, NormalIn))); } - diff --git a/intern/cycles/kernel/shaders/node_texture.h b/intern/cycles/kernel/shaders/node_texture.h index 88305fb320f..e1f3b900ee5 100644 --- a/intern/cycles/kernel/shaders/node_texture.h +++ b/intern/cycles/kernel/shaders/node_texture.h @@ -18,148 +18,148 @@ color cellnoise_color(point p) { - float r = cellnoise(p); - float g = cellnoise(point(p[1], p[0], p[2])); - float b = 
cellnoise(point(p[1], p[2], p[0])); + float r = cellnoise(p); + float g = cellnoise(point(p[1], p[0], p[2])); + float b = cellnoise(point(p[1], p[2], p[0])); - return color(r, g, b); + return color(r, g, b); } void voronoi(point p, float e, float da[4], point pa[4]) { - /* returns distances in da and point coords in pa */ - int xx, yy, zz, xi, yi, zi; - - xi = (int)floor(p[0]); - yi = (int)floor(p[1]); - zi = (int)floor(p[2]); - - da[0] = 1e10; - da[1] = 1e10; - da[2] = 1e10; - da[3] = 1e10; - - for (xx = xi - 1; xx <= xi + 1; xx++) { - for (yy = yi - 1; yy <= yi + 1; yy++) { - for (zz = zi - 1; zz <= zi + 1; zz++) { - point ip = point(xx, yy, zz); - point vp = (point)cellnoise_color(ip); - point pd = p - (vp + ip); - float d = dot(pd, pd); - - vp += point(xx, yy, zz); - - if (d < da[0]) { - da[3] = da[2]; - da[2] = da[1]; - da[1] = da[0]; - da[0] = d; - - pa[3] = pa[2]; - pa[2] = pa[1]; - pa[1] = pa[0]; - pa[0] = vp; - } - else if (d < da[1]) { - da[3] = da[2]; - da[2] = da[1]; - da[1] = d; - - pa[3] = pa[2]; - pa[2] = pa[1]; - pa[1] = vp; - } - else if (d < da[2]) { - da[3] = da[2]; - da[2] = d; - - pa[3] = pa[2]; - pa[2] = vp; - } - else if (d < da[3]) { - da[3] = d; - pa[3] = vp; - } - } - } - } + /* returns distances in da and point coords in pa */ + int xx, yy, zz, xi, yi, zi; + + xi = (int)floor(p[0]); + yi = (int)floor(p[1]); + zi = (int)floor(p[2]); + + da[0] = 1e10; + da[1] = 1e10; + da[2] = 1e10; + da[3] = 1e10; + + for (xx = xi - 1; xx <= xi + 1; xx++) { + for (yy = yi - 1; yy <= yi + 1; yy++) { + for (zz = zi - 1; zz <= zi + 1; zz++) { + point ip = point(xx, yy, zz); + point vp = (point)cellnoise_color(ip); + point pd = p - (vp + ip); + float d = dot(pd, pd); + + vp += point(xx, yy, zz); + + if (d < da[0]) { + da[3] = da[2]; + da[2] = da[1]; + da[1] = da[0]; + da[0] = d; + + pa[3] = pa[2]; + pa[2] = pa[1]; + pa[1] = pa[0]; + pa[0] = vp; + } + else if (d < da[1]) { + da[3] = da[2]; + da[2] = da[1]; + da[1] = d; + + pa[3] = pa[2]; + pa[2] = pa[1]; + 
pa[1] = vp; + } + else if (d < da[2]) { + da[3] = da[2]; + da[2] = d; + + pa[3] = pa[2]; + pa[2] = vp; + } + else if (d < da[3]) { + da[3] = d; + pa[3] = vp; + } + } + } + } } /* Noise Bases */ float safe_noise(point p, string type) { - float f = 0.0; + float f = 0.0; - /* Perlin noise in range -1..1 */ - if (type == "signed") - f = noise("perlin", p); + /* Perlin noise in range -1..1 */ + if (type == "signed") + f = noise("perlin", p); - /* Perlin noise in range 0..1 */ - else - f = noise(p); + /* Perlin noise in range 0..1 */ + else + f = noise(p); - /* can happen for big coordinates, things even out to 0.5 then anyway */ - if (!isfinite(f)) - return 0.5; + /* can happen for big coordinates, things even out to 0.5 then anyway */ + if (!isfinite(f)) + return 0.5; - return f; + return f; } /* Turbulence */ float noise_turbulence(point p, float details, int hard) { - float fscale = 1.0; - float amp = 1.0; - float sum = 0.0; - int i, n; + float fscale = 1.0; + float amp = 1.0; + float sum = 0.0; + int i, n; - float octaves = clamp(details, 0.0, 16.0); - n = (int)octaves; + float octaves = clamp(details, 0.0, 16.0); + n = (int)octaves; - for (i = 0; i <= n; i++) { - float t = safe_noise(fscale * p, "unsigned"); + for (i = 0; i <= n; i++) { + float t = safe_noise(fscale * p, "unsigned"); - if (hard) - t = fabs(2.0 * t - 1.0); + if (hard) + t = fabs(2.0 * t - 1.0); - sum += t * amp; - amp *= 0.5; - fscale *= 2.0; - } + sum += t * amp; + amp *= 0.5; + fscale *= 2.0; + } - float rmd = octaves - floor(octaves); + float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - float t = safe_noise(fscale * p, "unsigned"); + if (rmd != 0.0) { + float t = safe_noise(fscale * p, "unsigned"); - if (hard) - t = fabs(2.0 * t - 1.0); + if (hard) + t = fabs(2.0 * t - 1.0); - float sum2 = sum + t * amp; + float sum2 = sum + t * amp; - sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1)); - sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1)); + sum *= ((float)(1 << n) / 
(float)((1 << (n + 1)) - 1)); + sum2 *= ((float)(1 << (n + 1)) / (float)((1 << (n + 2)) - 1)); - return (1.0 - rmd) * sum + rmd * sum2; - } - else { - sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1)); - return sum; - } + return (1.0 - rmd) * sum + rmd * sum2; + } + else { + sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1)); + return sum; + } } /* Utility */ float nonzero(float f, float eps) { - float r; + float r; - if (abs(f) < eps) - r = sign(f) * eps; - else - r = f; + if (abs(f) < eps) + r = sign(f) * eps; + else + r = f; - return r; + return r; } diff --git a/intern/cycles/kernel/shaders/node_texture_coordinate.osl b/intern/cycles/kernel/shaders/node_texture_coordinate.osl index 9e2109fa082..13861653d04 100644 --- a/intern/cycles/kernel/shaders/node_texture_coordinate.osl +++ b/intern/cycles/kernel/shaders/node_texture_coordinate.osl @@ -17,82 +17,81 @@ #include "stdosl.h" shader node_texture_coordinate( - normal NormalIn = N, - int is_background = 0, - int is_volume = 0, - int from_dupli = 0, - int use_transform = 0, - string bump_offset = "center", - matrix object_itfm = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + normal NormalIn = N, + int is_background = 0, + int is_volume = 0, + int from_dupli = 0, + int use_transform = 0, + string bump_offset = "center", + matrix object_itfm = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - output point Generated = point(0.0, 0.0, 0.0), - output point UV = point(0.0, 0.0, 0.0), - output point Object = point(0.0, 0.0, 0.0), - output point Camera = point(0.0, 0.0, 0.0), - output point Window = point(0.0, 0.0, 0.0), - output normal Normal = normal(0.0, 0.0, 0.0), - output point Reflection = point(0.0, 0.0, 0.0)) + output point Generated = point(0.0, 0.0, 0.0), + output point UV = point(0.0, 0.0, 0.0), + output point Object = point(0.0, 0.0, 0.0), + output point Camera = point(0.0, 0.0, 0.0), + output point Window = point(0.0, 0.0, 0.0), + output normal Normal = normal(0.0, 0.0, 0.0), + 
output point Reflection = point(0.0, 0.0, 0.0)) { - if (is_background) { - Generated = P; - UV = point(0.0, 0.0, 0.0); - Object = P; - point Pcam = transform("camera", "world", point(0, 0, 0)); - Camera = transform("camera", P + Pcam); - getattribute("NDC", Window); - Normal = NormalIn; - Reflection = I; - } - else { - if (from_dupli) { - getattribute("geom:dupli_generated", Generated); - getattribute("geom:dupli_uv", UV); - } - else if (is_volume) { - Generated = transform("object", P); + if (is_background) { + Generated = P; + UV = point(0.0, 0.0, 0.0); + Object = P; + point Pcam = transform("camera", "world", point(0, 0, 0)); + Camera = transform("camera", P + Pcam); + getattribute("NDC", Window); + Normal = NormalIn; + Reflection = I; + } + else { + if (from_dupli) { + getattribute("geom:dupli_generated", Generated); + getattribute("geom:dupli_uv", UV); + } + else if (is_volume) { + Generated = transform("object", P); - matrix tfm; - if (getattribute("geom:generated_transform", tfm)) - Generated = transform(tfm, Generated); + matrix tfm; + if (getattribute("geom:generated_transform", tfm)) + Generated = transform(tfm, Generated); - getattribute("geom:uv", UV); - } - else { - getattribute("geom:generated", Generated); - getattribute("geom:uv", UV); - } + getattribute("geom:uv", UV); + } + else { + getattribute("geom:generated", Generated); + getattribute("geom:uv", UV); + } - if (use_transform) { - Object = transform(object_itfm, P); - } - else { - Object = transform("object", P); - } - Camera = transform("camera", P); - Window = transform("NDC", P); - Normal = transform("world", "object", NormalIn); - Reflection = -reflect(I, NormalIn); - } + if (use_transform) { + Object = transform(object_itfm, P); + } + else { + Object = transform("object", P); + } + Camera = transform("camera", P); + Window = transform("NDC", P); + Normal = transform("world", "object", NormalIn); + Reflection = -reflect(I, NormalIn); + } - if (bump_offset == "dx") { - if (!from_dupli) { - 
Generated += Dx(Generated); - UV += Dx(UV); - } - Object += Dx(Object); - Camera += Dx(Camera); - Window += Dx(Window); - } - else if (bump_offset == "dy") { - if (!from_dupli) { - Generated += Dy(Generated); - UV += Dy(UV); - } - Object += Dy(Object); - Camera += Dy(Camera); - Window += Dy(Window); - } + if (bump_offset == "dx") { + if (!from_dupli) { + Generated += Dx(Generated); + UV += Dx(UV); + } + Object += Dx(Object); + Camera += Dx(Camera); + Window += Dx(Window); + } + else if (bump_offset == "dy") { + if (!from_dupli) { + Generated += Dy(Generated); + UV += Dy(UV); + } + Object += Dy(Object); + Camera += Dy(Camera); + Window += Dy(Window); + } - Window[2] = 0.0; + Window[2] = 0.0; } - diff --git a/intern/cycles/kernel/shaders/node_toon_bsdf.osl b/intern/cycles/kernel/shaders/node_toon_bsdf.osl index ae68a463e46..ed3a0b25c60 100644 --- a/intern/cycles/kernel/shaders/node_toon_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_toon_bsdf.osl @@ -16,17 +16,15 @@ #include "stdosl.h" -shader node_toon_bsdf( - color Color = 0.8, - string component = "diffuse", - float Size = 0.5, - float Smooth = 0.0, - normal Normal = N, - output closure color BSDF = 0) +shader node_toon_bsdf(color Color = 0.8, + string component = "diffuse", + float Size = 0.5, + float Smooth = 0.0, + normal Normal = N, + output closure color BSDF = 0) { - if (component == "diffuse") - BSDF = Color * diffuse_toon(Normal, Size, Smooth); - else if (component == "glossy") - BSDF = Color * glossy_toon(Normal, Size, Smooth); + if (component == "diffuse") + BSDF = Color * diffuse_toon(Normal, Size, Smooth); + else if (component == "glossy") + BSDF = Color * glossy_toon(Normal, Size, Smooth); } - diff --git a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl index 94d23d35326..7ce1ab08c59 100644 --- a/intern/cycles/kernel/shaders/node_translucent_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_translucent_bsdf.osl @@ -16,11 +16,7 @@ 
#include "stdosl.h" -shader node_translucent_bsdf( - color Color = 0.8, - normal Normal = N, - output closure color BSDF = 0) +shader node_translucent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0) { - BSDF = Color * translucent(Normal); + BSDF = Color * translucent(Normal); } - diff --git a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl index 5d6798f19a6..a735513ba89 100644 --- a/intern/cycles/kernel/shaders/node_transparent_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_transparent_bsdf.osl @@ -16,11 +16,7 @@ #include "stdosl.h" -shader node_transparent_bsdf( - color Color = 0.8, - normal Normal = N, - output closure color BSDF = 0) +shader node_transparent_bsdf(color Color = 0.8, normal Normal = N, output closure color BSDF = 0) { - BSDF = Color * transparent(); + BSDF = Color * transparent(); } - diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl index b46b2e73457..6f2887be63c 100644 --- a/intern/cycles/kernel/shaders/node_uv_map.osl +++ b/intern/cycles/kernel/shaders/node_uv_map.osl @@ -16,30 +16,29 @@ #include "stdosl.h" -shader node_uv_map( - int from_dupli = 0, - string attribute = "", - string bump_offset = "center", - output point UV = point(0.0, 0.0, 0.0)) +shader node_uv_map(int from_dupli = 0, + string attribute = "", + string bump_offset = "center", + output point UV = point(0.0, 0.0, 0.0)) { - if (from_dupli) { - getattribute("geom:dupli_uv", UV); - } - else { - if (attribute == "") - getattribute("geom:uv", UV); - else - getattribute(attribute, UV); - } + if (from_dupli) { + getattribute("geom:dupli_uv", UV); + } + else { + if (attribute == "") + getattribute("geom:uv", UV); + else + getattribute(attribute, UV); + } - if (bump_offset == "dx") { - if (!from_dupli) { - UV += Dx(UV); - } - } - else if (bump_offset == "dy") { - if (!from_dupli) { - UV += Dy(UV); - } - } + if (bump_offset == "dx") { + if 
(!from_dupli) { + UV += Dx(UV); + } + } + else if (bump_offset == "dy") { + if (!from_dupli) { + UV += Dy(UV); + } + } } diff --git a/intern/cycles/kernel/shaders/node_value.osl b/intern/cycles/kernel/shaders/node_value.osl index f75388d1f76..398e2c0e392 100644 --- a/intern/cycles/kernel/shaders/node_value.osl +++ b/intern/cycles/kernel/shaders/node_value.osl @@ -16,16 +16,14 @@ #include "stdosl.h" -shader node_value( - float value_value = 0.0, - vector vector_value = vector(0.0, 0.0, 0.0), - color color_value = 0.0, - output float Value = 0.0, - output vector Vector = vector(0.0, 0.0, 0.0), - output color Color = 0.0) +shader node_value(float value_value = 0.0, + vector vector_value = vector(0.0, 0.0, 0.0), + color color_value = 0.0, + output float Value = 0.0, + output vector Vector = vector(0.0, 0.0, 0.0), + output color Color = 0.0) { - Value = value_value; - Vector = vector_value; - Color = color_value; + Value = value_value; + Vector = vector_value; + Color = color_value; } - diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl index ff284c48e0a..e8c8036b550 100644 --- a/intern/cycles/kernel/shaders/node_vector_curves.osl +++ b/intern/cycles/kernel/shaders/node_vector_curves.osl @@ -17,25 +17,23 @@ #include "stdosl.h" #include "node_ramp_util.h" -shader node_vector_curves( - color ramp[] = {0.0}, - float min_x = 0.0, - float max_x = 1.0, +shader node_vector_curves(color ramp[] = {0.0}, + float min_x = 0.0, + float max_x = 1.0, - vector VectorIn = vector(0.0, 0.0, 0.0), - float Fac = 0.0, - output vector VectorOut = vector(0.0, 0.0, 0.0)) + vector VectorIn = vector(0.0, 0.0, 0.0), + float Fac = 0.0, + output vector VectorOut = vector(0.0, 0.0, 0.0)) { - vector c = (VectorIn - vector(min_x, min_x, min_x)) / (max_x - min_x); + vector c = (VectorIn - vector(min_x, min_x, min_x)) / (max_x - min_x); - color r = rgb_ramp_lookup(ramp, c[0], 1, 1); - color g = rgb_ramp_lookup(ramp, c[0], 1, 1); - color 
b = rgb_ramp_lookup(ramp, c[0], 1, 1); + color r = rgb_ramp_lookup(ramp, c[0], 1, 1); + color g = rgb_ramp_lookup(ramp, c[0], 1, 1); + color b = rgb_ramp_lookup(ramp, c[0], 1, 1); - VectorOut[0] = r[0]; - VectorOut[1] = g[1]; - VectorOut[2] = b[2]; + VectorOut[0] = r[0]; + VectorOut[1] = g[1]; + VectorOut[2] = b[2]; - VectorOut = mix(VectorIn, VectorOut, Fac); + VectorOut = mix(VectorIn, VectorOut, Fac); } - diff --git a/intern/cycles/kernel/shaders/node_vector_displacement.osl b/intern/cycles/kernel/shaders/node_vector_displacement.osl index b19bc228e37..e9bd336347f 100644 --- a/intern/cycles/kernel/shaders/node_vector_displacement.osl +++ b/intern/cycles/kernel/shaders/node_vector_displacement.osl @@ -16,45 +16,43 @@ #include "stdosl.h" -shader node_vector_displacement( - color Vector = color(0.0, 0.0, 0.0), - float Midlevel = 0.0, - float Scale = 1.0, - string space = "tangent", - string attr_name = "geom:tangent", - string attr_sign_name = "geom:tangent_sign", - output vector Displacement = vector(0.0, 0.0, 0.0)) +shader node_vector_displacement(color Vector = color(0.0, 0.0, 0.0), + float Midlevel = 0.0, + float Scale = 1.0, + string space = "tangent", + string attr_name = "geom:tangent", + string attr_sign_name = "geom:tangent_sign", + output vector Displacement = vector(0.0, 0.0, 0.0)) { - vector offset = (Vector - vector(Midlevel)) * Scale; - - if(space == "tangent") { - /* Tangent space. */ - vector N_object = normalize(transform("world", "object", N)); - - vector T_object; - if(getattribute(attr_name, T_object)) { - T_object = normalize(T_object); - } - else { - T_object = normalize(dPdu); - } - - vector B_object = normalize(cross(N_object, T_object)); - float tangent_sign; - if(getattribute(attr_sign_name, tangent_sign)) { - B_object *= tangent_sign; - } - - Displacement = T_object*offset[0] + N_object*offset[1] + B_object*offset[2]; - } - else { - /* Object or world space. 
*/ - Displacement = offset; - } - - if(space != "world") { - /* Tangent or object space. */ - Displacement = transform("object", "world", Displacement); - } + vector offset = (Vector - vector(Midlevel)) * Scale; + + if (space == "tangent") { + /* Tangent space. */ + vector N_object = normalize(transform("world", "object", N)); + + vector T_object; + if (getattribute(attr_name, T_object)) { + T_object = normalize(T_object); + } + else { + T_object = normalize(dPdu); + } + + vector B_object = normalize(cross(N_object, T_object)); + float tangent_sign; + if (getattribute(attr_sign_name, tangent_sign)) { + B_object *= tangent_sign; + } + + Displacement = T_object * offset[0] + N_object * offset[1] + B_object * offset[2]; + } + else { + /* Object or world space. */ + Displacement = offset; + } + + if (space != "world") { + /* Tangent or object space. */ + Displacement = transform("object", "world", Displacement); + } } - diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl index a7e3637402e..10bb0c7283c 100644 --- a/intern/cycles/kernel/shaders/node_vector_math.osl +++ b/intern/cycles/kernel/shaders/node_vector_math.osl @@ -16,36 +16,34 @@ #include "stdosl.h" -shader node_vector_math( - string type = "add", - vector Vector1 = vector(0.0, 0.0, 0.0), - vector Vector2 = vector(0.0, 0.0, 0.0), - output float Value = 0.0, - output vector Vector = vector(0.0, 0.0, 0.0)) +shader node_vector_math(string type = "add", + vector Vector1 = vector(0.0, 0.0, 0.0), + vector Vector2 = vector(0.0, 0.0, 0.0), + output float Value = 0.0, + output vector Vector = vector(0.0, 0.0, 0.0)) { - if (type == "add") { - Vector = Vector1 + Vector2; - Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0; - } - else if (type == "subtract") { - Vector = Vector1 - Vector2; - Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0; - } - else if (type == "average") { - Value = length(Vector1 + Vector2); - Vector = 
normalize(Vector1 + Vector2); - } - else if (type == "dot_product") { - Value = dot(Vector1, Vector2); - } - else if (type == "cross_product") { - vector c = cross(Vector1, Vector2); - Value = length(c); - Vector = normalize(c); - } - else if (type == "normalize") { - Value = length(Vector1); - Vector = normalize(Vector1); - } + if (type == "add") { + Vector = Vector1 + Vector2; + Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0; + } + else if (type == "subtract") { + Vector = Vector1 - Vector2; + Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0; + } + else if (type == "average") { + Value = length(Vector1 + Vector2); + Vector = normalize(Vector1 + Vector2); + } + else if (type == "dot_product") { + Value = dot(Vector1, Vector2); + } + else if (type == "cross_product") { + vector c = cross(Vector1, Vector2); + Value = length(c); + Vector = normalize(c); + } + else if (type == "normalize") { + Value = length(Vector1); + Vector = normalize(Vector1); + } } - diff --git a/intern/cycles/kernel/shaders/node_vector_transform.osl b/intern/cycles/kernel/shaders/node_vector_transform.osl index afb95b340d1..22939577be0 100644 --- a/intern/cycles/kernel/shaders/node_vector_transform.osl +++ b/intern/cycles/kernel/shaders/node_vector_transform.osl @@ -16,21 +16,19 @@ #include "stdosl.h" -shader node_vector_transform( - string type = "vector", - string convert_from = "world", - string convert_to = "object", - vector VectorIn = vector(0.0, 0.0, 0.0), - output vector VectorOut = vector(0.0, 0.0, 0.0)) +shader node_vector_transform(string type = "vector", + string convert_from = "world", + string convert_to = "object", + vector VectorIn = vector(0.0, 0.0, 0.0), + output vector VectorOut = vector(0.0, 0.0, 0.0)) { - if (type == "vector" || type == "normal") { - VectorOut = transform(convert_from, convert_to, VectorIn); - if (type == "normal") - VectorOut = normalize(VectorOut); - } - else if (type == "point") { - point Point = (point)VectorIn; - 
VectorOut = transform(convert_from, convert_to, Point); - } + if (type == "vector" || type == "normal") { + VectorOut = transform(convert_from, convert_to, VectorIn); + if (type == "normal") + VectorOut = normalize(VectorOut); + } + else if (type == "point") { + point Point = (point)VectorIn; + VectorOut = transform(convert_from, convert_to, Point); + } } - diff --git a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl index 456c26998c8..9290b845325 100644 --- a/intern/cycles/kernel/shaders/node_velvet_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_velvet_bsdf.osl @@ -17,14 +17,12 @@ #include "stdosl.h" #include "node_fresnel.h" -shader node_velvet_bsdf( - color Color = 0.8, - float Sigma = 0.0, - normal Normal = N, - output closure color BSDF = 0) +shader node_velvet_bsdf(color Color = 0.8, + float Sigma = 0.0, + normal Normal = N, + output closure color BSDF = 0) { - float sigma = clamp(Sigma, 0.0, 1.0); + float sigma = clamp(Sigma, 0.0, 1.0); - BSDF = Color * ashikhmin_velvet(Normal, sigma); + BSDF = Color * ashikhmin_velvet(Normal, sigma); } - diff --git a/intern/cycles/kernel/shaders/node_voronoi_texture.osl b/intern/cycles/kernel/shaders/node_voronoi_texture.osl index 2e47d74a414..34c86d5b98d 100644 --- a/intern/cycles/kernel/shaders/node_voronoi_texture.osl +++ b/intern/cycles/kernel/shaders/node_voronoi_texture.osl @@ -19,147 +19,146 @@ void voronoi_m(point p, string metric, float e, float da[4], point pa[4]) { - /* Compute the distance to and the position of the four closest neighbors to p. - * - * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern). - * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will - * contain the distance to the closest point and its coordinates respectively. 
- */ - int xx, yy, zz, xi, yi, zi; - - xi = (int)floor(p[0]); - yi = (int)floor(p[1]); - zi = (int)floor(p[2]); - - da[0] = 1e10; - da[1] = 1e10; - da[2] = 1e10; - da[3] = 1e10; - - for (xx = xi - 1; xx <= xi + 1; xx++) { - for (yy = yi - 1; yy <= yi + 1; yy++) { - for (zz = zi - 1; zz <= zi + 1; zz++) { - point ip = point(xx, yy, zz); - point vp = (point)cellnoise_color(ip); - point pd = p - (vp + ip); - - float d = 0.0; - if (metric == "distance") { - d = dot(pd, pd); - } - else if (metric == "manhattan") { - d = fabs(pd[0]) + fabs(pd[1]) + fabs(pd[2]); - } - else if (metric == "chebychev") { - d = max(fabs(pd[0]), max(fabs(pd[1]), fabs(pd[2]))); - } - else if (metric == "minkowski") { - d = pow(pow(fabs(pd[0]), e) + pow(fabs(pd[1]), e) + pow(fabs(pd[2]), e), 1.0/e); - } - - vp += point(xx, yy, zz); - - if (d < da[0]) { - da[3] = da[2]; - da[2] = da[1]; - da[1] = da[0]; - da[0] = d; - - pa[3] = pa[2]; - pa[2] = pa[1]; - pa[1] = pa[0]; - pa[0] = vp; - } - else if (d < da[1]) { - da[3] = da[2]; - da[2] = da[1]; - da[1] = d; - - pa[3] = pa[2]; - pa[2] = pa[1]; - pa[1] = vp; - } - else if (d < da[2]) { - da[3] = da[2]; - da[2] = d; - - pa[3] = pa[2]; - pa[2] = vp; - } - else if (d < da[3]) { - da[3] = d; - pa[3] = vp; - } - } - } - } + /* Compute the distance to and the position of the four closest neighbors to p. + * + * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern). + * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will + * contain the distance to the closest point and its coordinates respectively. 
+ */ + int xx, yy, zz, xi, yi, zi; + + xi = (int)floor(p[0]); + yi = (int)floor(p[1]); + zi = (int)floor(p[2]); + + da[0] = 1e10; + da[1] = 1e10; + da[2] = 1e10; + da[3] = 1e10; + + for (xx = xi - 1; xx <= xi + 1; xx++) { + for (yy = yi - 1; yy <= yi + 1; yy++) { + for (zz = zi - 1; zz <= zi + 1; zz++) { + point ip = point(xx, yy, zz); + point vp = (point)cellnoise_color(ip); + point pd = p - (vp + ip); + + float d = 0.0; + if (metric == "distance") { + d = dot(pd, pd); + } + else if (metric == "manhattan") { + d = fabs(pd[0]) + fabs(pd[1]) + fabs(pd[2]); + } + else if (metric == "chebychev") { + d = max(fabs(pd[0]), max(fabs(pd[1]), fabs(pd[2]))); + } + else if (metric == "minkowski") { + d = pow(pow(fabs(pd[0]), e) + pow(fabs(pd[1]), e) + pow(fabs(pd[2]), e), 1.0 / e); + } + + vp += point(xx, yy, zz); + + if (d < da[0]) { + da[3] = da[2]; + da[2] = da[1]; + da[1] = da[0]; + da[0] = d; + + pa[3] = pa[2]; + pa[2] = pa[1]; + pa[1] = pa[0]; + pa[0] = vp; + } + else if (d < da[1]) { + da[3] = da[2]; + da[2] = da[1]; + da[1] = d; + + pa[3] = pa[2]; + pa[2] = pa[1]; + pa[1] = vp; + } + else if (d < da[2]) { + da[3] = da[2]; + da[2] = d; + + pa[3] = pa[2]; + pa[2] = vp; + } + else if (d < da[3]) { + da[3] = d; + pa[3] = vp; + } + } + } + } } /* Voronoi */ shader node_voronoi_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - string coloring = "intensity", - string metric = "distance", - string feature = "F1", - float Exponent = 1.0, - float Scale = 5.0, - point Vector = P, - output float Fac = 0.0, - output color Color = 0.0) + int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + string coloring = "intensity", + string metric = "distance", + string feature = "F1", + float Exponent = 1.0, + float Scale = 5.0, + point Vector = P, + output float Fac = 0.0, + output color Color = 0.0) { - point p = Vector; - - if (use_mapping) - p = transform(mapping, p); - - /* compute 
distance and point coordinate of 4 nearest neighbours */ - float da[4]; - point pa[4]; - - /* compute distance and point coordinate of 4 nearest neighbours */ - voronoi_m(p * Scale, metric, Exponent, da, pa); - - if (coloring == "intensity") { - /* Intensity output */ - if (feature == "F1") { - Fac = fabs(da[0]); - } - else if (feature == "F2") { - Fac = fabs(da[1]); - } - else if (feature == "F3") { - Fac = fabs(da[2]); - } - else if (feature == "F4") { - Fac = fabs(da[3]); - } - else if (feature == "F2F1") { - Fac = fabs(da[1] - da[0]); - } - Color = color(Fac); - } - else { - /* Color output */ - if (feature == "F1") { - Color = pa[0]; - } - else if (feature == "F2") { - Color = pa[1]; - } - else if (feature == "F3") { - Color = pa[2]; - } - else if (feature == "F4") { - Color = pa[3]; - } - else if (feature == "F2F1") { - Color = fabs(pa[1] - pa[0]); - } - - Color = cellnoise_color(Color); - Fac = (Color[0] + Color[1] + Color[2]) * (1.0 / 3.0); - } + point p = Vector; + + if (use_mapping) + p = transform(mapping, p); + + /* compute distance and point coordinate of 4 nearest neighbours */ + float da[4]; + point pa[4]; + + /* compute distance and point coordinate of 4 nearest neighbours */ + voronoi_m(p * Scale, metric, Exponent, da, pa); + + if (coloring == "intensity") { + /* Intensity output */ + if (feature == "F1") { + Fac = fabs(da[0]); + } + else if (feature == "F2") { + Fac = fabs(da[1]); + } + else if (feature == "F3") { + Fac = fabs(da[2]); + } + else if (feature == "F4") { + Fac = fabs(da[3]); + } + else if (feature == "F2F1") { + Fac = fabs(da[1] - da[0]); + } + Color = color(Fac); + } + else { + /* Color output */ + if (feature == "F1") { + Color = pa[0]; + } + else if (feature == "F2") { + Color = pa[1]; + } + else if (feature == "F3") { + Color = pa[2]; + } + else if (feature == "F4") { + Color = pa[3]; + } + else if (feature == "F2F1") { + Color = fabs(pa[1] - pa[0]); + } + + Color = cellnoise_color(Color); + Fac = (Color[0] + Color[1] + Color[2]) 
* (1.0 / 3.0); + } } - diff --git a/intern/cycles/kernel/shaders/node_voxel_texture.osl b/intern/cycles/kernel/shaders/node_voxel_texture.osl index 9253febd64a..0e4484561d8 100644 --- a/intern/cycles/kernel/shaders/node_voxel_texture.osl +++ b/intern/cycles/kernel/shaders/node_voxel_texture.osl @@ -16,32 +16,30 @@ #include "stdosl.h" -shader node_voxel_texture( - string filename = "", - string interpolation = "linear", - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - point Vector = P, - output float Density = 0, - output color Color = 0) +shader node_voxel_texture(string filename = "", + string interpolation = "linear", + int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + point Vector = P, + output float Density = 0, + output color Color = 0) { - point p = Vector; - if (use_mapping) { - p = transform(mapping, p); - } - else { - p = transform("object", Vector); - matrix tfm; - if (getattribute("geom:generated_transform", tfm)) - p = transform(tfm, p); - } - if (p[0] < 0.0 || p[1] < 0.0 || p[2] < 0.0 || - p[0] > 1.0 || p[1] > 1.0 || p[2] > 1.0) - { - Density = 0; - Color = color(0, 0, 0); - } - else { - Color = (color)texture3d(filename, p, "wrap", "periodic", "interp", interpolation, "alpha", Density); - } + point p = Vector; + if (use_mapping) { + p = transform(mapping, p); + } + else { + p = transform("object", Vector); + matrix tfm; + if (getattribute("geom:generated_transform", tfm)) + p = transform(tfm, p); + } + if (p[0] < 0.0 || p[1] < 0.0 || p[2] < 0.0 || p[0] > 1.0 || p[1] > 1.0 || p[2] > 1.0) { + Density = 0; + Color = color(0, 0, 0); + } + else { + Color = (color)texture3d( + filename, p, "wrap", "periodic", "interp", interpolation, "alpha", Density); + } } diff --git a/intern/cycles/kernel/shaders/node_wave_texture.osl b/intern/cycles/kernel/shaders/node_wave_texture.osl index 71bc9324705..dfc2dbfb800 100644 --- 
a/intern/cycles/kernel/shaders/node_wave_texture.osl +++ b/intern/cycles/kernel/shaders/node_wave_texture.osl @@ -21,49 +21,47 @@ float wave(point p, string type, string profile, float detail, float distortion, float dscale) { - float n = 0.0; + float n = 0.0; - if (type == "bands") { - n = (p[0] + p[1] + p[2]) * 10.0; - } - else if (type == "rings") { - n = length(p) * 20.0; - } + if (type == "bands") { + n = (p[0] + p[1] + p[2]) * 10.0; + } + else if (type == "rings") { + n = length(p) * 20.0; + } - if (distortion != 0.0) { - n = n + (distortion * noise_turbulence(p * dscale, detail, 0)); - } + if (distortion != 0.0) { + n = n + (distortion * noise_turbulence(p * dscale, detail, 0)); + } - if (profile == "sine") { - return 0.5 + 0.5 * sin(n); - } - else { - /* Saw profile */ - n /= M_2PI; - n -= (int) n; - return (n < 0.0) ? n + 1.0 : n; - } + if (profile == "sine") { + return 0.5 + 0.5 * sin(n); + } + else { + /* Saw profile */ + n /= M_2PI; + n -= (int)n; + return (n < 0.0) ? n + 1.0 : n; + } } -shader node_wave_texture( - int use_mapping = 0, - matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - string type = "bands", - string profile = "sine", - float Scale = 5.0, - float Distortion = 0.0, - float Detail = 2.0, - float DetailScale = 1.0, - point Vector = P, - output float Fac = 0.0, - output color Color = 0.0) +shader node_wave_texture(int use_mapping = 0, + matrix mapping = matrix(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + string type = "bands", + string profile = "sine", + float Scale = 5.0, + float Distortion = 0.0, + float Detail = 2.0, + float DetailScale = 1.0, + point Vector = P, + output float Fac = 0.0, + output color Color = 0.0) { - point p = Vector; + point p = Vector; - if (use_mapping) - p = transform(mapping, p); + if (use_mapping) + p = transform(mapping, p); - Fac = wave(p * Scale, type, profile, Detail, Distortion, DetailScale); - Color = Fac; + Fac = wave(p * Scale, type, profile, Detail, Distortion, 
DetailScale); + Color = Fac; } - diff --git a/intern/cycles/kernel/shaders/node_wavelength.osl b/intern/cycles/kernel/shaders/node_wavelength.osl index 79e7043d4bf..c8c6eecb171 100644 --- a/intern/cycles/kernel/shaders/node_wavelength.osl +++ b/intern/cycles/kernel/shaders/node_wavelength.osl @@ -16,10 +16,7 @@ #include "stdosl.h" -shader node_wavelength( - float Wavelength = 500.0, - output color Color = 0.0) +shader node_wavelength(float Wavelength = 500.0, output color Color = 0.0) { - Color = wavelength_color(Wavelength); + Color = wavelength_color(Wavelength); } - diff --git a/intern/cycles/kernel/shaders/node_wireframe.osl b/intern/cycles/kernel/shaders/node_wireframe.osl index 5cc214495dd..ea4bd3a4c87 100644 --- a/intern/cycles/kernel/shaders/node_wireframe.osl +++ b/intern/cycles/kernel/shaders/node_wireframe.osl @@ -17,25 +17,24 @@ #include "stdosl.h" #include "oslutil.h" -shader node_wireframe( - string bump_offset = "center", - int use_pixel_size = 0, - float Size = 0.01, - output float Fac = 0.0) +shader node_wireframe(string bump_offset = "center", + int use_pixel_size = 0, + float Size = 0.01, + output float Fac = 0.0) { - Fac = wireframe("triangles", Size, use_pixel_size); - /* TODO(sergey): Since we can't use autodiff here we do algebraic - * calculation of derivatives by definition. We could probably - * optimize this a bit by doing some extra calculation in wireframe(). - */ - if (bump_offset == "dx") { - point dx = Dx(P); - P -= dx; - Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dx); - } - else if (bump_offset == "dy") { - point dy = Dy(P); - P -= dy; - Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dy); - } + Fac = wireframe("triangles", Size, use_pixel_size); + /* TODO(sergey): Since we can't use autodiff here we do algebraic + * calculation of derivatives by definition. We could probably + * optimize this a bit by doing some extra calculation in wireframe(). 
+ */ + if (bump_offset == "dx") { + point dx = Dx(P); + P -= dx; + Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dx); + } + else if (bump_offset == "dy") { + point dy = Dy(P); + P -= dy; + Fac += (Fac - wireframe("triangles", Size, use_pixel_size)) / length(dy); + } } diff --git a/intern/cycles/kernel/shaders/oslutil.h b/intern/cycles/kernel/shaders/oslutil.h index 592a8ad12d9..d48bfa4a665 100644 --- a/intern/cycles/kernel/shaders/oslutil.h +++ b/intern/cycles/kernel/shaders/oslutil.h @@ -39,57 +39,63 @@ // float wireframe(string edge_type, float line_width, int raster) { - // ray differentials are so big in diffuse context that this function would always return "wire" - if (raytype("path:diffuse")) return 0.0; + // ray differentials are so big in diffuse context that this function would always return "wire" + if (raytype("path:diffuse")) + return 0.0; - int np = 0; - point p[64]; - float pixelWidth = 1; + int np = 0; + point p[64]; + float pixelWidth = 1; - if (edge_type == "triangles") - { - np = 3; - if (!getattribute("geom:trianglevertices", p)) - return 0.0; - } - else if (edge_type == "polygons" || edge_type == "patches") - { - getattribute("geom:numpolyvertices", np); - if (np < 3 || !getattribute("geom:polyvertices", p)) - return 0.0; - } + if (edge_type == "triangles") { + np = 3; + if (!getattribute("geom:trianglevertices", p)) + return 0.0; + } + else if (edge_type == "polygons" || edge_type == "patches") { + getattribute("geom:numpolyvertices", np); + if (np < 3 || !getattribute("geom:polyvertices", p)) + return 0.0; + } - if (raster) - { - // Project the derivatives of P to the viewing plane defined - // by I so we have a measure of how big is a pixel at this point - float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I); - float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I); - // Take the average of both axis' length - pixelWidth = (pixelWidthX + pixelWidthY) / 2; - } + if (raster) { + // Project the derivatives of P to the 
viewing plane defined + // by I so we have a measure of how big is a pixel at this point + float pixelWidthX = length(Dx(P) - dot(Dx(P), I) * I); + float pixelWidthY = length(Dy(P) - dot(Dy(P), I) * I); + // Take the average of both axis' length + pixelWidth = (pixelWidthX + pixelWidthY) / 2; + } - // Use half the width as the neighbor face will render the - // other half. And take the square for fast comparison - pixelWidth *= 0.5 * line_width; - pixelWidth *= pixelWidth; - for (int i = 0; i < np; i++) - { - int i2 = i ? i - 1 : np - 1; - vector dir = P - p[i]; - vector edge = p[i] - p[i2]; - vector crs = cross(edge, dir); - // At this point dot(crs, crs) / dot(edge, edge) is - // the square of area / length(edge) == square of the - // distance to the edge. - if (dot(crs, crs) < (dot(edge, edge) * pixelWidth)) - return 1; - } - return 0; + // Use half the width as the neighbor face will render the + // other half. And take the square for fast comparison + pixelWidth *= 0.5 * line_width; + pixelWidth *= pixelWidth; + for (int i = 0; i < np; i++) { + int i2 = i ? i - 1 : np - 1; + vector dir = P - p[i]; + vector edge = p[i] - p[i2]; + vector crs = cross(edge, dir); + // At this point dot(crs, crs) / dot(edge, edge) is + // the square of area / length(edge) == square of the + // distance to the edge. 
+ if (dot(crs, crs) < (dot(edge, edge) * pixelWidth)) + return 1; + } + return 0; } -float wireframe(string edge_type, float line_width) { return wireframe(edge_type, line_width, 1); } -float wireframe(string edge_type) { return wireframe(edge_type, 1.0, 1); } -float wireframe() { return wireframe("polygons", 1.0, 1); } +float wireframe(string edge_type, float line_width) +{ + return wireframe(edge_type, line_width, 1); +} +float wireframe(string edge_type) +{ + return wireframe(edge_type, 1.0, 1); +} +float wireframe() +{ + return wireframe("polygons", 1.0, 1); +} -#endif /* CCL_OSLUTIL_H */ +#endif /* CCL_OSLUTIL_H */ diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 7136c746321..9b9720ffff9 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -25,124 +25,215 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ///////////////////////////////////////////////////////////////////////////// - #ifndef CCL_STDOSL_H #define CCL_STDOSL_H - #ifndef M_PI -#define M_PI 3.1415926535897932 /* pi */ -#define M_PI_2 1.5707963267948966 /* pi/2 */ -#define M_PI_4 0.7853981633974483 /* pi/4 */ -#define M_2_PI 0.6366197723675813 /* 2/pi */ -#define M_2PI 6.2831853071795865 /* 2*pi */ -#define M_4PI 12.566370614359173 /* 4*pi */ -#define M_2_SQRTPI 1.1283791670955126 /* 2/sqrt(pi) */ -#define M_E 2.7182818284590452 /* e (Euler's number) */ -#define M_LN2 0.6931471805599453 /* ln(2) */ -#define M_LN10 2.3025850929940457 /* ln(10) */ -#define M_LOG2E 1.4426950408889634 /* log_2(e) */ -#define M_LOG10E 0.4342944819032518 /* log_10(e) */ -#define M_SQRT2 1.4142135623730950 /* sqrt(2) */ -#define M_SQRT1_2 0.7071067811865475 /* 1/sqrt(2) */ +# define M_PI 3.1415926535897932 /* pi */ +# define M_PI_2 1.5707963267948966 /* pi/2 */ +# define M_PI_4 0.7853981633974483 /* pi/4 */ +# define M_2_PI 0.6366197723675813 /* 2/pi */ +# define M_2PI 6.2831853071795865 /* 2*pi */ +# 
define M_4PI 12.566370614359173 /* 4*pi */ +# define M_2_SQRTPI 1.1283791670955126 /* 2/sqrt(pi) */ +# define M_E 2.7182818284590452 /* e (Euler's number) */ +# define M_LN2 0.6931471805599453 /* ln(2) */ +# define M_LN10 2.3025850929940457 /* ln(10) */ +# define M_LOG2E 1.4426950408889634 /* log_2(e) */ +# define M_LOG10E 0.4342944819032518 /* log_10(e) */ +# define M_SQRT2 1.4142135623730950 /* sqrt(2) */ +# define M_SQRT1_2 0.7071067811865475 /* 1/sqrt(2) */ #endif - - // Declaration of built-in functions and closures -#define BUILTIN [[ int builtin = 1 ]] +#define BUILTIN [[int builtin = 1]] #define BUILTIN_DERIV [[ int builtin = 1, int deriv = 1 ]] -#define PERCOMP1(name) \ - normal name (normal x) BUILTIN; \ - vector name (vector x) BUILTIN; \ - point name (point x) BUILTIN; \ - color name (color x) BUILTIN; \ - float name (float x) BUILTIN; - -#define PERCOMP2(name) \ - normal name (normal x, normal y) BUILTIN; \ - vector name (vector x, vector y) BUILTIN; \ - point name (point x, point y) BUILTIN; \ - color name (color x, color y) BUILTIN; \ - float name (float x, float y) BUILTIN; - -#define PERCOMP2F(name) \ - normal name (normal x, float y) BUILTIN; \ - vector name (vector x, float y) BUILTIN; \ - point name (point x, float y) BUILTIN; \ - color name (color x, float y) BUILTIN; \ - float name (float x, float y) BUILTIN; - +#define PERCOMP1(name) \ + normal name(normal x) BUILTIN; \ + vector name(vector x) BUILTIN; \ + point name(point x) BUILTIN; \ + color name(color x) BUILTIN; \ + float name(float x) BUILTIN; + +#define PERCOMP2(name) \ + normal name(normal x, normal y) BUILTIN; \ + vector name(vector x, vector y) BUILTIN; \ + point name(point x, point y) BUILTIN; \ + color name(color x, color y) BUILTIN; \ + float name(float x, float y) BUILTIN; + +#define PERCOMP2F(name) \ + normal name(normal x, float y) BUILTIN; \ + vector name(vector x, float y) BUILTIN; \ + point name(point x, float y) BUILTIN; \ + color name(color x, float y) BUILTIN; \ + float 
name(float x, float y) BUILTIN; // Basic math -normal degrees (normal x) { return x*(180.0/M_PI); } -vector degrees (vector x) { return x*(180.0/M_PI); } -point degrees (point x) { return x*(180.0/M_PI); } -color degrees (color x) { return x*(180.0/M_PI); } -float degrees (float x) { return x*(180.0/M_PI); } -normal radians (normal x) { return x*(M_PI/180.0); } -vector radians (vector x) { return x*(M_PI/180.0); } -point radians (point x) { return x*(M_PI/180.0); } -color radians (color x) { return x*(M_PI/180.0); } -float radians (float x) { return x*(M_PI/180.0); } -PERCOMP1 (cos) -PERCOMP1 (sin) -PERCOMP1 (tan) -PERCOMP1 (acos) -PERCOMP1 (asin) -PERCOMP1 (atan) -PERCOMP2 (atan2) -PERCOMP1 (cosh) -PERCOMP1 (sinh) -PERCOMP1 (tanh) -PERCOMP2F (pow) -PERCOMP1 (exp) -PERCOMP1 (exp2) -PERCOMP1 (expm1) -PERCOMP1 (log) -point log (point a, float b) { return log(a)/log(b); } -vector log (vector a, float b) { return log(a)/log(b); } -color log (color a, float b) { return log(a)/log(b); } -float log (float a, float b) { return log(a)/log(b); } -PERCOMP1 (log2) -PERCOMP1 (log10) -PERCOMP1 (logb) -PERCOMP1 (sqrt) -PERCOMP1 (inversesqrt) -float hypot (float a, float b) { return sqrt (a*a + b*b); } -float hypot (float a, float b, float c) { return sqrt (a*a + b*b + c*c); } -PERCOMP1 (abs) -int abs (int x) BUILTIN; -PERCOMP1 (fabs) -int fabs (int x) BUILTIN; -PERCOMP1 (sign) -PERCOMP1 (floor) -PERCOMP1 (ceil) -PERCOMP1 (round) -PERCOMP1 (trunc) -PERCOMP2 (fmod) -PERCOMP2F (fmod) -int mod (int a, int b) { return a - b*(int)floor(a/b); } -point mod (point a, point b) { return a - b*floor(a/b); } -vector mod (vector a, vector b) { return a - b*floor(a/b); } -normal mod (normal a, normal b) { return a - b*floor(a/b); } -color mod (color a, color b) { return a - b*floor(a/b); } -point mod (point a, float b) { return a - b*floor(a/b); } -vector mod (vector a, float b) { return a - b*floor(a/b); } -normal mod (normal a, float b) { return a - b*floor(a/b); } -color mod (color a, float 
b) { return a - b*floor(a/b); } -float mod (float a, float b) { return a - b*floor(a/b); } -PERCOMP2 (min) -int min (int a, int b) BUILTIN; -PERCOMP2 (max) -int max (int a, int b) BUILTIN; -normal clamp (normal x, normal minval, normal maxval) { return max(min(x,maxval),minval); } -vector clamp (vector x, vector minval, vector maxval) { return max(min(x,maxval),minval); } -point clamp (point x, point minval, point maxval) { return max(min(x,maxval),minval); } -color clamp (color x, color minval, color maxval) { return max(min(x,maxval),minval); } -float clamp (float x, float minval, float maxval) { return max(min(x,maxval),minval); } -int clamp (int x, int minval, int maxval) { return max(min(x,maxval),minval); } +normal degrees(normal x) +{ + return x * (180.0 / M_PI); +} +vector degrees(vector x) +{ + return x * (180.0 / M_PI); +} +point degrees(point x) +{ + return x * (180.0 / M_PI); +} +color degrees(color x) +{ + return x * (180.0 / M_PI); +} +float degrees(float x) +{ + return x * (180.0 / M_PI); +} +normal radians(normal x) +{ + return x * (M_PI / 180.0); +} +vector radians(vector x) +{ + return x * (M_PI / 180.0); +} +point radians(point x) +{ + return x * (M_PI / 180.0); +} +color radians(color x) +{ + return x * (M_PI / 180.0); +} +float radians(float x) +{ + return x * (M_PI / 180.0); +} +PERCOMP1(cos) +PERCOMP1(sin) +PERCOMP1(tan) +PERCOMP1(acos) +PERCOMP1(asin) +PERCOMP1(atan) +PERCOMP2(atan2) +PERCOMP1(cosh) +PERCOMP1(sinh) +PERCOMP1(tanh) +PERCOMP2F(pow) +PERCOMP1(exp) +PERCOMP1(exp2) +PERCOMP1(expm1) +PERCOMP1(log) +point log(point a, float b) +{ + return log(a) / log(b); +} +vector log(vector a, float b) +{ + return log(a) / log(b); +} +color log(color a, float b) +{ + return log(a) / log(b); +} +float log(float a, float b) +{ + return log(a) / log(b); +} +PERCOMP1(log2) +PERCOMP1(log10) +PERCOMP1(logb) +PERCOMP1(sqrt) +PERCOMP1(inversesqrt) +float hypot(float a, float b) +{ + return sqrt(a * a + b * b); +} +float hypot(float a, float b, float c) 
+{ + return sqrt(a * a + b * b + c * c); +} +PERCOMP1(abs) +int abs(int x) BUILTIN; +PERCOMP1(fabs) +int fabs(int x) BUILTIN; +PERCOMP1(sign) +PERCOMP1(floor) +PERCOMP1(ceil) +PERCOMP1(round) +PERCOMP1(trunc) +PERCOMP2(fmod) +PERCOMP2F(fmod) +int mod(int a, int b) +{ + return a - b * (int)floor(a / b); +} +point mod(point a, point b) +{ + return a - b * floor(a / b); +} +vector mod(vector a, vector b) +{ + return a - b * floor(a / b); +} +normal mod(normal a, normal b) +{ + return a - b * floor(a / b); +} +color mod(color a, color b) +{ + return a - b * floor(a / b); +} +point mod(point a, float b) +{ + return a - b * floor(a / b); +} +vector mod(vector a, float b) +{ + return a - b * floor(a / b); +} +normal mod(normal a, float b) +{ + return a - b * floor(a / b); +} +color mod(color a, float b) +{ + return a - b * floor(a / b); +} +float mod(float a, float b) +{ + return a - b * floor(a / b); +} +PERCOMP2(min) +int min(int a, int b) BUILTIN; +PERCOMP2(max) +int max(int a, int b) BUILTIN; +normal clamp(normal x, normal minval, normal maxval) +{ + return max(min(x, maxval), minval); +} +vector clamp(vector x, vector minval, vector maxval) +{ + return max(min(x, maxval), minval); +} +point clamp(point x, point minval, point maxval) +{ + return max(min(x, maxval), minval); +} +color clamp(color x, color minval, color maxval) +{ + return max(min(x, maxval), minval); +} +float clamp(float x, float minval, float maxval) +{ + return max(min(x, maxval), minval); +} +int clamp(int x, int minval, int maxval) +{ + return max(min(x, maxval), minval); +} #if 0 normal mix (normal x, normal y, normal a) { return x*(1-a) + y*a; } normal mix (normal x, normal y, float a) { return x*(1-a) + y*a; } @@ -154,102 +245,121 @@ color mix (color x, color y, color a) { return x*(1-a) + y*a; } color mix (color x, color y, float a) { return x*(1-a) + y*a; } float mix (float x, float y, float a) { return x*(1-a) + y*a; } #else -normal mix (normal x, normal y, normal a) BUILTIN; -normal mix 
(normal x, normal y, float a) BUILTIN; -vector mix (vector x, vector y, vector a) BUILTIN; -vector mix (vector x, vector y, float a) BUILTIN; -point mix (point x, point y, point a) BUILTIN; -point mix (point x, point y, float a) BUILTIN; -color mix (color x, color y, color a) BUILTIN; -color mix (color x, color y, float a) BUILTIN; -float mix (float x, float y, float a) BUILTIN; +normal mix(normal x, normal y, normal a) BUILTIN; +normal mix(normal x, normal y, float a) BUILTIN; +vector mix(vector x, vector y, vector a) BUILTIN; +vector mix(vector x, vector y, float a) BUILTIN; +point mix(point x, point y, point a) BUILTIN; +point mix(point x, point y, float a) BUILTIN; +color mix(color x, color y, color a) BUILTIN; +color mix(color x, color y, float a) BUILTIN; +float mix(float x, float y, float a) BUILTIN; #endif -int isnan (float x) BUILTIN; -int isinf (float x) BUILTIN; -int isfinite (float x) BUILTIN; -float erf (float x) BUILTIN; -float erfc (float x) BUILTIN; +int isnan(float x) BUILTIN; +int isinf(float x) BUILTIN; +int isfinite(float x) BUILTIN; +float erf(float x) BUILTIN; +float erfc(float x) BUILTIN; // Vector functions -vector cross (vector a, vector b) BUILTIN; -float dot (vector a, vector b) BUILTIN; -float length (vector v) BUILTIN; -float distance (point a, point b) BUILTIN; -float distance (point a, point b, point q) -{ - vector d = b - a; - float dd = dot(d, d); - if(dd == 0.0) - return distance(q, a); - float t = dot(q - a, d)/dd; - return distance(q, a + clamp(t, 0.0, 1.0)*d); -} -normal normalize (normal v) BUILTIN; -vector normalize (vector v) BUILTIN; -vector faceforward (vector N, vector I, vector Nref) BUILTIN; -vector faceforward (vector N, vector I) BUILTIN; -vector reflect (vector I, vector N) { return I - 2*dot(N,I)*N; } -vector refract (vector I, vector N, float eta) { - float IdotN = dot (I, N); - float k = 1 - eta*eta * (1 - IdotN*IdotN); - return (k < 0) ? 
vector(0,0,0) : (eta*I - N * (eta*IdotN + sqrt(k))); -} -void fresnel (vector I, normal N, float eta, - output float Kr, output float Kt, - output vector R, output vector T) -{ - float sqr(float x) { return x*x; } - float c = dot(I, N); - if (c < 0) - c = -c; - R = reflect(I, N); - float g = 1.0 / sqr(eta) - 1.0 + c * c; - if (g >= 0.0) { - g = sqrt (g); - float beta = g - c; - float F = (c * (g+c) - 1.0) / (c * beta + 1.0); - F = 0.5 * (1.0 + sqr(F)); - F *= sqr (beta / (g+c)); - Kr = F; - Kt = (1.0 - Kr) * eta*eta; - // OPT: the following recomputes some of the above values, but it - // gives us the same result as if the shader-writer called refract() - T = refract(I, N, eta); - } else { - // total internal reflection - Kr = 1.0; - Kt = 0.0; - T = vector (0,0,0); - } +vector cross(vector a, vector b) BUILTIN; +float dot(vector a, vector b) BUILTIN; +float length(vector v) BUILTIN; +float distance(point a, point b) BUILTIN; +float distance(point a, point b, point q) +{ + vector d = b - a; + float dd = dot(d, d); + if (dd == 0.0) + return distance(q, a); + float t = dot(q - a, d) / dd; + return distance(q, a + clamp(t, 0.0, 1.0) * d); } - -void fresnel (vector I, normal N, float eta, - output float Kr, output float Kt) +normal normalize(normal v) BUILTIN; +vector normalize(vector v) BUILTIN; +vector faceforward(vector N, vector I, vector Nref) BUILTIN; +vector faceforward(vector N, vector I) BUILTIN; +vector reflect(vector I, vector N) { - vector R, T; - fresnel(I, N, eta, Kr, Kt, R, T); + return I - 2 * dot(N, I) * N; +} +vector refract(vector I, vector N, float eta) +{ + float IdotN = dot(I, N); + float k = 1 - eta * eta * (1 - IdotN * IdotN); + return (k < 0) ? 
vector(0, 0, 0) : (eta * I - N * (eta * IdotN + sqrt(k))); +} +void fresnel(vector I, + normal N, + float eta, + output float Kr, + output float Kt, + output vector R, + output vector T) +{ + float sqr(float x) + { + return x * x; + } + float c = dot(I, N); + if (c < 0) + c = -c; + R = reflect(I, N); + float g = 1.0 / sqr(eta) - 1.0 + c * c; + if (g >= 0.0) { + g = sqrt(g); + float beta = g - c; + float F = (c * (g + c) - 1.0) / (c * beta + 1.0); + F = 0.5 * (1.0 + sqr(F)); + F *= sqr(beta / (g + c)); + Kr = F; + Kt = (1.0 - Kr) * eta * eta; + // OPT: the following recomputes some of the above values, but it + // gives us the same result as if the shader-writer called refract() + T = refract(I, N, eta); + } + else { + // total internal reflection + Kr = 1.0; + Kt = 0.0; + T = vector(0, 0, 0); + } } +void fresnel(vector I, normal N, float eta, output float Kr, output float Kt) +{ + vector R, T; + fresnel(I, N, eta, Kr, Kt, R, T); +} -normal transform (matrix Mto, normal p) BUILTIN; -vector transform (matrix Mto, vector p) BUILTIN; -point transform (matrix Mto, point p) BUILTIN; -normal transform (string from, string to, normal p) BUILTIN; -vector transform (string from, string to, vector p) BUILTIN; -point transform (string from, string to, point p) BUILTIN; -normal transform (string to, normal p) { return transform("common",to,p); } -vector transform (string to, vector p) { return transform("common",to,p); } -point transform (string to, point p) { return transform("common",to,p); } +normal transform(matrix Mto, normal p) BUILTIN; +vector transform(matrix Mto, vector p) BUILTIN; +point transform(matrix Mto, point p) BUILTIN; +normal transform(string from, string to, normal p) BUILTIN; +vector transform(string from, string to, vector p) BUILTIN; +point transform(string from, string to, point p) BUILTIN; +normal transform(string to, normal p) +{ + return transform("common", to, p); +} +vector transform(string to, vector p) +{ + return transform("common", to, p); +} 
+point transform(string to, point p) +{ + return transform("common", to, p); +} -float transformu (string tounits, float x) BUILTIN; -float transformu (string fromunits, string tounits, float x) BUILTIN; +float transformu(string tounits, float x) BUILTIN; +float transformu(string fromunits, string tounits, float x) BUILTIN; -point rotate (point p, float angle, point a, point b) +point rotate(point p, float angle, point a, point b) { - vector axis = normalize (b - a); - float cosang, sinang; - /* Older OSX has major issues with sincos() function, + vector axis = normalize(b - a); + float cosang, sinang; + /* Older OSX has major issues with sincos() function, * it's likely a big in OSL or LLVM. For until we've * updated to new versions of this libraries we'll * use a workaround to prevent possible crashes on all @@ -261,317 +371,348 @@ point rotate (point p, float angle, point a, point b) #if 0 sincos (angle, sinang, cosang); #else - sinang = sin (angle); - cosang = cos (angle); + sinang = sin(angle); + cosang = cos(angle); #endif - float cosang1 = 1.0 - cosang; - float x = axis[0], y = axis[1], z = axis[2]; - matrix M = matrix (x * x + (1.0 - x * x) * cosang, - x * y * cosang1 + z * sinang, - x * z * cosang1 - y * sinang, - 0.0, - x * y * cosang1 - z * sinang, - y * y + (1.0 - y * y) * cosang, - y * z * cosang1 + x * sinang, - 0.0, - x * z * cosang1 + y * sinang, - y * z * cosang1 - x * sinang, - z * z + (1.0 - z * z) * cosang, - 0.0, - 0.0, 0.0, 0.0, 1.0); - return transform (M, p-a) + a; + float cosang1 = 1.0 - cosang; + float x = axis[0], y = axis[1], z = axis[2]; + matrix M = matrix(x * x + (1.0 - x * x) * cosang, + x * y * cosang1 + z * sinang, + x * z * cosang1 - y * sinang, + 0.0, + x * y * cosang1 - z * sinang, + y * y + (1.0 - y * y) * cosang, + y * z * cosang1 + x * sinang, + 0.0, + x * z * cosang1 + y * sinang, + y * z * cosang1 - x * sinang, + z * z + (1.0 - z * z) * cosang, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0); + return transform(M, p - a) + a; } normal 
ensure_valid_reflection(normal Ng, vector I, normal N) { - /* The implementation here mirrors the one in kernel_montecarlo.h, + /* The implementation here mirrors the one in kernel_montecarlo.h, * check there for an explanation of the algorithm. */ - float sqr(float x) { return x*x; } - - vector R = 2*dot(N, I)*N - I; + float sqr(float x) + { + return x * x; + } - float threshold = min(0.9*dot(Ng, I), 0.01); - if(dot(Ng, R) >= threshold) { - return N; - } + vector R = 2 * dot(N, I) * N - I; - float NdotNg = dot(N, Ng); - vector X = normalize(N - NdotNg*Ng); + float threshold = min(0.9 * dot(Ng, I), 0.01); + if (dot(Ng, R) >= threshold) { + return N; + } - float Ix = dot(I, X), Iz = dot(I, Ng); - float Ix2 = sqr(Ix), Iz2 = sqr(Iz); - float a = Ix2 + Iz2; + float NdotNg = dot(N, Ng); + vector X = normalize(N - NdotNg * Ng); - float b = sqrt(Ix2*(a - sqr(threshold))); - float c = Iz*threshold + a; + float Ix = dot(I, X), Iz = dot(I, Ng); + float Ix2 = sqr(Ix), Iz2 = sqr(Iz); + float a = Ix2 + Iz2; - float fac = 0.5/a; - float N1_z2 = fac*(b+c), N2_z2 = fac*(-b+c); - int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5)); - int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5)); + float b = sqrt(Ix2 * (a - sqr(threshold))); + float c = Iz * threshold + a; - float N_new_x, N_new_z; - if(valid1 && valid2) { - float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2); - float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2); + float fac = 0.5 / a; + float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c); + int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5)); + int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5)); - float R1 = 2*(N1_x*Ix + N1_z*Iz)*N1_z - Iz; - float R2 = 2*(N2_x*Ix + N2_z*Iz)*N2_z - Iz; + float N_new_x, N_new_z; + if (valid1 && valid2) { + float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2); + float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2); - valid1 = (R1 >= 1e-5); - valid2 = (R2 >= 1e-5); - if(valid1 && valid2) { - N_new_x = (R1 < R2)? 
N1_x : N2_x; - N_new_z = (R1 < R2)? N1_z : N2_z; - } - else { - N_new_x = (R1 > R2)? N1_x : N2_x; - N_new_z = (R1 > R2)? N1_z : N2_z; - } + float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz; + float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz; - } - else if(valid1 || valid2) { - float Nz2 = valid1? N1_z2 : N2_z2; - N_new_x = sqrt(1.0 - Nz2); - N_new_z = sqrt(Nz2); + valid1 = (R1 >= 1e-5); + valid2 = (R2 >= 1e-5); + if (valid1 && valid2) { + N_new_x = (R1 < R2) ? N1_x : N2_x; + N_new_z = (R1 < R2) ? N1_z : N2_z; } else { - return Ng; + N_new_x = (R1 > R2) ? N1_x : N2_x; + N_new_z = (R1 > R2) ? N1_z : N2_z; } - - return N_new_x*X + N_new_z*Ng; + } + else if (valid1 || valid2) { + float Nz2 = valid1 ? N1_z2 : N2_z2; + N_new_x = sqrt(1.0 - Nz2); + N_new_z = sqrt(Nz2); + } + else { + return Ng; + } + + return N_new_x * X + N_new_z * Ng; } - // Color functions -float luminance (color c) BUILTIN; -color blackbody (float temperatureK) BUILTIN; -color wavelength_color (float wavelength_nm) BUILTIN; - - -color transformc (string to, color x) -{ - color rgb_to_hsv (color rgb) { // See Foley & van Dam - float r = rgb[0], g = rgb[1], b = rgb[2]; - float mincomp = min (r, min (g, b)); - float maxcomp = max (r, max (g, b)); - float delta = maxcomp - mincomp; // chroma - float h, s, v; - v = maxcomp; - if (maxcomp > 0) - s = delta / maxcomp; - else s = 0; - if (s <= 0) - h = 0; - else { - if (r >= maxcomp) h = (g-b) / delta; - else if (g >= maxcomp) h = 2 + (b-r) / delta; - else h = 4 + (r-g) / delta; - h /= 6; - if (h < 0) - h += 1; - } - return color (h, s, v); - } - - color rgb_to_hsl (color rgb) { // See Foley & van Dam - // First convert rgb to hsv, then to hsl - float minval = min (rgb[0], min (rgb[1], rgb[2])); - color hsv = rgb_to_hsv (rgb); - float maxval = hsv[2]; // v == maxval - float h = hsv[0], s, l = (minval+maxval) / 2; - if (minval == maxval) - s = 0; // special 'achromatic' case, hue is 0 - else if (l <= 0.5) - s = (maxval - minval) / (maxval + minval); - else 
- s = (maxval - minval) / (2 - maxval - minval); - return color (h, s, l); - } +float luminance(color c) BUILTIN; +color blackbody(float temperatureK) BUILTIN; +color wavelength_color(float wavelength_nm) BUILTIN; - color r; - if (to == "rgb" || to == "RGB") - r = x; - else if (to == "hsv") - r = rgb_to_hsv (x); - else if (to == "hsl") - r = rgb_to_hsl (x); - else if (to == "YIQ") - r = color (dot (vector(0.299, 0.587, 0.114), (vector)x), - dot (vector(0.596, -0.275, -0.321), (vector)x), - dot (vector(0.212, -0.523, 0.311), (vector)x)); - else if (to == "XYZ") - r = color (dot (vector(0.412453, 0.357580, 0.180423), (vector)x), - dot (vector(0.212671, 0.715160, 0.072169), (vector)x), - dot (vector(0.019334, 0.119193, 0.950227), (vector)x)); +color transformc(string to, color x) +{ + color rgb_to_hsv(color rgb) + { // See Foley & van Dam + float r = rgb[0], g = rgb[1], b = rgb[2]; + float mincomp = min(r, min(g, b)); + float maxcomp = max(r, max(g, b)); + float delta = maxcomp - mincomp; // chroma + float h, s, v; + v = maxcomp; + if (maxcomp > 0) + s = delta / maxcomp; + else + s = 0; + if (s <= 0) + h = 0; else { - error ("Unknown color space \"%s\"", to); - r = x; + if (r >= maxcomp) + h = (g - b) / delta; + else if (g >= maxcomp) + h = 2 + (b - r) / delta; + else + h = 4 + (r - g) / delta; + h /= 6; + if (h < 0) + h += 1; } - return r; + return color(h, s, v); + } + + color rgb_to_hsl(color rgb) + { // See Foley & van Dam + // First convert rgb to hsv, then to hsl + float minval = min(rgb[0], min(rgb[1], rgb[2])); + color hsv = rgb_to_hsv(rgb); + float maxval = hsv[2]; // v == maxval + float h = hsv[0], s, l = (minval + maxval) / 2; + if (minval == maxval) + s = 0; // special 'achromatic' case, hue is 0 + else if (l <= 0.5) + s = (maxval - minval) / (maxval + minval); + else + s = (maxval - minval) / (2 - maxval - minval); + return color(h, s, l); + } + + color r; + if (to == "rgb" || to == "RGB") + r = x; + else if (to == "hsv") + r = rgb_to_hsv(x); + else if 
(to == "hsl") + r = rgb_to_hsl(x); + else if (to == "YIQ") + r = color(dot(vector(0.299, 0.587, 0.114), (vector)x), + dot(vector(0.596, -0.275, -0.321), (vector)x), + dot(vector(0.212, -0.523, 0.311), (vector)x)); + else if (to == "XYZ") + r = color(dot(vector(0.412453, 0.357580, 0.180423), (vector)x), + dot(vector(0.212671, 0.715160, 0.072169), (vector)x), + dot(vector(0.019334, 0.119193, 0.950227), (vector)x)); + else { + error("Unknown color space \"%s\"", to); + r = x; + } + return r; } - -color transformc (string from, string to, color x) -{ - color hsv_to_rgb (color c) { // Reference: Foley & van Dam - float h = c[0], s = c[1], v = c[2]; - color r; - if (s < 0.0001) { - r = v; - } else { - h = 6 * (h - floor(h)); // expand to [0..6) - int hi = (int)h; - float f = h - hi; - float p = v * (1-s); - float q = v * (1-s*f); - float t = v * (1-s*(1-f)); - if (hi == 0) r = color (v, t, p); - else if (hi == 1) r = color (q, v, p); - else if (hi == 2) r = color (p, v, t); - else if (hi == 3) r = color (p, q, v); - else if (hi == 4) r = color (t, p, v); - else r = color (v, p, q); - } - return r; +color transformc(string from, string to, color x) +{ + color hsv_to_rgb(color c) + { // Reference: Foley & van Dam + float h = c[0], s = c[1], v = c[2]; + color r; + if (s < 0.0001) { + r = v; } - - color hsl_to_rgb (color c) { - float h = c[0], s = c[1], l = c[2]; - // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam) - float v = (l <= 0.5) ? 
(l * (1 + s)) : (l * (1 - s) + s); - color r; - if (v <= 0) { - r = 0; - } else { - float min = 2 * l - v; - s = (v - min) / v; - r = hsv_to_rgb (color (h, s, v)); - } - return r; + else { + h = 6 * (h - floor(h)); // expand to [0..6) + int hi = (int)h; + float f = h - hi; + float p = v * (1 - s); + float q = v * (1 - s * f); + float t = v * (1 - s * (1 - f)); + if (hi == 0) + r = color(v, t, p); + else if (hi == 1) + r = color(q, v, p); + else if (hi == 2) + r = color(p, v, t); + else if (hi == 3) + r = color(p, q, v); + else if (hi == 4) + r = color(t, p, v); + else + r = color(v, p, q); } + return r; + } + color hsl_to_rgb(color c) + { + float h = c[0], s = c[1], l = c[2]; + // Easiest to convert hsl -> hsv, then hsv -> RGB (per Foley & van Dam) + float v = (l <= 0.5) ? (l * (1 + s)) : (l * (1 - s) + s); color r; - if (from == "rgb" || from == "RGB") - r = x; - else if (from == "hsv") - r = hsv_to_rgb (x); - else if (from == "hsl") - r = hsl_to_rgb (x); - else if (from == "YIQ") - r = color (dot (vector(1, 0.9557, 0.6199), (vector)x), - dot (vector(1, -0.2716, -0.6469), (vector)x), - dot (vector(1, -1.1082, 1.7051), (vector)x)); - else if (from == "XYZ") - r = color (dot (vector( 3.240479, -1.537150, -0.498535), (vector)x), - dot (vector(-0.969256, 1.875991, 0.041556), (vector)x), - dot (vector( 0.055648, -0.204043, 1.057311), (vector)x)); + if (v <= 0) { + r = 0; + } else { - error ("Unknown color space \"%s\"", to); - r = x; + float min = 2 * l - v; + s = (v - min) / v; + r = hsv_to_rgb(color(h, s, v)); } - return transformc (to, r); + return r; + } + + color r; + if (from == "rgb" || from == "RGB") + r = x; + else if (from == "hsv") + r = hsv_to_rgb(x); + else if (from == "hsl") + r = hsl_to_rgb(x); + else if (from == "YIQ") + r = color(dot(vector(1, 0.9557, 0.6199), (vector)x), + dot(vector(1, -0.2716, -0.6469), (vector)x), + dot(vector(1, -1.1082, 1.7051), (vector)x)); + else if (from == "XYZ") + r = color(dot(vector(3.240479, -1.537150, -0.498535), 
(vector)x), + dot(vector(-0.969256, 1.875991, 0.041556), (vector)x), + dot(vector(0.055648, -0.204043, 1.057311), (vector)x)); + else { + error("Unknown color space \"%s\"", to); + r = x; + } + return transformc(to, r); } - - // Matrix functions -float determinant (matrix m) BUILTIN; -matrix transpose (matrix m) BUILTIN; +float determinant(matrix m) BUILTIN; +matrix transpose(matrix m) BUILTIN; +// Pattern generation +color step(color edge, color x) BUILTIN; +point step(point edge, point x) BUILTIN; +vector step(vector edge, vector x) BUILTIN; +normal step(normal edge, normal x) BUILTIN; +float step(float edge, float x) BUILTIN; +float smoothstep(float edge0, float edge1, float x) BUILTIN; -// Pattern generation +float linearstep(float edge0, float edge1, float x) +{ + float result; + if (edge0 != edge1) { + float xclamped = clamp(x, edge0, edge1); + result = (xclamped - edge0) / (edge1 - edge0); + } + else { // special case: edges coincide + result = step(edge0, x); + } + return result; +} -color step (color edge, color x) BUILTIN; -point step (point edge, point x) BUILTIN; -vector step (vector edge, vector x) BUILTIN; -normal step (normal edge, normal x) BUILTIN; -float step (float edge, float x) BUILTIN; -float smoothstep (float edge0, float edge1, float x) BUILTIN; - -float linearstep (float edge0, float edge1, float x) { - float result; - if (edge0 != edge1) { - float xclamped = clamp (x, edge0, edge1); - result = (xclamped - edge0) / (edge1 - edge0); - } else { // special case: edges coincide - result = step (edge0, x); - } - return result; -} - -float smooth_linearstep (float edge0, float edge1, float x_, float eps_) { - float result; - if (edge0 != edge1) { - float rampup (float x, float r) { return 0.5/r * x*x; } - float width_inv = 1.0 / (edge1 - edge0); - float eps = eps_ * width_inv; - float x = (x_ - edge0) * width_inv; - if (x <= -eps) result = 0; - else if (x >= eps && x <= 1.0-eps) result = x; - else if (x >= 1.0+eps) result = 1; - else if (x < eps) 
result = rampup (x+eps, 2.0*eps); - else /* if (x < 1.0+eps) */ result = 1.0 - rampup (1.0+eps - x, 2.0*eps); - } else { - result = step (edge0, x_); +float smooth_linearstep(float edge0, float edge1, float x_, float eps_) +{ + float result; + if (edge0 != edge1) { + float rampup(float x, float r) + { + return 0.5 / r * x * x; } - return result; + float width_inv = 1.0 / (edge1 - edge0); + float eps = eps_ * width_inv; + float x = (x_ - edge0) * width_inv; + if (x <= -eps) + result = 0; + else if (x >= eps && x <= 1.0 - eps) + result = x; + else if (x >= 1.0 + eps) + result = 1; + else if (x < eps) + result = rampup(x + eps, 2.0 * eps); + else /* if (x < 1.0+eps) */ + result = 1.0 - rampup(1.0 + eps - x, 2.0 * eps); + } + else { + result = step(edge0, x_); + } + return result; } -float aastep (float edge, float s, float dedge, float ds) { - // Box filtered AA step - float width = fabs(dedge) + fabs(ds); - float halfwidth = 0.5*width; - float e1 = edge-halfwidth; - return (s <= e1) ? 0.0 : ((s >= (edge+halfwidth)) ? 1.0 : (s-e1)/width); +float aastep(float edge, float s, float dedge, float ds) +{ + // Box filtered AA step + float width = fabs(dedge) + fabs(ds); + float halfwidth = 0.5 * width; + float e1 = edge - halfwidth; + return (s <= e1) ? 0.0 : ((s >= (edge + halfwidth)) ? 
1.0 : (s - e1) / width); } -float aastep (float edge, float s, float ds) { - return aastep (edge, s, filterwidth(edge), ds); +float aastep(float edge, float s, float ds) +{ + return aastep(edge, s, filterwidth(edge), ds); } -float aastep (float edge, float s) { - return aastep (edge, s, filterwidth(edge), filterwidth(s)); +float aastep(float edge, float s) +{ + return aastep(edge, s, filterwidth(edge), filterwidth(s)); } - // Derivatives and area operators - // Displacement functions - // String functions -int strlen (string s) BUILTIN; -int hash (string s) BUILTIN; -int getchar (string s, int index) BUILTIN; -int startswith (string s, string prefix) BUILTIN; -int endswith (string s, string suffix) BUILTIN; -string substr (string s, int start, int len) BUILTIN; -string substr (string s, int start) { return substr (s, start, strlen(s)); } -float stof (string str) BUILTIN; -int stoi (string str) BUILTIN; +int strlen(string s) BUILTIN; +int hash(string s) BUILTIN; +int getchar(string s, int index) BUILTIN; +int startswith(string s, string prefix) BUILTIN; +int endswith(string s, string suffix) BUILTIN; +string substr(string s, int start, int len) BUILTIN; +string substr(string s, int start) +{ + return substr(s, start, strlen(s)); +} +float stof(string str) BUILTIN; +int stoi(string str) BUILTIN; // Define concat in terms of shorter concat -string concat (string a, string b, string c) { - return concat(concat(a,b), c); +string concat(string a, string b, string c) +{ + return concat(concat(a, b), c); } -string concat (string a, string b, string c, string d) { - return concat(concat(a,b,c), d); +string concat(string a, string b, string c, string d) +{ + return concat(concat(a, b, c), d); } -string concat (string a, string b, string c, string d, string e) { - return concat(concat(a,b,c,d), e); +string concat(string a, string b, string c, string d, string e) +{ + return concat(concat(a, b, c, d), e); } -string concat (string a, string b, string c, string d, string e, 
string f) { - return concat(concat(a,b,c,d,e), f); +string concat(string a, string b, string c, string d, string e, string f) +{ + return concat(concat(a, b, c, d, e), f); } - // Texture - // Closures closure color diffuse(normal N) BUILTIN; @@ -591,14 +732,18 @@ closure color microfacet_multi_ggx(normal N, float ag, color C) BUILTIN; closure color microfacet_multi_ggx_aniso(normal N, vector T, float ax, float ay, color C) BUILTIN; closure color microfacet_multi_ggx_glass(normal N, float ag, float eta, color C) BUILTIN; closure color microfacet_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; -closure color microfacet_ggx_aniso_fresnel(normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN; -closure color microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; -closure color microfacet_multi_ggx_aniso_fresnel(normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN; -closure color microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; +closure color microfacet_ggx_aniso_fresnel( + normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN; +closure color +microfacet_multi_ggx_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; +closure color microfacet_multi_ggx_aniso_fresnel( + normal N, vector T, float ax, float ay, float eta, color C, color Cspec0) BUILTIN; +closure color +microfacet_multi_ggx_glass_fresnel(normal N, float ag, float eta, color C, color Cspec0) BUILTIN; closure color microfacet_beckmann(normal N, float ab) BUILTIN; closure color microfacet_beckmann_aniso(normal N, vector T, float ax, float ay) BUILTIN; closure color microfacet_beckmann_refraction(normal N, float ab, float eta) BUILTIN; -closure color ashikhmin_shirley(normal N, vector T,float ax, float ay) BUILTIN; +closure color ashikhmin_shirley(normal N, vector T, float ax, float ay) BUILTIN; 
closure color ashikhmin_velvet(normal N, float sigma) BUILTIN; closure color emission() BUILTIN; closure color background() BUILTIN; @@ -612,78 +757,97 @@ closure color principled_clearcoat(normal N, float clearcoat, float clearcoat_ro closure color bssrdf(string method, normal N, vector radius, color albedo) BUILTIN; // Hair -closure color hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN; -closure color hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN; -closure color principled_hair(normal N, color sigma, float roughnessu, float roughnessv, float coat, float alpha, float eta) BUILTIN; +closure color +hair_reflection(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN; +closure color +hair_transmission(normal N, float roughnessu, float roughnessv, vector T, float offset) BUILTIN; +closure color principled_hair(normal N, + color sigma, + float roughnessu, + float roughnessv, + float coat, + float alpha, + float eta) BUILTIN; // Volume closure color henyey_greenstein(float g) BUILTIN; closure color absorption() BUILTIN; // OSL 1.5 Microfacet functions -closure color microfacet(string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract) { - /* GGX */ - if (distribution == "ggx" || distribution == "default") { - if (!refract) { - if (xalpha == yalpha) { - /* Isotropic */ - return microfacet_ggx(N, xalpha); - } - else { - /* Anisotropic */ - return microfacet_ggx_aniso(N, U, xalpha, yalpha); - } - } - else { - return microfacet_ggx_refraction(N, xalpha, eta); - } - } - /* Beckmann */ - else { - if (!refract) { - if (xalpha == yalpha) { - /* Isotropic */ - return microfacet_beckmann(N, xalpha); - } - else { - /* Anisotropic */ - return microfacet_beckmann_aniso(N, U, xalpha, yalpha); - } - } - else { - return microfacet_beckmann_refraction(N, xalpha, eta); - } - } -} - -closure color microfacet (string distribution, normal 
N, float alpha, float eta, int refract) { - return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract); +closure color microfacet( + string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract) +{ + /* GGX */ + if (distribution == "ggx" || distribution == "default") { + if (!refract) { + if (xalpha == yalpha) { + /* Isotropic */ + return microfacet_ggx(N, xalpha); + } + else { + /* Anisotropic */ + return microfacet_ggx_aniso(N, U, xalpha, yalpha); + } + } + else { + return microfacet_ggx_refraction(N, xalpha, eta); + } + } + /* Beckmann */ + else { + if (!refract) { + if (xalpha == yalpha) { + /* Isotropic */ + return microfacet_beckmann(N, xalpha); + } + else { + /* Anisotropic */ + return microfacet_beckmann_aniso(N, U, xalpha, yalpha); + } + } + else { + return microfacet_beckmann_refraction(N, xalpha, eta); + } + } } +closure color microfacet(string distribution, normal N, float alpha, float eta, int refract) +{ + return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract); +} // Renderer state -int backfacing () BUILTIN; -int raytype (string typename) BUILTIN; +int backfacing() BUILTIN; +int raytype(string typename) BUILTIN; // the individual 'isFOOray' functions are deprecated -int iscameraray () { return raytype("camera"); } -int isdiffuseray () { return raytype("diffuse"); } -int isglossyray () { return raytype("glossy"); } -int isshadowray () { return raytype("shadow"); } -int getmatrix (string fromspace, string tospace, output matrix M) BUILTIN; -int getmatrix (string fromspace, output matrix M) { - return getmatrix (fromspace, "common", M); +int iscameraray() +{ + return raytype("camera"); +} +int isdiffuseray() +{ + return raytype("diffuse"); +} +int isglossyray() +{ + return raytype("glossy"); +} +int isshadowray() +{ + return raytype("shadow"); +} +int getmatrix(string fromspace, string tospace, output matrix M) BUILTIN; +int getmatrix(string fromspace, output matrix M) +{ + return 
getmatrix(fromspace, "common", M); } - // Miscellaneous - - - #undef BUILTIN #undef BUILTIN_DERIV #undef PERCOMP1 #undef PERCOMP2 #undef PERCOMP2F -#endif /* CCL_STDOSL_H */ +#endif /* CCL_STDOSL_H */ diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h index ed0a82067f1..e08d87ab618 100644 --- a/intern/cycles/kernel/split/kernel_branched.h +++ b/intern/cycles/kernel/split/kernel_branched.h @@ -19,215 +19,213 @@ CCL_NAMESPACE_BEGIN #ifdef __BRANCHED_PATH__ /* sets up the various state needed to do an indirect loop */ -ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index) +ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, + int ray_index) { - SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - /* save a copy of the state to restore later */ -#define BRANCHED_STORE(name) \ - branched_state->name = kernel_split_state.name[ray_index]; + /* save a copy of the state to restore later */ +# define BRANCHED_STORE(name) branched_state->name = kernel_split_state.name[ray_index]; - BRANCHED_STORE(path_state); - BRANCHED_STORE(throughput); - BRANCHED_STORE(ray); - BRANCHED_STORE(isect); - BRANCHED_STORE(ray_state); + BRANCHED_STORE(path_state); + BRANCHED_STORE(throughput); + BRANCHED_STORE(ray); + BRANCHED_STORE(isect); + BRANCHED_STORE(ray_state); - *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index); - for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) { - kernel_split_sd(branched_state_sd, ray_index)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i]; - } + *kernel_split_sd(branched_state_sd, ray_index) = *kernel_split_sd(sd, ray_index); + for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) { + 
kernel_split_sd(branched_state_sd, ray_index)->closure[i] = + kernel_split_sd(sd, ray_index)->closure[i]; + } -#undef BRANCHED_STORE +# undef BRANCHED_STORE - /* set loop counters to intial position */ - branched_state->next_closure = 0; - branched_state->next_sample = 0; + /* set loop counters to intial position */ + branched_state->next_closure = 0; + branched_state->next_sample = 0; } /* ends an indirect loop and restores the previous state */ -ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index) +ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, + int ray_index) { - SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - /* restore state */ -#define BRANCHED_RESTORE(name) \ - kernel_split_state.name[ray_index] = branched_state->name; + /* restore state */ +# define BRANCHED_RESTORE(name) kernel_split_state.name[ray_index] = branched_state->name; - BRANCHED_RESTORE(path_state); - BRANCHED_RESTORE(throughput); - BRANCHED_RESTORE(ray); - BRANCHED_RESTORE(isect); - BRANCHED_RESTORE(ray_state); + BRANCHED_RESTORE(path_state); + BRANCHED_RESTORE(throughput); + BRANCHED_RESTORE(ray); + BRANCHED_RESTORE(isect); + BRANCHED_RESTORE(ray_state); - *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index); - for(int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) { - kernel_split_sd(sd, ray_index)->closure[i] = kernel_split_sd(branched_state_sd, ray_index)->closure[i]; - } + *kernel_split_sd(sd, ray_index) = *kernel_split_sd(branched_state_sd, ray_index); + for (int i = 0; i < kernel_split_sd(branched_state_sd, ray_index)->num_closure; i++) { + kernel_split_sd(sd, ray_index)->closure[i] = + kernel_split_sd(branched_state_sd, ray_index)->closure[i]; + } -#undef BRANCHED_RESTORE +# undef BRANCHED_RESTORE - /* leave indirect 
loop */ - REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT); + /* leave indirect loop */ + REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT); } -ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, int ray_index) +ccl_device_inline bool kernel_split_branched_indirect_start_shared(KernelGlobals *kg, + int ray_index) { - ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global char *ray_state = kernel_split_state.ray_state; - int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS, - kernel_split_state.queue_data, kernel_split_params.queue_size, kernel_split_params.queue_index); + int inactive_ray = dequeue_ray_index(QUEUE_INACTIVE_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + kernel_split_params.queue_index); - if(!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) { - return false; - } + if (!IS_STATE(ray_state, inactive_ray, RAY_INACTIVE)) { + return false; + } -#define SPLIT_DATA_ENTRY(type, name, num) \ - if(num) { \ - kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \ - } - SPLIT_DATA_ENTRIES_BRANCHED_SHARED -#undef SPLIT_DATA_ENTRY +# define SPLIT_DATA_ENTRY(type, name, num) \ + if (num) { \ + kernel_split_state.name[inactive_ray] = kernel_split_state.name[ray_index]; \ + } + SPLIT_DATA_ENTRIES_BRANCHED_SHARED +# undef SPLIT_DATA_ENTRY - *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index); - for(int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) { - kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i]; - } + *kernel_split_sd(sd, inactive_ray) = *kernel_split_sd(sd, ray_index); + for (int i = 0; i < kernel_split_sd(sd, ray_index)->num_closure; i++) { + kernel_split_sd(sd, inactive_ray)->closure[i] = kernel_split_sd(sd, ray_index)->closure[i]; + } - kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0; - 
kernel_split_state.branched_state[inactive_ray].original_ray = ray_index; - kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false; + kernel_split_state.branched_state[inactive_ray].shared_sample_count = 0; + kernel_split_state.branched_state[inactive_ray].original_ray = ray_index; + kernel_split_state.branched_state[inactive_ray].waiting_on_shared_samples = false; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + PathRadiance *inactive_L = &kernel_split_state.path_radiance[inactive_ray]; - path_radiance_init(inactive_L, kernel_data.film.use_light_pass); - path_radiance_copy_indirect(inactive_L, L); + path_radiance_init(inactive_L, kernel_data.film.use_light_pass); + path_radiance_copy_indirect(inactive_L, L); - ray_state[inactive_ray] = RAY_REGENERATED; - ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED); - ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)); + ray_state[inactive_ray] = RAY_REGENERATED; + ADD_RAY_FLAG(ray_state, inactive_ray, RAY_BRANCHED_INDIRECT_SHARED); + ADD_RAY_FLAG(ray_state, inactive_ray, IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)); - atomic_fetch_and_inc_uint32((ccl_global uint*)&kernel_split_state.branched_state[ray_index].shared_sample_count); + atomic_fetch_and_inc_uint32( + (ccl_global uint *)&kernel_split_state.branched_state[ray_index].shared_sample_count); - return true; + return true; } /* bounce off surface and integrate indirect light */ -ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg, - int ray_index, - float num_samples_adjust, - ShaderData *saved_sd, - bool reset_path_state, - bool wait_for_shared) +ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter( + KernelGlobals *kg, + int ray_index, + float 
num_samples_adjust, + ShaderData *saved_sd, + bool reset_path_state, + bool wait_for_shared) { - SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - - ShaderData *sd = saved_sd; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - float3 throughput = branched_state->throughput; - ccl_global PathState *ps = &kernel_split_state.path_state[ray_index]; - - float sum_sample_weight = 0.0f; -#ifdef __DENOISING_FEATURES__ - if(ps->denoising_feature_weight > 0.0f) { - for(int i = 0; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; - - /* transparency is not handled here, but in outer loop */ - if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { - continue; - } - - sum_sample_weight += sc->sample_weight; - } - } - else { - sum_sample_weight = 1.0f; - } -#endif /* __DENOISING_FEATURES__ */ - - for(int i = branched_state->next_closure; i < sd->num_closure; i++) { - const ShaderClosure *sc = &sd->closure[i]; - - if(!CLOSURE_IS_BSDF(sc->type)) - continue; - /* transparency is not handled here, but in outer loop */ - if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID) - continue; - - int num_samples; - - if(CLOSURE_IS_BSDF_DIFFUSE(sc->type)) - num_samples = kernel_data.integrator.diffuse_samples; - else if(CLOSURE_IS_BSDF_BSSRDF(sc->type)) - num_samples = 1; - else if(CLOSURE_IS_BSDF_GLOSSY(sc->type)) - num_samples = kernel_data.integrator.glossy_samples; - else - num_samples = kernel_data.integrator.transmission_samples; - - num_samples = ceil_to_int(num_samples_adjust*num_samples); - - float num_samples_inv = num_samples_adjust/num_samples; - - for(int j = branched_state->next_sample; j < num_samples; j++) { - if(reset_path_state) { - *ps = branched_state->path_state; - } - - ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i); - - ccl_global float3 *tp = &kernel_split_state.throughput[ray_index]; - *tp = throughput; - - ccl_global Ray *bsdf_ray = 
&kernel_split_state.ray[ray_index]; - - if(!kernel_branched_path_surface_bounce(kg, - sd, - sc, - j, - num_samples, - tp, - ps, - &L->state, - bsdf_ray, - sum_sample_weight)) - { - continue; - } - - ps->rng_hash = branched_state->path_state.rng_hash; - - /* update state for next iteration */ - branched_state->next_closure = i; - branched_state->next_sample = j+1; - - /* start the indirect path */ - *tp *= num_samples_inv; - - if(kernel_split_branched_indirect_start_shared(kg, ray_index)) { - continue; - } - - return true; - } - - branched_state->next_sample = 0; - } - - branched_state->next_closure = sd->num_closure; - - if(wait_for_shared) { - branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); - if(branched_state->waiting_on_shared_samples) { - return true; - } - } - - return false; + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + + ShaderData *sd = saved_sd; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + float3 throughput = branched_state->throughput; + ccl_global PathState *ps = &kernel_split_state.path_state[ray_index]; + + float sum_sample_weight = 0.0f; +# ifdef __DENOISING_FEATURES__ + if (ps->denoising_feature_weight > 0.0f) { + for (int i = 0; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + + /* transparency is not handled here, but in outer loop */ + if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) { + continue; + } + + sum_sample_weight += sc->sample_weight; + } + } + else { + sum_sample_weight = 1.0f; + } +# endif /* __DENOISING_FEATURES__ */ + + for (int i = branched_state->next_closure; i < sd->num_closure; i++) { + const ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSDF(sc->type)) + continue; + /* transparency is not handled here, but in outer loop */ + if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) + continue; + + int num_samples; + + if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) + num_samples = 
kernel_data.integrator.diffuse_samples; + else if (CLOSURE_IS_BSDF_BSSRDF(sc->type)) + num_samples = 1; + else if (CLOSURE_IS_BSDF_GLOSSY(sc->type)) + num_samples = kernel_data.integrator.glossy_samples; + else + num_samples = kernel_data.integrator.transmission_samples; + + num_samples = ceil_to_int(num_samples_adjust * num_samples); + + float num_samples_inv = num_samples_adjust / num_samples; + + for (int j = branched_state->next_sample; j < num_samples; j++) { + if (reset_path_state) { + *ps = branched_state->path_state; + } + + ps->rng_hash = cmj_hash(branched_state->path_state.rng_hash, i); + + ccl_global float3 *tp = &kernel_split_state.throughput[ray_index]; + *tp = throughput; + + ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index]; + + if (!kernel_branched_path_surface_bounce( + kg, sd, sc, j, num_samples, tp, ps, &L->state, bsdf_ray, sum_sample_weight)) { + continue; + } + + ps->rng_hash = branched_state->path_state.rng_hash; + + /* update state for next iteration */ + branched_state->next_closure = i; + branched_state->next_sample = j + 1; + + /* start the indirect path */ + *tp *= num_samples_inv; + + if (kernel_split_branched_indirect_start_shared(kg, ray_index)) { + continue; + } + + return true; + } + + branched_state->next_sample = 0; + } + + branched_state->next_closure = sd->num_closure; + + if (wait_for_shared) { + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if (branched_state->waiting_on_shared_samples) { + return true; + } + } + + return false; } -#endif /* __BRANCHED_PATH__ */ +#endif /* __BRANCHED_PATH__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_buffer_update.h b/intern/cycles/kernel/split/kernel_buffer_update.h index 18eec6372f1..e77743350dc 100644 --- a/intern/cycles/kernel/split/kernel_buffer_update.h +++ b/intern/cycles/kernel/split/kernel_buffer_update.h @@ -41,132 +41,133 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_buffer_update(KernelGlobals *kg, 
ccl_local_param unsigned int *local_queue_atomics) { - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(ray_index == 0) { - /* We will empty this queue in this kernel. */ - kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; - } - char enqueue_flag = 0; - ray_index = get_ray_index(kg, ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (ray_index == 0) { + /* We will empty this queue in this kernel. */ + kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } + char enqueue_flag = 0; + ray_index = get_ray_index(kg, + ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); #ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. 
+ */ + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } #endif #ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { + if (ray_index != QUEUE_EMPTY_SLOT) { #endif - ccl_global char *ray_state = kernel_split_state.ray_state; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - bool ray_was_updated = false; - - if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { - ray_was_updated = true; - uint sample = state->sample; - uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; - ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; - - /* accumulate result in output buffer */ - kernel_write_result(kg, buffer, sample, L); - - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); - } - - if(kernel_data.film.cryptomatte_passes) { - /* Make sure no thread is writing to the buffers. 
*/ - ccl_barrier(CCL_LOCAL_MEM_FENCE); - if(ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) { - uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; - ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; - ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; - kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); - } - } - - if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { - /* We have completed current work; So get next work */ - ccl_global uint *work_pools = kernel_split_params.work_pools; - uint total_work_size = kernel_split_params.total_work_size; - uint work_index; - - if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { - /* If work is invalid, this means no more work is available and the thread may exit */ - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); - } - - if(IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { - ccl_global WorkTile *tile = &kernel_split_params.tile; - uint x, y, sample; - get_work_pixel(tile, work_index, &x, &y, &sample); - - /* Store buffer offset for writing to passes. */ - uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride; - kernel_split_state.buffer_offset[ray_index] = buffer_offset; - - /* Initialize random numbers and ray. */ - uint rng_hash; - kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray); - - if(ray->t != 0.0f) { - /* Initialize throughput, path radiance, Ray, PathState; - * These rays proceed with path-iteration. 
- */ - *throughput = make_float3(1.0f, 1.0f, 1.0f); - path_radiance_init(L, kernel_data.film.use_light_pass); - path_state_init(kg, - AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), - state, - rng_hash, - sample, - ray); + ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + bool ray_was_updated = false; + + if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { + ray_was_updated = true; + uint sample = state->sample; + uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + + /* accumulate result in output buffer */ + kernel_write_result(kg, buffer, sample, L); + + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); + } + + if (kernel_data.film.cryptomatte_passes) { + /* Make sure no thread is writing to the buffers. 
*/ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (ray_was_updated && state->sample - 1 == kernel_data.integrator.aa_samples) { + uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte; + kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth); + } + } + + if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { + /* We have completed current work; So get next work */ + ccl_global uint *work_pools = kernel_split_params.work_pools; + uint total_work_size = kernel_split_params.total_work_size; + uint work_index; + + if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { + /* If work is invalid, this means no more work is available and the thread may exit */ + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); + } + + if (IS_STATE(ray_state, ray_index, RAY_TO_REGENERATE)) { + ccl_global WorkTile *tile = &kernel_split_params.tile; + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); + + /* Store buffer offset for writing to passes. */ + uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride; + kernel_split_state.buffer_offset[ray_index] = buffer_offset; + + /* Initialize random numbers and ray. */ + uint rng_hash; + kernel_path_trace_setup(kg, sample, x, y, &rng_hash, ray); + + if (ray->t != 0.0f) { + /* Initialize throughput, path radiance, Ray, PathState; + * These rays proceed with path-iteration. 
+ */ + *throughput = make_float3(1.0f, 1.0f, 1.0f); + path_radiance_init(L, kernel_data.film.use_light_pass); + path_state_init(kg, + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), + state, + rng_hash, + sample, + ray); #ifdef __SUBSURFACE__ - kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); + kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - enqueue_flag = 1; - } - else { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); - } - } - } + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + enqueue_flag = 1; + } + else { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_TO_REGENERATE); + } + } + } #ifndef __COMPUTE_DEVICE_GPU__ - } + } #endif - /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS; - * These rays will be made active during next SceneIntersectkernel. - */ - enqueue_ray_index_local(ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - enqueue_flag, - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); + /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS; + * These rays will be made active during next SceneIntersectkernel. 
+ */ + enqueue_ray_index_local(ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h index 77fb61b80a8..52930843f56 100644 --- a/intern/cycles/kernel/split/kernel_data_init.h +++ b/intern/cycles/kernel/split/kernel_data_init.h @@ -28,82 +28,88 @@ ccl_device void kernel_data_init( #else void KERNEL_FUNCTION_FULL_NAME(data_init)( #endif - KernelGlobals *kg, - ccl_constant KernelData *data, - ccl_global void *split_data_buffer, - int num_elements, - ccl_global char *ray_state, + KernelGlobals *kg, + ccl_constant KernelData *data, + ccl_global void *split_data_buffer, + int num_elements, + ccl_global char *ray_state, #ifdef __KERNEL_OPENCL__ - KERNEL_BUFFER_PARAMS, + KERNEL_BUFFER_PARAMS, #endif - int start_sample, - int end_sample, - int sx, int sy, int sw, int sh, int offset, int stride, - ccl_global int *Queue_index, /* Tracks the number of elements in queues */ - int queuesize, /* size (capacity) of the queue */ - ccl_global char *use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ - ccl_global unsigned int *work_pools, /* Work pool for each work group */ - unsigned int num_samples, - ccl_global float *buffer) + int start_sample, + int end_sample, + int sx, + int sy, + int sw, + int sh, + int offset, + int stride, + ccl_global int *Queue_index, /* Tracks the number of elements in queues */ + int queuesize, /* size (capacity) of the queue */ + ccl_global char * + use_queues_flag, /* flag to decide if scene-intersect kernel should use queues to fetch ray index */ + ccl_global unsigned int *work_pools, /* Work pool for each work group */ + unsigned int num_samples, + ccl_global float *buffer) { #ifdef KERNEL_STUB - STUB_ASSERT(KERNEL_ARCH, data_init); + 
STUB_ASSERT(KERNEL_ARCH, data_init); #else -#ifdef __KERNEL_OPENCL__ - kg->data = data; -#endif +# ifdef __KERNEL_OPENCL__ + kg->data = data; +# endif - kernel_split_params.tile.x = sx; - kernel_split_params.tile.y = sy; - kernel_split_params.tile.w = sw; - kernel_split_params.tile.h = sh; + kernel_split_params.tile.x = sx; + kernel_split_params.tile.y = sy; + kernel_split_params.tile.w = sw; + kernel_split_params.tile.h = sh; - kernel_split_params.tile.start_sample = start_sample; - kernel_split_params.tile.num_samples = num_samples; + kernel_split_params.tile.start_sample = start_sample; + kernel_split_params.tile.num_samples = num_samples; - kernel_split_params.tile.offset = offset; - kernel_split_params.tile.stride = stride; + kernel_split_params.tile.offset = offset; + kernel_split_params.tile.stride = stride; - kernel_split_params.tile.buffer = buffer; + kernel_split_params.tile.buffer = buffer; - kernel_split_params.total_work_size = sw * sh * num_samples; + kernel_split_params.total_work_size = sw * sh * num_samples; - kernel_split_params.work_pools = work_pools; + kernel_split_params.work_pools = work_pools; - kernel_split_params.queue_index = Queue_index; - kernel_split_params.queue_size = queuesize; - kernel_split_params.use_queues_flag = use_queues_flag; + kernel_split_params.queue_index = Queue_index; + kernel_split_params.queue_size = queuesize; + kernel_split_params.use_queues_flag = use_queues_flag; - split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state); + split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state); -#ifdef __KERNEL_OPENCL__ - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); -#endif +# ifdef __KERNEL_OPENCL__ + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); +# endif + + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + + /* Initialize queue data and queue index. 
*/ + if (thread_index < queuesize) { + for (int i = 0; i < NUM_QUEUES; i++) { + kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT; + } + } + + if (thread_index == 0) { + for (int i = 0; i < NUM_QUEUES; i++) { + Queue_index[i] = 0; + } - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - - /* Initialize queue data and queue index. */ - if(thread_index < queuesize) { - for(int i = 0; i < NUM_QUEUES; i++) { - kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT; - } - } - - if(thread_index == 0) { - for(int i = 0; i < NUM_QUEUES; i++) { - Queue_index[i] = 0; - } - - /* The scene-intersect kernel should not use the queues very first time. - * since the queue would be empty. - */ - *use_queues_flag = 0; - } -#endif /* KERENL_STUB */ + /* The scene-intersect kernel should not use the queues very first time. + * since the queue would be empty. + */ + *use_queues_flag = 0; + } +#endif /* KERENL_STUB */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index ca79602c565..b2ca59d60cc 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -43,116 +43,111 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_direct_lighting(KernelGlobals *kg, ccl_local_param unsigned int *local_queue_atomics) { - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - char enqueue_flag = 0; - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); - - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - ShaderData *sd = kernel_split_sd(sd, 
ray_index); - - /* direct lighting */ + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + char enqueue_flag = 0; + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + + /* direct lighting */ #ifdef __EMISSION__ - bool flag = (kernel_data.integrator.use_direct_light && - (sd->flag & SD_BSDF_HAS_EVAL)); + bool flag = (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)); # ifdef __BRANCHED_PATH__ - if(flag && kernel_data.integrator.branched) { - flag = false; - enqueue_flag = 1; - } -# endif /* __BRANCHED_PATH__ */ + if (flag && kernel_data.integrator.branched) { + flag = false; + enqueue_flag = 1; + } +# endif /* __BRANCHED_PATH__ */ # ifdef __SHADOW_TRICKS__ - if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) { - flag = false; - enqueue_flag = 1; - } -# endif /* __SHADOW_TRICKS__ */ - - if(flag) { - /* Sample illumination from lights to find path contribution. */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - float terminate = path_state_rng_light_termination(kg, state); - - LightSample ls; - if(light_sample(kg, - light_u, light_v, - sd->time, - sd->P, - state->bounce, - &ls)) { - - Ray light_ray; - light_ray.time = sd->time; - - BsdfEval L_light; - bool is_lamp; - if(direct_emission(kg, - sd, - AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), - &ls, - state, - &light_ray, - &L_light, - &is_lamp, - terminate)) - { - /* Write intermediate data to global memory to access from - * the next kernel. 
- */ - kernel_split_state.light_ray[ray_index] = light_ray; - kernel_split_state.bsdf_eval[ray_index] = L_light; - kernel_split_state.is_lamp[ray_index] = is_lamp; - /* Mark ray state for next shadow kernel. */ - enqueue_flag = 1; - } - } - } -#endif /* __EMISSION__ */ - } + if (flag && state->flag & PATH_RAY_SHADOW_CATCHER) { + flag = false; + enqueue_flag = 1; + } +# endif /* __SHADOW_TRICKS__ */ + + if (flag) { + /* Sample illumination from lights to find path contribution. */ + float light_u, light_v; + path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); + float terminate = path_state_rng_light_termination(kg, state); + + LightSample ls; + if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + + Ray light_ray; + light_ray.time = sd->time; + + BsdfEval L_light; + bool is_lamp; + if (direct_emission(kg, + sd, + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), + &ls, + state, + &light_ray, + &L_light, + &is_lamp, + terminate)) { + /* Write intermediate data to global memory to access from + * the next kernel. + */ + kernel_split_state.light_ray[ray_index] = light_ray; + kernel_split_state.bsdf_eval[ray_index] = L_light; + kernel_split_state.is_lamp[ray_index] = is_lamp; + /* Mark ray state for next shadow kernel. */ + enqueue_flag = 1; + } + } + } +#endif /* __EMISSION__ */ + } #ifdef __EMISSION__ - /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_SHADOW_RAY_CAST_DL_RAYS, - enqueue_flag, - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); + /* Enqueue RAY_SHADOW_RAY_CAST_DL rays. 
*/ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_DL_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); #endif #ifdef __BRANCHED_PATH__ - /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays - * this is the last kernel before next_iteration_setup that uses local atomics so we do this here - */ - ccl_barrier(CCL_LOCAL_MEM_FENCE); - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - enqueue_ray_index_local(ray_index, - QUEUE_LIGHT_INDIRECT_ITER, - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER), - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); - -#endif /* __BRANCHED_PATH__ */ + /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays + * this is the last kernel before next_iteration_setup that uses local atomics so we do this here + */ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + enqueue_ray_index_local( + ray_index, + QUEUE_LIGHT_INDIRECT_ITER, + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER), + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); + +#endif /* __BRANCHED_PATH__ */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index fb5bd3d48dd..45b839db05f 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -18,203 +18,210 @@ CCL_NAMESPACE_BEGIN #if defined(__BRANCHED_PATH__) && defined(__VOLUME__) 
-ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index) +ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, + int ray_index) { - kernel_split_branched_path_indirect_loop_init(kg, ray_index); + kernel_split_branched_path_indirect_loop_init(kg, ray_index); - ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT); + ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT); } -ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index) +ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, + int ray_index) { - SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + ShaderData *sd = kernel_split_sd(sd, ray_index); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - /* GPU: no decoupled ray marching, scatter probalistically */ - int num_samples = kernel_data.integrator.volume_samples; - float num_samples_inv = 1.0f/num_samples; + /* GPU: no decoupled ray marching, scatter probalistically */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f / num_samples; - Ray volume_ray = branched_state->ray; - volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX; + Ray volume_ray = branched_state->ray; + volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? 
+ branched_state->isect.t : + FLT_MAX; - bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack); + bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack); - for(int j = branched_state->next_sample; j < num_samples; j++) { - ccl_global PathState *ps = &kernel_split_state.path_state[ray_index]; - *ps = branched_state->path_state; + for (int j = branched_state->next_sample; j < num_samples; j++) { + ccl_global PathState *ps = &kernel_split_state.path_state[ray_index]; + *ps = branched_state->path_state; - ccl_global Ray *pray = &kernel_split_state.ray[ray_index]; - *pray = branched_state->ray; + ccl_global Ray *pray = &kernel_split_state.ray[ray_index]; + *pray = branched_state->ray; - ccl_global float3 *tp = &kernel_split_state.throughput[ray_index]; - *tp = branched_state->throughput * num_samples_inv; + ccl_global float3 *tp = &kernel_split_state.throughput[ray_index]; + *tp = branched_state->throughput * num_samples_inv; - /* branch RNG state */ - path_state_branch(ps, j, num_samples); + /* branch RNG state */ + path_state_branch(ps, j, num_samples); - /* integrate along volume segment with distance sampling */ - VolumeIntegrateResult result = kernel_volume_integrate( - kg, ps, sd, &volume_ray, L, tp, heterogeneous); + /* integrate along volume segment with distance sampling */ + VolumeIntegrateResult result = kernel_volume_integrate( + kg, ps, sd, &volume_ray, L, tp, heterogeneous); # ifdef __VOLUME_SCATTER__ - if(result == VOLUME_PATH_SCATTERED) { - /* direct lighting */ - kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L); - - /* indirect light bounce */ - if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) { - continue; - } - - /* start the indirect path */ - branched_state->next_closure = 0; - branched_state->next_sample = j+1; - - /* Attempting to share too many samples is slow for volumes as it causes us to - * loop here more 
and have many calls to kernel_volume_integrate which evaluates - * shaders. The many expensive shader evaluations cause the work load to become - * unbalanced and many threads to become idle in this kernel. Limiting the - * number of shared samples here helps quite a lot. - */ - if(branched_state->shared_sample_count < 2) { - if(kernel_split_branched_indirect_start_shared(kg, ray_index)) { - continue; - } - } - - return true; - } + if (result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L); + + /* indirect light bounce */ + if (!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) { + continue; + } + + /* start the indirect path */ + branched_state->next_closure = 0; + branched_state->next_sample = j + 1; + + /* Attempting to share too many samples is slow for volumes as it causes us to + * loop here more and have many calls to kernel_volume_integrate which evaluates + * shaders. The many expensive shader evaluations cause the work load to become + * unbalanced and many threads to become idle in this kernel. Limiting the + * number of shared samples here helps quite a lot. 
+ */ + if (branched_state->shared_sample_count < 2) { + if (kernel_split_branched_indirect_start_shared(kg, ray_index)) { + continue; + } + } + + return true; + } # endif - } + } - branched_state->next_sample = num_samples; + branched_state->next_sample = num_samples; - branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); - if(branched_state->waiting_on_shared_samples) { - return true; - } + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if (branched_state->waiting_on_shared_samples) { + return true; + } - kernel_split_branched_path_indirect_loop_end(kg, ray_index); + kernel_split_branched_path_indirect_loop_end(kg, ray_index); - /* todo: avoid this calculation using decoupled ray marching */ - float3 throughput = kernel_split_state.throughput[ray_index]; - kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput); - kernel_split_state.throughput[ray_index] = throughput; + /* todo: avoid this calculation using decoupled ray marching */ + float3 throughput = kernel_split_state.throughput[ray_index]; + kernel_volume_shadow( + kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput); + kernel_split_state.throughput[ray_index] = throughput; - return false; + return false; } -#endif /* __BRANCHED_PATH__ && __VOLUME__ */ +#endif /* __BRANCHED_PATH__ && __VOLUME__ */ ccl_device void kernel_do_volume(KernelGlobals *kg) { #ifdef __VOLUME__ - /* We will empty this queue in this kernel. */ - if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { - kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + /* We will empty this queue in this kernel. 
*/ + if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; # ifdef __BRANCHED_PATH__ - kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0; -# endif /* __BRANCHED_PATH__ */ - } - - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - - if(*kernel_split_params.use_queues_flag) { - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - } - - ccl_global char *ray_state = kernel_split_state.ray_state; - - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) || - IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - - bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); - - /* Sanitize volume stack. */ - if(!hit) { - kernel_volume_clean_stack(kg, state->volume_stack); - } - /* volume attenuation, emission, scatter */ - if(state->volume_stack[0].shader != SHADER_NONE) { - Ray volume_ray = *ray; - volume_ray.t = (hit)? 
isect->t: FLT_MAX; + kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0; +# endif /* __BRANCHED_PATH__ */ + } + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + + if (*kernel_split_params.use_queues_flag) { + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + } + + ccl_global char *ray_state = kernel_split_state.ray_state; + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + + bool hit = !IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); + + /* Sanitize volume stack. */ + if (!hit) { + kernel_volume_clean_stack(kg, state->volume_stack); + } + /* volume attenuation, emission, scatter */ + if (state->volume_stack[0].shader != SHADER_NONE) { + Ray volume_ray = *ray; + volume_ray.t = (hit) ? 
isect->t : FLT_MAX; # ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { -# endif /* __BRANCHED_PATH__ */ - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + if (!kernel_data.integrator.branched || + IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { +# endif /* __BRANCHED_PATH__ */ + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); - { - /* integrate along volume segment with distance sampling */ - VolumeIntegrateResult result = kernel_volume_integrate( - kg, state, sd, &volume_ray, L, throughput, heterogeneous); + { + /* integrate along volume segment with distance sampling */ + VolumeIntegrateResult result = kernel_volume_integrate( + kg, state, sd, &volume_ray, L, throughput, heterogeneous); # ifdef __VOLUME_SCATTER__ - if(result == VOLUME_PATH_SCATTERED) { - /* direct lighting */ - kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); - - /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - else { - kernel_split_path_end(kg, ray_index); - } - } -# endif /* __VOLUME_SCATTER__ */ - } + if (result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); + + /* indirect light bounce */ + if (kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + else { + kernel_split_path_end(kg, ray_index); + } + } +# endif /* __VOLUME_SCATTER__ */ + } # ifdef __BRANCHED_PATH__ - } - else { - kernel_split_branched_path_volume_indirect_light_init(kg, ray_index); - - if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - } -# endif /* __BRANCHED_PATH__ */ - } - } + } + else { 
+ kernel_split_branched_path_volume_indirect_light_init(kg, ray_index); + + if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif /* __BRANCHED_PATH__ */ + } + } # ifdef __BRANCHED_PATH__ - /* iter loop */ - ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), - QUEUE_VOLUME_INDIRECT_ITER, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - - if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) { - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); - path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); - - if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - } -# endif /* __BRANCHED_PATH__ */ - -#endif /* __VOLUME__ */ + /* iter loop */ + ray_index = get_ray_index(kg, + ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), + QUEUE_VOLUME_INDIRECT_ITER, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + + if (IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) { + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); + path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); + + if (kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif /* __BRANCHED_PATH__ */ + +#endif /* __VOLUME__ */ } - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_enqueue_inactive.h b/intern/cycles/kernel/split/kernel_enqueue_inactive.h index 496355bbc3a..31d2daef616 100644 --- 
a/intern/cycles/kernel/split/kernel_enqueue_inactive.h +++ b/intern/cycles/kernel/split/kernel_enqueue_inactive.h @@ -20,27 +20,27 @@ ccl_device void kernel_enqueue_inactive(KernelGlobals *kg, ccl_local_param unsigned int *local_queue_atomics) { #ifdef __BRANCHED_PATH__ - /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */ - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + /* Enqeueue RAY_INACTIVE rays into QUEUE_INACTIVE_RAYS queue. */ + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - char enqueue_flag = 0; - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) { - enqueue_flag = 1; - } + char enqueue_flag = 0; + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_INACTIVE)) { + enqueue_flag = 1; + } - enqueue_ray_index_local(ray_index, - QUEUE_INACTIVE_RAYS, - enqueue_flag, - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); -#endif /* __BRANCHED_PATH__ */ + enqueue_ray_index_local(ray_index, + QUEUE_INACTIVE_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); +#endif /* __BRANCHED_PATH__ */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index f14eecec2f2..63bc5a8e0ce 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -54,120 +54,112 @@ CCL_NAMESPACE_BEGIN */ ccl_device void 
kernel_holdout_emission_blurring_pathtermination_ao( - KernelGlobals *kg, - ccl_local_param BackgroundAOLocals *locals) + KernelGlobals *kg, ccl_local_param BackgroundAOLocals *locals) { - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - locals->queue_atomics_bg = 0; - locals->queue_atomics_ao = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + locals->queue_atomics_bg = 0; + locals->queue_atomics_ao = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); #ifdef __AO__ - char enqueue_flag = 0; + char enqueue_flag = 0; #endif - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); #ifdef __COMPUTE_DEVICE_GPU__ - /* If we are executing on a GPU device, we exit all threads that are not - * required. - * - * If we are executing on a CPU device, then we need to keep all threads - * active since we have barrier() calls later in the kernel. CPU devices, - * expect all threads to execute barrier statement. - */ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } -#endif /* __COMPUTE_DEVICE_GPU__ */ + /* If we are executing on a GPU device, we exit all threads that are not + * required. + * + * If we are executing on a CPU device, then we need to keep all threads + * active since we have barrier() calls later in the kernel. CPU devices, + * expect all threads to execute barrier statement. 
+ */ + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } +#endif /* __COMPUTE_DEVICE_GPU__ */ #ifndef __COMPUTE_DEVICE_GPU__ - if(ray_index != QUEUE_EMPTY_SLOT) { + if (ray_index != QUEUE_EMPTY_SLOT) { #endif - ccl_global PathState *state = 0x0; - float3 throughput; - - ccl_global char *ray_state = kernel_split_state.ray_state; - ShaderData *sd = kernel_split_sd(sd, ray_index); - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; - ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; - - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - - throughput = kernel_split_state.throughput[ray_index]; - state = &kernel_split_state.path_state[ray_index]; - - if(!kernel_path_shader_apply(kg, - sd, - state, - ray, - throughput, - emission_sd, - L, - buffer)) - { - kernel_split_path_end(kg, ray_index); - } - } - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - /* Path termination. this is a strange place to put the termination, it's - * mainly due to the mixed in MIS that we use. gives too many unneeded - * shader evaluations, only need emission if we are going to terminate. 
- */ - float probability = path_state_continuation_probability(kg, state, throughput); - - if(probability == 0.0f) { - kernel_split_path_end(kg, ray_index); - } - else if(probability < 1.0f) { - float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); - if(terminate >= probability) { - kernel_split_path_end(kg, ray_index); - } - else { - kernel_split_state.throughput[ray_index] = throughput/probability; - } - } - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - kernel_update_denoising_features(kg, sd, state, L); - } - } + ccl_global PathState *state = 0x0; + float3 throughput; + + ccl_global char *ray_state = kernel_split_state.ray_state; + ShaderData *sd = kernel_split_sd(sd, ray_index); + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; + ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset; + + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + + throughput = kernel_split_state.throughput[ray_index]; + state = &kernel_split_state.path_state[ray_index]; + + if (!kernel_path_shader_apply(kg, sd, state, ray, throughput, emission_sd, L, buffer)) { + kernel_split_path_end(kg, ray_index); + } + } + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + /* Path termination. this is a strange place to put the termination, it's + * mainly due to the mixed in MIS that we use. gives too many unneeded + * shader evaluations, only need emission if we are going to terminate. 
+ */ + float probability = path_state_continuation_probability(kg, state, throughput); + + if (probability == 0.0f) { + kernel_split_path_end(kg, ray_index); + } + else if (probability < 1.0f) { + float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE); + if (terminate >= probability) { + kernel_split_path_end(kg, ray_index); + } + else { + kernel_split_state.throughput[ray_index] = throughput / probability; + } + } + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + kernel_update_denoising_features(kg, sd, state, L); + } + } #ifdef __AO__ - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - /* ambient occlusion */ - if(kernel_data.integrator.use_ambient_occlusion) { - enqueue_flag = 1; - } - } -#endif /* __AO__ */ + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + /* ambient occlusion */ + if (kernel_data.integrator.use_ambient_occlusion) { + enqueue_flag = 1; + } + } +#endif /* __AO__ */ #ifndef __COMPUTE_DEVICE_GPU__ - } + } #endif #ifdef __AO__ - /* Enqueue to-shadow-ray-cast rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_SHADOW_RAY_CAST_AO_RAYS, - enqueue_flag, - kernel_split_params.queue_size, - &locals->queue_atomics_ao, - kernel_split_state.queue_data, - kernel_split_params.queue_index); + /* Enqueue to-shadow-ray-cast rays. 
*/ + enqueue_ray_index_local(ray_index, + QUEUE_SHADOW_RAY_CAST_AO_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + &locals->queue_atomics_ao, + kernel_split_state.queue_data, + kernel_split_params.queue_index); #endif } diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h index 4cf88a02590..b1c65f61e2c 100644 --- a/intern/cycles/kernel/split/kernel_indirect_background.h +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -18,48 +18,50 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_indirect_background(KernelGlobals *kg) { - ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global char *ray_state = kernel_split_state.ray_state; - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - int ray_index; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + int ray_index; - if(kernel_data.integrator.ao_bounces != INT_MAX) { - ray_index = get_ray_index(kg, thread_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + if (kernel_data.integrator.ao_bounces != INT_MAX) { + ray_index = get_ray_index(kg, + thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - if(ray_index != QUEUE_EMPTY_SLOT) { - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - if(path_state_ao_bounce(kg, state)) { - kernel_split_path_end(kg, ray_index); - } - } - } - } + if (ray_index != QUEUE_EMPTY_SLOT) { + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + if (path_state_ao_bounce(kg, state)) { + kernel_split_path_end(kg, ray_index); + } + } + } + } - ray_index = get_ray_index(kg, thread_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - kernel_split_state.queue_data, 
- kernel_split_params.queue_size, - 0); + ray_index = get_ray_index(kg, + thread_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } - if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - float3 throughput = kernel_split_state.throughput[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); + if (IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + float3 throughput = kernel_split_state.throughput[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); - kernel_path_background(kg, state, ray, throughput, sd, L); - kernel_split_path_end(kg, ray_index); - } + kernel_path_background(kg, state, ray, throughput, sd, L); + kernel_split_path_end(kg, ray_index); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h index 236c94e983c..3f48f8d6f56 100644 --- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h +++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h @@ -18,53 +18,50 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_indirect_subsurface(KernelGlobals *kg) { - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index == 0) { - /* We will empty both queues in this kernel. 
*/ - kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; - kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; - } + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (thread_index == 0) { + /* We will empty both queues in this kernel. */ + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } - int ray_index; - get_ray_index(kg, thread_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - ray_index = get_ray_index(kg, thread_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); + int ray_index; + get_ray_index(kg, + thread_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + ray_index = get_ray_index(kg, + thread_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); #ifdef __SUBSURFACE__ - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } - ccl_global char *ray_state = kernel_split_state.ray_state; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { - ccl_addr_space 
SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + if (IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) { + ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; - /* Trace indirect subsurface rays by restarting the loop. this uses less - * stack memory than invoking kernel_path_indirect. - */ - if(ss_indirect->num_rays) { - kernel_path_subsurface_setup_indirect(kg, - ss_indirect, - state, - ray, - L, - throughput); - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - } -#endif /* __SUBSURFACE__ */ + /* Trace indirect subsurface rays by restarting the loop. this uses less + * stack memory than invoking kernel_path_indirect. + */ + if (ss_indirect->num_rays) { + kernel_path_subsurface_setup_indirect(kg, ss_indirect, state, ray, L, throughput); + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +#endif /* __SUBSURFACE__ */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index 5b2c554b922..7ecb099208d 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -23,45 +23,45 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_lamp_emission(KernelGlobals *kg) { #ifndef __VOLUME__ - /* We will empty this queue in this kernel. */ - if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { - kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; - } + /* We will empty this queue in this kernel. */ + if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + } #endif - /* Fetch use_queues_flag. */ - char local_use_queues_flag = *kernel_split_params.use_queues_flag; - ccl_barrier(CCL_LOCAL_MEM_FENCE); + /* Fetch use_queues_flag. 
*/ + char local_use_queues_flag = *kernel_split_params.use_queues_flag; + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(local_use_queues_flag) { - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (local_use_queues_flag) { + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, #ifndef __VOLUME__ - 1 + 1 #else - 0 + 0 #endif - ); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - } + ); + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) - { - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) { + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - float3 throughput = kernel_split_state.throughput[ray_index]; - Ray ray = kernel_split_state.ray[ray_index]; - ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); + float3 throughput = kernel_split_state.throughput[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; + ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); - kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L); - } + kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L); + } } 
CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index e388955f1af..781ce869374 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -48,217 +48,211 @@ CCL_NAMESPACE_BEGIN #ifdef __BRANCHED_PATH__ ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index) { - kernel_split_branched_path_indirect_loop_init(kg, ray_index); + kernel_split_branched_path_indirect_loop_init(kg, ray_index); - ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT); + ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT); } ccl_device void kernel_split_branched_transparent_bounce(KernelGlobals *kg, int ray_index) { - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; # ifdef __VOLUME__ - if(!(sd->flag & SD_HAS_ONLY_VOLUME)) { + if (!(sd->flag & SD_HAS_ONLY_VOLUME)) { # endif - /* continue in case of transparency */ - *throughput *= shader_bsdf_transparency(kg, sd); + /* continue in case of transparency */ + *throughput *= shader_bsdf_transparency(kg, sd); - if(is_zero(*throughput)) { - kernel_split_path_end(kg, ray_index); - return; - } + if (is_zero(*throughput)) { + kernel_split_path_end(kg, ray_index); + return; + } - /* Update Path State */ - path_state_next(kg, state, LABEL_TRANSPARENT); + /* Update Path State */ + path_state_next(kg, state, 
LABEL_TRANSPARENT); # ifdef __VOLUME__ - } - else { - if(!path_state_volume_next(kg, state)) { - kernel_split_path_end(kg, ray_index); - return; - } - } + } + else { + if (!path_state_volume_next(kg, state)) { + kernel_split_path_end(kg, ray_index); + return; + } + } # endif - ray->P = ray_offset(sd->P, -sd->Ng); - ray->t -= sd->ray_length; /* clipping works through transparent */ + ray->P = ray_offset(sd->P, -sd->Ng); + ray->t -= sd->ray_length; /* clipping works through transparent */ # ifdef __RAY_DIFFERENTIALS__ - ray->dP = sd->dP; - ray->dD.dx = -sd->dI.dx; - ray->dD.dy = -sd->dI.dy; -# endif /* __RAY_DIFFERENTIALS__ */ + ray->dP = sd->dP; + ray->dD.dx = -sd->dI.dx; + ray->dD.dy = -sd->dI.dy; +# endif /* __RAY_DIFFERENTIALS__ */ # ifdef __VOLUME__ - /* enter/exit volume */ - kernel_volume_stack_enter_exit(kg, sd, state->volume_stack); -# endif /* __VOLUME__ */ + /* enter/exit volume */ + kernel_volume_stack_enter_exit(kg, sd, state->volume_stack); +# endif /* __VOLUME__ */ } -#endif /* __BRANCHED_PATH__ */ +#endif /* __BRANCHED_PATH__ */ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, ccl_local_param unsigned int *local_queue_atomics) { - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { - /* If we are here, then it means that scene-intersect kernel - * has already been executed atleast once. From the next time, - * scene-intersect kernel may operate on queues to fetch ray index - */ - *kernel_split_params.use_queues_flag = 1; - - /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and - * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the - * previous kernel. 
- */ - kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; - kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; - } - - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); - - ccl_global char *ray_state = kernel_split_state.ray_state; - -# ifdef __VOLUME__ - /* Reactivate only volume rays here, most surface work was skipped. */ - if(IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE); - } -# endif + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + /* If we are here, then it means that scene-intersect kernel + * has already been executed atleast once. From the next time, + * scene-intersect kernel may operate on queues to fetch ray index + */ + *kernel_split_params.use_queues_flag = 1; + + /* Mark queue indices of QUEUE_SHADOW_RAY_CAST_AO_RAYS and + * QUEUE_SHADOW_RAY_CAST_DL_RAYS queues that were made empty during the + * previous kernel. + */ + kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0; + kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0; + } + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); + + ccl_global char *ray_state = kernel_split_state.ray_state; + +#ifdef __VOLUME__ + /* Reactivate only volume rays here, most surface work was skipped. 
*/ + if (IS_STATE(ray_state, ray_index, RAY_HAS_ONLY_VOLUME)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_ACTIVE); + } +#endif - bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE); - if(active) { - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE); + if (active) { + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; #ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { + if (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { #endif - /* Compute direct lighting and next bounce. */ - if(!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) { - kernel_split_path_end(kg, ray_index); - } + /* Compute direct lighting and next bounce. 
*/ + if (!kernel_path_surface_bounce(kg, sd, throughput, state, &L->state, ray)) { + kernel_split_path_end(kg, ray_index); + } #ifdef __BRANCHED_PATH__ - } - else if(sd->flag & SD_HAS_ONLY_VOLUME) { - kernel_split_branched_transparent_bounce(kg, ray_index); - } - else { - kernel_split_branched_indirect_light_init(kg, ray_index); - - if(kernel_split_branched_path_surface_indirect_light_iter(kg, - ray_index, - 1.0f, - kernel_split_sd(branched_state_sd, ray_index), - true, - true)) - { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - else { - kernel_split_branched_path_indirect_loop_end(kg, ray_index); - kernel_split_branched_transparent_bounce(kg, ray_index); - } - } -#endif /* __BRANCHED_PATH__ */ - } - - /* Enqueue RAY_UPDATE_BUFFER rays. */ - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active, - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); + } + else if (sd->flag & SD_HAS_ONLY_VOLUME) { + kernel_split_branched_transparent_bounce(kg, ray_index); + } + else { + kernel_split_branched_indirect_light_init(kg, ray_index); + + if (kernel_split_branched_path_surface_indirect_light_iter( + kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + else { + kernel_split_branched_path_indirect_loop_end(kg, ray_index); + kernel_split_branched_transparent_bounce(kg, ray_index); + } + } +#endif /* __BRANCHED_PATH__ */ + } + + /* Enqueue RAY_UPDATE_BUFFER rays. 
*/ + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); #ifdef __BRANCHED_PATH__ - /* iter loop */ - if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { - kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0; - } - - ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), - QUEUE_LIGHT_INDIRECT_ITER, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - - if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) { - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); - - if(kernel_split_branched_path_surface_indirect_light_iter(kg, - ray_index, - 1.0f, - kernel_split_sd(branched_state_sd, ray_index), - true, - true)) - { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - else { - kernel_split_branched_path_indirect_loop_end(kg, ray_index); - kernel_split_branched_transparent_bounce(kg, ray_index); - } - } + /* iter loop */ + if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0; + } + + ray_index = get_ray_index(kg, + ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), + QUEUE_LIGHT_INDIRECT_ITER, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + + if (IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) { + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + + if 
(kernel_split_branched_path_surface_indirect_light_iter( + kg, ray_index, 1.0f, kernel_split_sd(branched_state_sd, ray_index), true, true)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + else { + kernel_split_branched_path_indirect_loop_end(kg, ray_index); + kernel_split_branched_transparent_bounce(kg, ray_index); + } + } # ifdef __VOLUME__ - /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */ - ccl_barrier(CCL_LOCAL_MEM_FENCE); - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - enqueue_ray_index_local(ray_index, - QUEUE_VOLUME_INDIRECT_ITER, - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER), - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); - -# endif /* __VOLUME__ */ + /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + enqueue_ray_index_local( + ray_index, + QUEUE_VOLUME_INDIRECT_ITER, + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER), + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); + +# endif /* __VOLUME__ */ # ifdef __SUBSURFACE__ - /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */ - ccl_barrier(CCL_LOCAL_MEM_FENCE); - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); - - ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - enqueue_ray_index_local(ray_index, - QUEUE_SUBSURFACE_INDIRECT_ITER, - IS_STATE(kernel_split_state.ray_state, ray_index, 
RAY_SUBSURFACE_INDIRECT_NEXT_ITER), - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); -# endif /* __SUBSURFACE__ */ -#endif /* __BRANCHED_PATH__ */ + /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */ + ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); + + ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + enqueue_ray_index_local( + ray_index, + QUEUE_SUBSURFACE_INDIRECT_ITER, + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER), + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); +# endif /* __SUBSURFACE__ */ +#endif /* __BRANCHED_PATH__ */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h index fdd54225b07..3faa3208341 100644 --- a/intern/cycles/kernel/split/kernel_path_init.h +++ b/intern/cycles/kernel/split/kernel_path_init.h @@ -21,61 +21,59 @@ CCL_NAMESPACE_BEGIN * * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE */ -ccl_device void kernel_path_init(KernelGlobals *kg) { - int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); +ccl_device void kernel_path_init(KernelGlobals *kg) +{ + int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0); - /* This is the first assignment to ray_state; - * So we dont use ASSIGN_RAY_STATE macro. - */ - kernel_split_state.ray_state[ray_index] = RAY_ACTIVE; + /* This is the first assignment to ray_state; + * So we dont use ASSIGN_RAY_STATE macro. + */ + kernel_split_state.ray_state[ray_index] = RAY_ACTIVE; - /* Get work. */ - ccl_global uint *work_pools = kernel_split_params.work_pools; - uint total_work_size = kernel_split_params.total_work_size; - uint work_index; + /* Get work. 
*/ + ccl_global uint *work_pools = kernel_split_params.work_pools; + uint total_work_size = kernel_split_params.total_work_size; + uint work_index; - if(!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { - /* No more work, mark ray as inactive */ - kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; + if (!get_next_work(kg, work_pools, total_work_size, ray_index, &work_index)) { + /* No more work, mark ray as inactive */ + kernel_split_state.ray_state[ray_index] = RAY_INACTIVE; - return; - } + return; + } - ccl_global WorkTile *tile = &kernel_split_params.tile; - uint x, y, sample; - get_work_pixel(tile, work_index, &x, &y, &sample); + ccl_global WorkTile *tile = &kernel_split_params.tile; + uint x, y, sample; + get_work_pixel(tile, work_index, &x, &y, &sample); - /* Store buffer offset for writing to passes. */ - uint buffer_offset = (tile->offset + x + y*tile->stride) * kernel_data.film.pass_stride; - kernel_split_state.buffer_offset[ray_index] = buffer_offset; + /* Store buffer offset for writing to passes. */ + uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride; + kernel_split_state.buffer_offset[ray_index] = buffer_offset; - /* Initialize random numbers and ray. */ - uint rng_hash; - kernel_path_trace_setup(kg, - sample, - x, y, - &rng_hash, - &kernel_split_state.ray[ray_index]); + /* Initialize random numbers and ray. */ + uint rng_hash; + kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &kernel_split_state.ray[ray_index]); - if(kernel_split_state.ray[ray_index].t != 0.0f) { - /* Initialize throughput, path radiance, Ray, PathState; - * These rays proceed with path-iteration. 
- */ - kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); - path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass); - path_state_init(kg, - AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), - &kernel_split_state.path_state[ray_index], - rng_hash, - sample, - &kernel_split_state.ray[ray_index]); + if (kernel_split_state.ray[ray_index].t != 0.0f) { + /* Initialize throughput, path radiance, Ray, PathState; + * These rays proceed with path-iteration. + */ + kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f); + path_radiance_init(&kernel_split_state.path_radiance[ray_index], + kernel_data.film.use_light_pass); + path_state_init(kg, + AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]), + &kernel_split_state.path_state[ray_index], + rng_hash, + sample, + &kernel_split_state.ray[ray_index]); #ifdef __SUBSURFACE__ - kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); + kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]); #endif - } - else { - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); - } + } + else { + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h index df67fabab19..2db87f7a671 100644 --- a/intern/cycles/kernel/split/kernel_queue_enqueue.h +++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h @@ -35,58 +35,53 @@ CCL_NAMESPACE_BEGIN * - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with * RAY_TO_REGENERATE, RAY_UPDATE_BUFFER, RAY_HIT_BACKGROUND rays. 
*/ -ccl_device void kernel_queue_enqueue(KernelGlobals *kg, - ccl_local_param QueueEnqueueLocals *locals) +ccl_device void kernel_queue_enqueue(KernelGlobals *kg, ccl_local_param QueueEnqueueLocals *locals) { - /* We have only 2 cases (Hit/Not-Hit) */ - int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + /* We have only 2 cases (Hit/Not-Hit) */ + int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(lidx == 0) { - locals->queue_atomics[0] = 0; - locals->queue_atomics[1] = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (lidx == 0) { + locals->queue_atomics[0] = 0; + locals->queue_atomics[1] = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int queue_number = -1; + int queue_number = -1; - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) { - queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; - } - else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) || - IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { - queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS; - } + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) { + queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS; + } + else if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME) || + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { + queue_number = 
QUEUE_ACTIVE_AND_REGENERATED_RAYS; + } - unsigned int my_lqidx; - if(queue_number != -1) { - my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics); - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + unsigned int my_lqidx; + if (queue_number != -1) { + my_lqidx = get_local_queue_index(queue_number, locals->queue_atomics); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - if(lidx == 0) { - locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = - get_global_per_queue_offset(QUEUE_ACTIVE_AND_REGENERATED_RAYS, - locals->queue_atomics, - kernel_split_params.queue_index); - locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = - get_global_per_queue_offset(QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - locals->queue_atomics, - kernel_split_params.queue_index); - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + if (lidx == 0) { + locals->queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = get_global_per_queue_offset( + QUEUE_ACTIVE_AND_REGENERATED_RAYS, locals->queue_atomics, kernel_split_params.queue_index); + locals->queue_atomics[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = get_global_per_queue_offset( + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + locals->queue_atomics, + kernel_split_params.queue_index); + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - unsigned int my_gqidx; - if(queue_number != -1) { - my_gqidx = get_global_queue_index(queue_number, - kernel_split_params.queue_size, - my_lqidx, - locals->queue_atomics); - kernel_split_state.queue_data[my_gqidx] = ray_index; - } + unsigned int my_gqidx; + if (queue_number != -1) { + my_gqidx = get_global_queue_index( + queue_number, kernel_split_params.queue_size, my_lqidx, locals->queue_atomics); + kernel_split_state.queue_data[my_gqidx] = ray_index; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h index f5378bc172b..5fef3e045f8 100644 --- a/intern/cycles/kernel/split/kernel_scene_intersect.h +++ 
b/intern/cycles/kernel/split/kernel_scene_intersect.h @@ -25,55 +25,56 @@ CCL_NAMESPACE_BEGIN */ ccl_device void kernel_scene_intersect(KernelGlobals *kg) { - /* Fetch use_queues_flag */ - char local_use_queues_flag = *kernel_split_params.use_queues_flag; - ccl_barrier(CCL_LOCAL_MEM_FENCE); + /* Fetch use_queues_flag */ + char local_use_queues_flag = *kernel_split_params.use_queues_flag; + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(local_use_queues_flag) { - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (local_use_queues_flag) { + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } + } - /* All regenerated rays become active here */ - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { + /* All regenerated rays become active here */ + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) { #ifdef __BRANCHED_PATH__ - if(kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) { - kernel_split_path_end(kg, ray_index); - } - else -#endif /* __BRANCHED_PATH__ */ - { - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); - } - } + if (kernel_split_state.branched_state[ray_index].waiting_on_shared_samples) { + kernel_split_path_end(kg, ray_index); + } + else +#endif /* __BRANCHED_PATH__ */ + { + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); + } + } - if(!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { - return; - } + if (!IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + return; + } 
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - Ray ray = kernel_split_state.ray[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - Intersection isect; - bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L); - kernel_split_state.isect[ray_index] = isect; + Intersection isect; + bool hit = kernel_path_scene_intersect(kg, state, &ray, &isect, L); + kernel_split_state.isect[ray_index] = isect; - if(!hit) { - /* Change the state of rays that hit the background; - * These rays undergo special processing in the - * background_bufferUpdate kernel. - */ - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND); - } + if (!hit) { + /* Change the state of rays that hit the background; + * These rays undergo special processing in the + * background_bufferUpdate kernel. 
+ */ + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 2bc2d300699..8e39c9797e5 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -22,45 +22,46 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_shader_eval(KernelGlobals *kg) { - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - /* Sorting on cuda split is not implemented */ + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + /* Sorting on cuda split is not implemented */ #ifdef __KERNEL_CUDA__ - int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; + int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; #else - int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS]; + int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS]; #endif - if(ray_index >= queue_index) { - return; - } - ray_index = get_ray_index(kg, ray_index, + if (ray_index >= queue_index) { + return; + } + ray_index = get_ray_index(kg, + ray_index, #ifdef __KERNEL_CUDA__ - QUEUE_ACTIVE_AND_REGENERATED_RAYS, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, #else - QUEUE_SHADER_SORTED_RAYS, + QUEUE_SHADER_SORTED_RAYS, #endif - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } - ccl_global char *ray_state = kernel_split_state.ray_state; - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + ccl_global char *ray_state = kernel_split_state.ray_state; + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + ccl_global 
PathState *state = &kernel_split_state.path_state[ray_index]; - shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag); + shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag); #ifdef __BRANCHED_PATH__ - if(kernel_data.integrator.branched) { - shader_merge_closures(kernel_split_sd(sd, ray_index)); - } - else + if (kernel_data.integrator.branched) { + shader_merge_closures(kernel_split_sd(sd, ray_index)); + } + else #endif - { - shader_prepare_closures(kernel_split_sd(sd, ray_index), state); - } - } + { + shader_prepare_closures(kernel_split_sd(sd, ray_index), state); + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h index ea3ec2ec83f..da332db2c98 100644 --- a/intern/cycles/kernel/split/kernel_shader_setup.h +++ b/intern/cycles/kernel/split/kernel_shader_setup.h @@ -25,54 +25,52 @@ CCL_NAMESPACE_BEGIN ccl_device void kernel_shader_setup(KernelGlobals *kg, ccl_local_param unsigned int *local_queue_atomics) { - /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */ - if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { - *local_queue_atomics = 0; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. 
*/ + if (ccl_local_id(0) == 0 && ccl_local_id(1) == 0) { + *local_queue_atomics = 0; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; - if(ray_index >= queue_index) { - return; - } - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 0); + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; + if (ray_index >= queue_index) { + return; + } + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 0); - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } - char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0; - enqueue_ray_index_local(ray_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - enqueue_flag, - kernel_split_params.queue_size, - local_queue_atomics, - kernel_split_state.queue_data, - kernel_split_params.queue_index); + char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : + 0; + enqueue_ray_index_local(ray_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + enqueue_flag, + kernel_split_params.queue_size, + local_queue_atomics, + kernel_split_state.queue_data, + kernel_split_params.queue_index); - /* Continue on with shader evaluation. */ - if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { - Intersection isect = kernel_split_state.isect[ray_index]; - Ray ray = kernel_split_state.ray[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); + /* Continue on with shader evaluation. 
*/ + if (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { + Intersection isect = kernel_split_state.isect[ray_index]; + Ray ray = kernel_split_state.ray[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); - shader_setup_from_ray(kg, - sd, - &isect, - &ray); + shader_setup_from_ray(kg, sd, &isect, &ray); #ifdef __VOLUME__ - if(sd->flag & SD_HAS_ONLY_VOLUME) { - ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME); - } + if (sd->flag & SD_HAS_ONLY_VOLUME) { + ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_HAS_ONLY_VOLUME); + } #endif - } - + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h index 666355de334..95d33a42014 100644 --- a/intern/cycles/kernel/split/kernel_shader_sort.h +++ b/intern/cycles/kernel/split/kernel_shader_sort.h @@ -16,82 +16,82 @@ CCL_NAMESPACE_BEGIN - -ccl_device void kernel_shader_sort(KernelGlobals *kg, - ccl_local_param ShaderSortLocals *locals) +ccl_device void kernel_shader_sort(KernelGlobals *kg, ccl_local_param ShaderSortLocals *locals) { #ifndef __KERNEL_CUDA__ - int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; - if(tid == 0) { - kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize; - } + int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS]; + if (tid == 0) { + kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize; + } - uint offset = (tid/SHADER_SORT_LOCAL_SIZE)*SHADER_SORT_BLOCK_SIZE; - if(offset >= qsize) { - return; - } + uint offset = (tid / SHADER_SORT_LOCAL_SIZE) * SHADER_SORT_BLOCK_SIZE; + if (offset >= qsize) { + return; + } - int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); - uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * 
(kernel_split_params.queue_size); - uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size); - ccl_local uint *local_value = &locals->local_value[0]; - ccl_local ushort *local_index = &locals->local_index[0]; + int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0); + uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size); + uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size); + ccl_local uint *local_value = &locals->local_value[0]; + ccl_local ushort *local_index = &locals->local_index[0]; - /* copy to local memory */ - for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { - uint idx = offset + i + lid; - uint add = input + idx; - uint value = (~0); - if(idx < qsize) { - int ray_index = kernel_split_state.queue_data[add]; - bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); - if(valid) { - value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK; - } - } - local_value[i + lid] = value; - local_index[i + lid] = i + lid; - } - ccl_barrier(CCL_LOCAL_MEM_FENCE); + /* copy to local memory */ + for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { + uint idx = offset + i + lid; + uint add = input + idx; + uint value = (~0); + if (idx < qsize) { + int ray_index = kernel_split_state.queue_data[add]; + bool valid = (ray_index != QUEUE_EMPTY_SLOT) && + IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); + if (valid) { + value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK; + } + } + local_value[i + lid] = value; + local_index[i + lid] = i + lid; + } + ccl_barrier(CCL_LOCAL_MEM_FENCE); - /* skip sorting for cpu split kernel */ + /* skip sorting for cpu split kernel */ # ifdef __KERNEL_OPENCL__ - /* bitonic sort */ - for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) { - for(uint inc = length; inc > 0; inc >>= 1) { - for(uint ii = 0; ii < 
SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) { - uint i = lid + ii; - bool direction = ((i & (length << 1)) != 0); - uint j = i ^ inc; - ushort ioff = local_index[i]; - ushort joff = local_index[j]; - uint iKey = local_value[ioff]; - uint jKey = local_value[joff]; - bool smaller = (jKey < iKey) || (jKey == iKey && j < i); - bool swap = smaller ^ (j < i) ^ direction; - ccl_barrier(CCL_LOCAL_MEM_FENCE); - local_index[i] = (swap) ? joff : ioff; - local_index[j] = (swap) ? ioff : joff; - ccl_barrier(CCL_LOCAL_MEM_FENCE); - } - } - } -# endif /* __KERNEL_OPENCL__ */ + /* bitonic sort */ + for (uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) { + for (uint inc = length; inc > 0; inc >>= 1) { + for (uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) { + uint i = lid + ii; + bool direction = ((i & (length << 1)) != 0); + uint j = i ^ inc; + ushort ioff = local_index[i]; + ushort joff = local_index[j]; + uint iKey = local_value[ioff]; + uint jKey = local_value[joff]; + bool smaller = (jKey < iKey) || (jKey == iKey && j < i); + bool swap = smaller ^ (j < i) ^ direction; + ccl_barrier(CCL_LOCAL_MEM_FENCE); + local_index[i] = (swap) ? joff : ioff; + local_index[j] = (swap) ? ioff : joff; + ccl_barrier(CCL_LOCAL_MEM_FENCE); + } + } + } +# endif /* __KERNEL_OPENCL__ */ - /* copy to destination */ - for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { - uint idx = offset + i + lid; - uint lidx = local_index[i + lid]; - uint outi = output + idx; - uint ini = input + offset + lidx; - uint value = local_value[lidx]; - if(idx < qsize) { - kernel_split_state.queue_data[outi] = (value == (~0)) ? 
QUEUE_EMPTY_SLOT : kernel_split_state.queue_data[ini]; - } - } -#endif /* __KERNEL_CUDA__ */ + /* copy to destination */ + for (uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) { + uint idx = offset + i + lid; + uint lidx = local_index[i + lid]; + uint outi = output + idx; + uint ini = input + offset + lidx; + uint value = local_value[lidx]; + if (idx < qsize) { + kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : + kernel_split_state.queue_data[ini]; + } + } +#endif /* __KERNEL_CUDA__ */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h index fb08112503a..5d772fc597b 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h @@ -19,35 +19,40 @@ CCL_NAMESPACE_BEGIN /* Shadow ray cast for AO. */ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) { - unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; - ccl_barrier(CCL_LOCAL_MEM_FENCE); + unsigned int ao_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS]; + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = QUEUE_EMPTY_SLOT; - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index < ao_queue_length) { - ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_AO_RAYS, - kernel_split_state.queue_data, kernel_split_params.queue_size, 1); - } + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (thread_index < ao_queue_length) { + ray_index = get_ray_index(kg, + thread_index, + QUEUE_SHADOW_RAY_CAST_AO_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + } - if(ray_index == QUEUE_EMPTY_SLOT) { - return; - } + if (ray_index == QUEUE_EMPTY_SLOT) { + return; + } - ShaderData *sd = kernel_split_sd(sd, 
ray_index); - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - float3 throughput = kernel_split_state.throughput[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + float3 throughput = kernel_split_state.throughput[ray_index]; #ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { + if (!kernel_data.integrator.branched || + IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { #endif - kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd)); + kernel_path_ao(kg, sd, emission_sd, L, state, throughput, shader_bsdf_alpha(kg, sd)); #ifdef __BRANCHED_PATH__ - } - else { - kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput); - } + } + else { + kernel_branched_path_ao(kg, sd, emission_sd, L, state, throughput); + } #endif } diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index da072fd5f1a..82990ce9fae 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -19,89 +19,80 @@ CCL_NAMESPACE_BEGIN /* Shadow ray cast for direct visible light. 
*/ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) { - unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; - ccl_barrier(CCL_LOCAL_MEM_FENCE); + unsigned int dl_queue_length = kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS]; + ccl_barrier(CCL_LOCAL_MEM_FENCE); - int ray_index = QUEUE_EMPTY_SLOT; - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index < dl_queue_length) { - ray_index = get_ray_index(kg, thread_index, QUEUE_SHADOW_RAY_CAST_DL_RAYS, - kernel_split_state.queue_data, kernel_split_params.queue_size, 1); - } + int ray_index = QUEUE_EMPTY_SLOT; + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (thread_index < dl_queue_length) { + ray_index = get_ray_index(kg, + thread_index, + QUEUE_SHADOW_RAY_CAST_DL_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + } #ifdef __BRANCHED_PATH__ - /* TODO(mai): move this somewhere else? */ - if(thread_index == 0) { - /* Clear QUEUE_INACTIVE_RAYS before next kernel. */ - kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0; - } -#endif /* __BRANCHED_PATH__ */ + /* TODO(mai): move this somewhere else? */ + if (thread_index == 0) { + /* Clear QUEUE_INACTIVE_RAYS before next kernel. 
*/ + kernel_split_params.queue_index[QUEUE_INACTIVE_RAYS] = 0; + } +#endif /* __BRANCHED_PATH__ */ - if(ray_index == QUEUE_EMPTY_SLOT) - return; + if (ray_index == QUEUE_EMPTY_SLOT) + return; - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - Ray ray = kernel_split_state.light_ray[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - float3 throughput = kernel_split_state.throughput[ray_index]; + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + Ray ray = kernel_split_state.light_ray[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + float3 throughput = kernel_split_state.throughput[ray_index]; - BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - bool is_lamp = kernel_split_state.is_lamp[ray_index]; + BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + bool is_lamp = kernel_split_state.is_lamp[ray_index]; -# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) - bool use_branched = false; - int all = 0; +#if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) + bool use_branched = false; + int all = 0; - if(state->flag & PATH_RAY_SHADOW_CATCHER) { - use_branched = true; - all = 1; - } -# if defined(__BRANCHED_PATH__) - else if(kernel_data.integrator.branched) { - use_branched = true; + if (state->flag & PATH_RAY_SHADOW_CATCHER) { + use_branched = true; + all = 1; + } +# if defined(__BRANCHED_PATH__) + else if (kernel_data.integrator.branched) { + use_branched = true; - if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { - all = (kernel_data.integrator.sample_all_lights_indirect); - } - else - { - all = 
(kernel_data.integrator.sample_all_lights_direct); - } - } -# endif /* __BRANCHED_PATH__ */ + if (IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { + all = (kernel_data.integrator.sample_all_lights_indirect); + } + else { + all = (kernel_data.integrator.sample_all_lights_direct); + } + } +# endif /* __BRANCHED_PATH__ */ - if(use_branched) { - kernel_branched_path_surface_connect_light(kg, - sd, - emission_sd, - state, - throughput, - 1.0f, - L, - all); - } - else -# endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/ - { - /* trace shadow ray */ - float3 shadow; + if (use_branched) { + kernel_branched_path_surface_connect_light( + kg, sd, emission_sd, state, throughput, 1.0f, L, all); + } + else +#endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)*/ + { + /* trace shadow ray */ + float3 shadow; - if(!shadow_blocked(kg, - sd, - emission_sd, - state, - &ray, - &shadow)) - { - /* accumulate */ - path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput, &L_light); - } - } + if (!shadow_blocked(kg, sd, emission_sd, state, &ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); + } + else { + path_radiance_accum_total_light(L, state, throughput, &L_light); + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h index 4b86696691a..384bc952460 100644 --- a/intern/cycles/kernel/split/kernel_split_common.h +++ b/intern/cycles/kernel/split/kernel_split_common.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __KERNEL_SPLIT_H__ -#define __KERNEL_SPLIT_H__ +#ifndef __KERNEL_SPLIT_H__ +#define __KERNEL_SPLIT_H__ #include "kernel/kernel_math.h" #include "kernel/kernel_types.h" @@ -57,47 +57,48 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index) { - ccl_global char *ray_state = kernel_split_state.ray_state; + ccl_global char *ray_state = kernel_split_state.ray_state; #ifdef __BRANCHED_PATH__ # ifdef __SUBSURFACE__ - ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; - - if(ss_indirect->num_rays) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - } - else -# endif /* __SUBSURFACE__ */ - if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) { - int orig_ray = kernel_split_state.branched_state[ray_index].original_ray; - - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray]; - - path_radiance_sum_indirect(L); - path_radiance_accum_sample(orig_ray_L, L); - - atomic_fetch_and_dec_uint32((ccl_global uint*)&kernel_split_state.branched_state[orig_ray].shared_sample_count); - - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); - } - else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER); - } - else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER); - } - else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER); - } - else { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); - } + ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + + if (ss_indirect->num_rays) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } + else +# endif /* 
__SUBSURFACE__ */ + if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT_SHARED)) { + int orig_ray = kernel_split_state.branched_state[ray_index].original_ray; + + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + PathRadiance *orig_ray_L = &kernel_split_state.path_radiance[orig_ray]; + + path_radiance_sum_indirect(L); + path_radiance_accum_sample(orig_ray_L, L); + + atomic_fetch_and_dec_uint32( + (ccl_global uint *)&kernel_split_state.branched_state[orig_ray].shared_sample_count); + + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_INACTIVE); + } + else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER); + } + else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER); + } + else if (IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER); + } + else { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + } #else - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER); #endif } CCL_NAMESPACE_END -#endif /* __KERNEL_SPLIT_H__ */ +#endif /* __KERNEL_SPLIT_H__ */ diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h index 3f6b3977d79..433b1221a37 100644 --- a/intern/cycles/kernel/split/kernel_split_data.h +++ b/intern/cycles/kernel/split/kernel_split_data.h @@ -24,22 +24,22 @@ CCL_NAMESPACE_BEGIN ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements) { - (void) kg; /* Unused on CPU. */ + (void)kg; /* Unused on CPU. 
*/ - uint64_t size = 0; -#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16) - size = size SPLIT_DATA_ENTRIES; + uint64_t size = 0; +#define SPLIT_DATA_ENTRY(type, name, num) +align_up(num_elements *num * sizeof(type), 16) + size = size SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY - uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1); + uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1); #ifdef __BRANCHED_PATH__ - size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); #endif - size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); - return size; + return size; } ccl_device_inline void split_data_init(KernelGlobals *kg, @@ -48,28 +48,29 @@ ccl_device_inline void split_data_init(KernelGlobals *kg, ccl_global void *data, ccl_global char *ray_state) { - (void) kg; /* Unused on CPU. */ + (void)kg; /* Unused on CPU. 
*/ - ccl_global char *p = (ccl_global char*)data; + ccl_global char *p = (ccl_global char *)data; #define SPLIT_DATA_ENTRY(type, name, num) \ - split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16); - SPLIT_DATA_ENTRIES; + split_data->name = (type *)p; \ + p += align_up(num_elements * num * sizeof(type), 16); + SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY - uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1); + uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1); #ifdef __BRANCHED_PATH__ - split_data->_branched_state_sd = (ShaderData*)p; - p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + split_data->_branched_state_sd = (ShaderData *)p; + p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); #endif - split_data->_sd = (ShaderData*)p; - p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + split_data->_sd = (ShaderData *)p; + p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); - split_data->ray_state = ray_state; + split_data->ray_state = ray_state; } CCL_NAMESPACE_END -#endif /* __KERNEL_SPLIT_DATA_H__ */ +#endif /* __KERNEL_SPLIT_DATA_H__ */ diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 83df1e2a0a6..6ff3f5bdb55 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -22,17 +22,17 @@ CCL_NAMESPACE_BEGIN /* parameters used by the split kernels, we use a single struct to avoid passing these to each kernel */ typedef struct SplitParams { - WorkTile tile; - uint total_work_size; + WorkTile tile; + uint total_work_size; - ccl_global unsigned int *work_pools; + ccl_global unsigned int *work_pools; - ccl_global int *queue_index; - int queue_size; - ccl_global char *use_queues_flag; + ccl_global int *queue_index; + int queue_size; + ccl_global 
char *use_queues_flag; - /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */ - int dummy_sd_flag; + /* Place for storing sd->flag. AMD GPU OpenCL compiler workaround */ + int dummy_sd_flag; } SplitParams; /* Global memory variables [porting]; These memory is used for @@ -46,98 +46,98 @@ typedef struct SplitParams { #ifdef __BRANCHED_PATH__ typedef ccl_global struct SplitBranchedState { - /* various state that must be kept and restored after an indirect loop */ - PathState path_state; - float3 throughput; - Ray ray; + /* various state that must be kept and restored after an indirect loop */ + PathState path_state; + float3 throughput; + Ray ray; - Intersection isect; + Intersection isect; - char ray_state; + char ray_state; - /* indirect loop state */ - int next_closure; - int next_sample; + /* indirect loop state */ + int next_closure; + int next_sample; -#ifdef __SUBSURFACE__ - int ss_next_closure; - int ss_next_sample; - int next_hit; - int num_hits; - - uint lcg_state; - LocalIntersection ss_isect; -#endif /*__SUBSURFACE__ */ - - int shared_sample_count; /* number of branched samples shared with other threads */ - int original_ray; /* index of original ray when sharing branched samples */ - bool waiting_on_shared_samples; +# ifdef __SUBSURFACE__ + int ss_next_closure; + int ss_next_sample; + int next_hit; + int num_hits; + + uint lcg_state; + LocalIntersection ss_isect; +# endif /*__SUBSURFACE__ */ + + int shared_sample_count; /* number of branched samples shared with other threads */ + int original_ray; /* index of original ray when sharing branched samples */ + bool waiting_on_shared_samples; } SplitBranchedState; -#define SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_ENTRY( SplitBranchedState, branched_state, 1) \ - SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0) +# define SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1) \ + SPLIT_DATA_ENTRY(ShaderData, _branched_state_sd, 0) #else -#define 
SPLIT_DATA_BRANCHED_ENTRIES -#endif /* __BRANCHED_PATH__ */ +# define SPLIT_DATA_BRANCHED_ENTRIES +#endif /* __BRANCHED_PATH__ */ #ifdef __SUBSURFACE__ # define SPLIT_DATA_SUBSURFACE_ENTRIES \ - SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1) + SPLIT_DATA_ENTRY(ccl_global SubsurfaceIndirectRays, ss_rays, 1) #else # define SPLIT_DATA_SUBSURFACE_ENTRIES -#endif /* __SUBSURFACE__ */ +#endif /* __SUBSURFACE__ */ #ifdef __VOLUME__ -# define SPLIT_DATA_VOLUME_ENTRIES \ - SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1) +# define SPLIT_DATA_VOLUME_ENTRIES SPLIT_DATA_ENTRY(ccl_global PathState, state_shadow, 1) #else # define SPLIT_DATA_VOLUME_ENTRIES -#endif /* __VOLUME__ */ +#endif /* __VOLUME__ */ #define SPLIT_DATA_ENTRIES \ - SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ - SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ - SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ - SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ - SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ - SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ - SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ - SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ - SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? 
*/ \ - SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \ - SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ - SPLIT_DATA_SUBSURFACE_ENTRIES \ - SPLIT_DATA_VOLUME_ENTRIES \ - SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_ENTRY(ShaderData, _sd, 0) + SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ + SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ + SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ + SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ + SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ + SPLIT_DATA_ENTRY( \ + ccl_global int, queue_data, (NUM_QUEUES * 2)) /* TODO(mai): this is too large? */ \ + SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \ + SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ + SPLIT_DATA_SUBSURFACE_ENTRIES \ + SPLIT_DATA_VOLUME_ENTRIES \ + SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_ENTRY(ShaderData, _sd, 0) /* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) 
*/ #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ - SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ - SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ - SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ - SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ - SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ - SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ - SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ - SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ - SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ - SPLIT_DATA_SUBSURFACE_ENTRIES \ - SPLIT_DATA_VOLUME_ENTRIES \ - SPLIT_DATA_BRANCHED_ENTRIES \ - SPLIT_DATA_ENTRY(ShaderData, _sd, 0) + SPLIT_DATA_ENTRY(ccl_global float3, throughput, 1) \ + SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \ + SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \ + SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \ + SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ + SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ + SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ + SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ + SPLIT_DATA_SUBSURFACE_ENTRIES \ + SPLIT_DATA_VOLUME_ENTRIES \ + SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_ENTRY(ShaderData, _sd, 0) /* struct that holds pointers to data in the shared state buffer */ typedef struct SplitData { #define SPLIT_DATA_ENTRY(type, name, num) type *name; - SPLIT_DATA_ENTRIES + SPLIT_DATA_ENTRIES #undef SPLIT_DATA_ENTRY - /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from - * the host easily) but is still used the same as the other data so we have it here in this struct as well - */ - ccl_global char *ray_state; + /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from + * the host easily) but is still used the same as the other data so we have it here in this struct as well + */ + ccl_global char 
*ray_state; } SplitData; #ifndef __KERNEL_CUDA__ @@ -148,30 +148,30 @@ __device__ SplitData __split_data; # define kernel_split_state (__split_data) __device__ SplitParams __split_param_data; # define kernel_split_params (__split_param_data) -#endif /* __KERNEL_CUDA__ */ +#endif /* __KERNEL_CUDA__ */ -#define kernel_split_sd(sd, ray_index) ((ShaderData*) \ - ( \ - ((ccl_global char*)kernel_split_state._##sd) + \ - (sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \ - )) +#define kernel_split_sd(sd, ray_index) \ + ((ShaderData *)(((ccl_global char *)kernel_split_state._##sd) + \ + (sizeof(ShaderData) + \ + sizeof(ShaderClosure) * (kernel_data.integrator.max_closures - 1)) * \ + (ray_index))) /* Local storage for queue_enqueue kernel. */ typedef struct QueueEnqueueLocals { - uint queue_atomics[2]; + uint queue_atomics[2]; } QueueEnqueueLocals; /* Local storage for holdout_emission_blurring_pathtermination_ao kernel. */ typedef struct BackgroundAOLocals { - uint queue_atomics_bg; - uint queue_atomics_ao; + uint queue_atomics_bg; + uint queue_atomics_ao; } BackgroundAOLocals; typedef struct ShaderSortLocals { - uint local_value[SHADER_SORT_BLOCK_SIZE]; - ushort local_index[SHADER_SORT_BLOCK_SIZE]; + uint local_value[SHADER_SORT_BLOCK_SIZE]; + ushort local_index[SHADER_SORT_BLOCK_SIZE]; } ShaderSortLocals; CCL_NAMESPACE_END -#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */ +#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */ diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index 08769fe303b..ba06ae3bc53 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -18,276 +18,247 @@ CCL_NAMESPACE_BEGIN #if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__) -ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index) +ccl_device_inline 
void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, + int ray_index) { - kernel_split_branched_path_indirect_loop_init(kg, ray_index); + kernel_split_branched_path_indirect_loop_init(kg, ray_index); - SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - branched_state->ss_next_closure = 0; - branched_state->ss_next_sample = 0; + branched_state->ss_next_closure = 0; + branched_state->ss_next_sample = 0; - branched_state->num_hits = 0; - branched_state->next_hit = 0; + branched_state->num_hits = 0; + branched_state->next_hit = 0; - ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT); + ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT); } -ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index) +ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter( + KernelGlobals *kg, int ray_index) { - SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - - ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index); - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - - for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(!CLOSURE_IS_BSSRDF(sc->type)) - continue; - - /* Closure memory will be overwritten, so read required variables now. 
*/ - Bssrdf *bssrdf = (Bssrdf *)sc; - ClosureType bssrdf_type = sc->type; - float bssrdf_roughness = bssrdf->roughness; - - /* set up random number generator */ - if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 && - branched_state->next_closure == 0 && branched_state->next_sample == 0) - { - branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state, - 0x68bc21eb); - } - int num_samples = kernel_data.integrator.subsurface_samples * 3; - float num_samples_inv = 1.0f/num_samples; - uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i); - - /* do subsurface scatter step with copy of shader data, this will - * replace the BSSRDF with a diffuse BSDF closure */ - for(int j = branched_state->ss_next_sample; j < num_samples; j++) { - ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index]; - *hit_state = branched_state->path_state; - hit_state->rng_hash = bssrdf_rng_hash; - path_state_branch(hit_state, j, num_samples); - - ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect; - float bssrdf_u, bssrdf_v; - path_branched_rng_2D(kg, - bssrdf_rng_hash, - hit_state, - j, - num_samples, - PRNG_BSDF_U, - &bssrdf_u, - &bssrdf_v); - - /* intersection is expensive so avoid doing multiple times for the same input */ - if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) { - uint lcg_state = branched_state->lcg_state; - LocalIntersection ss_isect_private; - - branched_state->num_hits = subsurface_scatter_multi_intersect(kg, - &ss_isect_private, - sd, - hit_state, - sc, - &lcg_state, - bssrdf_u, bssrdf_v, - true); - - branched_state->lcg_state = lcg_state; - *ss_isect = ss_isect_private; - } - - hit_state->rng_offset += PRNG_BOUNCE_NUM; - -#ifdef __VOLUME__ - Ray volume_ray = branched_state->ray; - bool need_update_volume_stack = - kernel_data.integrator.use_volumes && - sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; -#endif /* 
__VOLUME__ */ - - /* compute lighting with the BSDF closure */ - for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) { - ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index); - *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is - * important as the indirect path will write into bssrdf_sd */ - - LocalIntersection ss_isect_private = *ss_isect; - subsurface_scatter_multi_setup(kg, - &ss_isect_private, - hit, - bssrdf_sd, - hit_state, - bssrdf_type, - bssrdf_roughness); - *ss_isect = ss_isect_private; - -#ifdef __VOLUME__ - if(need_update_volume_stack) { - /* Setup ray from previous surface point to the new one. */ - float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng); - volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t); - - for(int k = 0; k < VOLUME_STACK_SIZE; k++) { - hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k]; - } - - kernel_volume_stack_update_for_subsurface(kg, - emission_sd, - &volume_ray, - hit_state->volume_stack); - } -#endif /* __VOLUME__ */ - -#ifdef __EMISSION__ - if(branched_state->next_closure == 0 && branched_state->next_sample == 0) { - /* direct light */ - if(kernel_data.integrator.use_direct_light) { - int all = (kernel_data.integrator.sample_all_lights_direct) || - (hit_state->flag & PATH_RAY_SHADOW_CATCHER); - kernel_branched_path_surface_connect_light(kg, - bssrdf_sd, - emission_sd, - hit_state, - branched_state->throughput, - num_samples_inv, - L, - all); - } - } -#endif /* __EMISSION__ */ - - /* indirect light */ - if(kernel_split_branched_path_surface_indirect_light_iter(kg, - ray_index, - num_samples_inv, - bssrdf_sd, - false, - false)) - { - branched_state->ss_next_closure = i; - branched_state->ss_next_sample = j; - branched_state->next_hit = hit; - - return true; - } - - branched_state->next_closure = 0; - } - - branched_state->next_hit = 0; - } - - branched_state->ss_next_sample = 0; - } - - branched_state->ss_next_closure = sd->num_closure; - 
- branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); - if(branched_state->waiting_on_shared_samples) { - return true; - } - - kernel_split_branched_path_indirect_loop_end(kg, ray_index); - - return false; + SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; + + ShaderData *sd = kernel_split_sd(branched_state_sd, ray_index); + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + + for (int i = branched_state->ss_next_closure; i < sd->num_closure; i++) { + ShaderClosure *sc = &sd->closure[i]; + + if (!CLOSURE_IS_BSSRDF(sc->type)) + continue; + + /* Closure memory will be overwritten, so read required variables now. */ + Bssrdf *bssrdf = (Bssrdf *)sc; + ClosureType bssrdf_type = sc->type; + float bssrdf_roughness = bssrdf->roughness; + + /* set up random number generator */ + if (branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 && + branched_state->next_closure == 0 && branched_state->next_sample == 0) { + branched_state->lcg_state = lcg_state_init_addrspace(&branched_state->path_state, + 0x68bc21eb); + } + int num_samples = kernel_data.integrator.subsurface_samples * 3; + float num_samples_inv = 1.0f / num_samples; + uint bssrdf_rng_hash = cmj_hash(branched_state->path_state.rng_hash, i); + + /* do subsurface scatter step with copy of shader data, this will + * replace the BSSRDF with a diffuse BSDF closure */ + for (int j = branched_state->ss_next_sample; j < num_samples; j++) { + ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index]; + *hit_state = branched_state->path_state; + hit_state->rng_hash = bssrdf_rng_hash; + path_state_branch(hit_state, j, num_samples); + + ccl_global LocalIntersection *ss_isect = &branched_state->ss_isect; + float bssrdf_u, bssrdf_v; + path_branched_rng_2D( + kg, bssrdf_rng_hash, hit_state, j, num_samples, PRNG_BSDF_U, 
&bssrdf_u, &bssrdf_v); + + /* intersection is expensive so avoid doing multiple times for the same input */ + if (branched_state->next_hit == 0 && branched_state->next_closure == 0 && + branched_state->next_sample == 0) { + uint lcg_state = branched_state->lcg_state; + LocalIntersection ss_isect_private; + + branched_state->num_hits = subsurface_scatter_multi_intersect( + kg, &ss_isect_private, sd, hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true); + + branched_state->lcg_state = lcg_state; + *ss_isect = ss_isect_private; + } + + hit_state->rng_offset += PRNG_BOUNCE_NUM; + +# ifdef __VOLUME__ + Ray volume_ray = branched_state->ray; + bool need_update_volume_stack = kernel_data.integrator.use_volumes && + sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME; +# endif /* __VOLUME__ */ + + /* compute lighting with the BSDF closure */ + for (int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) { + ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index); + *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is + * important as the indirect path will write into bssrdf_sd */ + + LocalIntersection ss_isect_private = *ss_isect; + subsurface_scatter_multi_setup( + kg, &ss_isect_private, hit, bssrdf_sd, hit_state, bssrdf_type, bssrdf_roughness); + *ss_isect = ss_isect_private; + +# ifdef __VOLUME__ + if (need_update_volume_stack) { + /* Setup ray from previous surface point to the new one. 
*/ + float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng); + volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t); + + for (int k = 0; k < VOLUME_STACK_SIZE; k++) { + hit_state->volume_stack[k] = branched_state->path_state.volume_stack[k]; + } + + kernel_volume_stack_update_for_subsurface( + kg, emission_sd, &volume_ray, hit_state->volume_stack); + } +# endif /* __VOLUME__ */ + +# ifdef __EMISSION__ + if (branched_state->next_closure == 0 && branched_state->next_sample == 0) { + /* direct light */ + if (kernel_data.integrator.use_direct_light) { + int all = (kernel_data.integrator.sample_all_lights_direct) || + (hit_state->flag & PATH_RAY_SHADOW_CATCHER); + kernel_branched_path_surface_connect_light(kg, + bssrdf_sd, + emission_sd, + hit_state, + branched_state->throughput, + num_samples_inv, + L, + all); + } + } +# endif /* __EMISSION__ */ + + /* indirect light */ + if (kernel_split_branched_path_surface_indirect_light_iter( + kg, ray_index, num_samples_inv, bssrdf_sd, false, false)) { + branched_state->ss_next_closure = i; + branched_state->ss_next_sample = j; + branched_state->next_hit = hit; + + return true; + } + + branched_state->next_closure = 0; + } + + branched_state->next_hit = 0; + } + + branched_state->ss_next_sample = 0; + } + + branched_state->ss_next_closure = sd->num_closure; + + branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0); + if (branched_state->waiting_on_shared_samples) { + return true; + } + + kernel_split_branched_path_indirect_loop_end(kg, ray_index); + + return false; } -#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */ +#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) { - int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - if(thread_index == 0) { - /* We will empty both queues in this kernel. 
*/ - kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; - kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; - } - - int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); - ray_index = get_ray_index(kg, ray_index, - QUEUE_ACTIVE_AND_REGENERATED_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - get_ray_index(kg, thread_index, - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); + int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + if (thread_index == 0) { + /* We will empty both queues in this kernel. */ + kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0; + kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0; + } + + int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); + ray_index = get_ray_index(kg, + ray_index, + QUEUE_ACTIVE_AND_REGENERATED_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + get_ray_index(kg, + thread_index, + QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); #ifdef __SUBSURFACE__ - ccl_global char *ray_state = kernel_split_state.ray_state; - - if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { - ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; - ShaderData *sd = kernel_split_sd(sd, ray_index); - ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); - - if(sd->flag & SD_BSSRDF) { - -#ifdef __BRANCHED_PATH__ - if(!kernel_data.integrator.branched || - IS_FLAG(ray_state, 
ray_index, RAY_BRANCHED_INDIRECT)) - { -#endif - if(kernel_path_subsurface_scatter(kg, - sd, - emission_sd, - L, - state, - ray, - throughput, - ss_indirect)) - { - kernel_split_path_end(kg, ray_index); - } -#ifdef __BRANCHED_PATH__ - } - else { - kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index); - - if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - } -#endif - } - } + ccl_global char *ray_state = kernel_split_state.ray_state; + + if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { + ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; + PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; + ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; + ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; + ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); + ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); + + if (sd->flag & SD_BSSRDF) { # ifdef __BRANCHED_PATH__ - if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { - kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0; - } - - /* iter loop */ - ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), - QUEUE_SUBSURFACE_INDIRECT_ITER, - kernel_split_state.queue_data, - kernel_split_params.queue_size, - 1); - - if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) { - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); - path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); - - if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) { - ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); - } - } -# 
endif /* __BRANCHED_PATH__ */ - -#endif /* __SUBSURFACE__ */ + if (!kernel_data.integrator.branched || + IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) { +# endif + if (kernel_path_subsurface_scatter( + kg, sd, emission_sd, L, state, ray, throughput, ss_indirect)) { + kernel_split_path_end(kg, ray_index); + } +# ifdef __BRANCHED_PATH__ + } + else { + kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index); + + if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif + } + } +# ifdef __BRANCHED_PATH__ + if (ccl_global_id(0) == 0 && ccl_global_id(1) == 0) { + kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0; + } + + /* iter loop */ + ray_index = get_ray_index(kg, + ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0), + QUEUE_SUBSURFACE_INDIRECT_ITER, + kernel_split_state.queue_data, + kernel_split_params.queue_size, + 1); + + if (IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) { + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]); + path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]); + + if (kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) { + ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED); + } + } +# endif /* __BRANCHED_PATH__ */ + +#endif /* __SUBSURFACE__ */ } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index ccb9aef7a5b..4a386afa5de 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -46,92 +46,102 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 stack_load_float3(float *stack, uint a) { - kernel_assert(a+2 < SVM_STACK_SIZE); + kernel_assert(a + 2 < SVM_STACK_SIZE); - return make_float3(stack[a+0], stack[a+1], stack[a+2]); + return 
make_float3(stack[a + 0], stack[a + 1], stack[a + 2]); } ccl_device_inline void stack_store_float3(float *stack, uint a, float3 f) { - kernel_assert(a+2 < SVM_STACK_SIZE); + kernel_assert(a + 2 < SVM_STACK_SIZE); - stack[a+0] = f.x; - stack[a+1] = f.y; - stack[a+2] = f.z; + stack[a + 0] = f.x; + stack[a + 1] = f.y; + stack[a + 2] = f.z; } ccl_device_inline float stack_load_float(float *stack, uint a) { - kernel_assert(a < SVM_STACK_SIZE); + kernel_assert(a < SVM_STACK_SIZE); - return stack[a]; + return stack[a]; } ccl_device_inline float stack_load_float_default(float *stack, uint a, uint value) { - return (a == (uint)SVM_STACK_INVALID)? __uint_as_float(value): stack_load_float(stack, a); + return (a == (uint)SVM_STACK_INVALID) ? __uint_as_float(value) : stack_load_float(stack, a); } ccl_device_inline void stack_store_float(float *stack, uint a, float f) { - kernel_assert(a < SVM_STACK_SIZE); + kernel_assert(a < SVM_STACK_SIZE); - stack[a] = f; + stack[a] = f; } ccl_device_inline int stack_load_int(float *stack, uint a) { - kernel_assert(a < SVM_STACK_SIZE); + kernel_assert(a < SVM_STACK_SIZE); - return __float_as_int(stack[a]); + return __float_as_int(stack[a]); } ccl_device_inline int stack_load_int_default(float *stack, uint a, uint value) { - return (a == (uint)SVM_STACK_INVALID)? (int)value: stack_load_int(stack, a); + return (a == (uint)SVM_STACK_INVALID) ? 
(int)value : stack_load_int(stack, a); } ccl_device_inline void stack_store_int(float *stack, uint a, int i) { - kernel_assert(a < SVM_STACK_SIZE); + kernel_assert(a < SVM_STACK_SIZE); - stack[a] = __int_as_float(i); + stack[a] = __int_as_float(i); } ccl_device_inline bool stack_valid(uint a) { - return a != (uint)SVM_STACK_INVALID; + return a != (uint)SVM_STACK_INVALID; } /* Reading Nodes */ ccl_device_inline uint4 read_node(KernelGlobals *kg, int *offset) { - uint4 node = kernel_tex_fetch(__svm_nodes, *offset); - (*offset)++; - return node; + uint4 node = kernel_tex_fetch(__svm_nodes, *offset); + (*offset)++; + return node; } ccl_device_inline float4 read_node_float(KernelGlobals *kg, int *offset) { - uint4 node = kernel_tex_fetch(__svm_nodes, *offset); - float4 f = make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w)); - (*offset)++; - return f; + uint4 node = kernel_tex_fetch(__svm_nodes, *offset); + float4 f = make_float4(__uint_as_float(node.x), + __uint_as_float(node.y), + __uint_as_float(node.z), + __uint_as_float(node.w)); + (*offset)++; + return f; } ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset) { - uint4 node = kernel_tex_fetch(__svm_nodes, offset); - return make_float4(__uint_as_float(node.x), __uint_as_float(node.y), __uint_as_float(node.z), __uint_as_float(node.w)); + uint4 node = kernel_tex_fetch(__svm_nodes, offset); + return make_float4(__uint_as_float(node.x), + __uint_as_float(node.y), + __uint_as_float(node.z), + __uint_as_float(node.w)); } ccl_device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w) { - if(x) *x = (i & 0xFF); - if(y) *y = ((i >> 8) & 0xFF); - if(z) *z = ((i >> 16) & 0xFF); - if(w) *w = ((i >> 24) & 0xFF); + if (x) + *x = (i & 0xFF); + if (y) + *y = ((i >> 8) & 0xFF); + if (z) + *z = ((i >> 16) & 0xFF); + if (w) + *w = ((i >> 24) & 0xFF); } CCL_NAMESPACE_END @@ -194,302 +204,310 @@ CCL_NAMESPACE_BEGIN #define 
NODES_FEATURE(feature) ((__NODES_FEATURES__ & (feature)) != 0) /* Main Interpreter Loop */ -ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, ShaderType type, int path_flag) +ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, + ShaderData *sd, + ccl_addr_space PathState *state, + ShaderType type, + int path_flag) { - float stack[SVM_STACK_SIZE]; - int offset = sd->shader & SHADER_MASK; + float stack[SVM_STACK_SIZE]; + int offset = sd->shader & SHADER_MASK; - while(1) { - uint4 node = read_node(kg, &offset); + while (1) { + uint4 node = read_node(kg, &offset); - switch(node.x) { + switch (node.x) { #if NODES_GROUP(NODE_GROUP_LEVEL_0) - case NODE_SHADER_JUMP: { - if(type == SHADER_TYPE_SURFACE) offset = node.y; - else if(type == SHADER_TYPE_VOLUME) offset = node.z; - else if(type == SHADER_TYPE_DISPLACEMENT) offset = node.w; - else return; - break; - } - case NODE_CLOSURE_BSDF: - svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset); - break; - case NODE_CLOSURE_EMISSION: - svm_node_closure_emission(sd, stack, node); - break; - case NODE_CLOSURE_BACKGROUND: - svm_node_closure_background(sd, stack, node); - break; - case NODE_CLOSURE_SET_WEIGHT: - svm_node_closure_set_weight(sd, node.y, node.z, node.w); - break; - case NODE_CLOSURE_WEIGHT: - svm_node_closure_weight(sd, stack, node.y); - break; - case NODE_EMISSION_WEIGHT: - svm_node_emission_weight(kg, sd, stack, node); - break; - case NODE_MIX_CLOSURE: - svm_node_mix_closure(sd, stack, node); - break; - case NODE_JUMP_IF_ZERO: - if(stack_load_float(stack, node.z) == 0.0f) - offset += node.y; - break; - case NODE_JUMP_IF_ONE: - if(stack_load_float(stack, node.z) == 1.0f) - offset += node.y; - break; - case NODE_GEOMETRY: - svm_node_geometry(kg, sd, stack, node.y, node.z); - break; - case NODE_CONVERT: - svm_node_convert(kg, sd, stack, node.y, node.z, node.w); - break; - case NODE_TEX_COORD: - svm_node_tex_coord(kg, sd, path_flag, stack, 
node, &offset); - break; - case NODE_VALUE_F: - svm_node_value_f(kg, sd, stack, node.y, node.z); - break; - case NODE_VALUE_V: - svm_node_value_v(kg, sd, stack, node.y, &offset); - break; - case NODE_ATTR: - svm_node_attr(kg, sd, stack, node); - break; + case NODE_SHADER_JUMP: { + if (type == SHADER_TYPE_SURFACE) + offset = node.y; + else if (type == SHADER_TYPE_VOLUME) + offset = node.z; + else if (type == SHADER_TYPE_DISPLACEMENT) + offset = node.w; + else + return; + break; + } + case NODE_CLOSURE_BSDF: + svm_node_closure_bsdf(kg, sd, stack, node, type, path_flag, &offset); + break; + case NODE_CLOSURE_EMISSION: + svm_node_closure_emission(sd, stack, node); + break; + case NODE_CLOSURE_BACKGROUND: + svm_node_closure_background(sd, stack, node); + break; + case NODE_CLOSURE_SET_WEIGHT: + svm_node_closure_set_weight(sd, node.y, node.z, node.w); + break; + case NODE_CLOSURE_WEIGHT: + svm_node_closure_weight(sd, stack, node.y); + break; + case NODE_EMISSION_WEIGHT: + svm_node_emission_weight(kg, sd, stack, node); + break; + case NODE_MIX_CLOSURE: + svm_node_mix_closure(sd, stack, node); + break; + case NODE_JUMP_IF_ZERO: + if (stack_load_float(stack, node.z) == 0.0f) + offset += node.y; + break; + case NODE_JUMP_IF_ONE: + if (stack_load_float(stack, node.z) == 1.0f) + offset += node.y; + break; + case NODE_GEOMETRY: + svm_node_geometry(kg, sd, stack, node.y, node.z); + break; + case NODE_CONVERT: + svm_node_convert(kg, sd, stack, node.y, node.z, node.w); + break; + case NODE_TEX_COORD: + svm_node_tex_coord(kg, sd, path_flag, stack, node, &offset); + break; + case NODE_VALUE_F: + svm_node_value_f(kg, sd, stack, node.y, node.z); + break; + case NODE_VALUE_V: + svm_node_value_v(kg, sd, stack, node.y, &offset); + break; + case NODE_ATTR: + svm_node_attr(kg, sd, stack, node); + break; # if NODES_FEATURE(NODE_FEATURE_BUMP) - case NODE_GEOMETRY_BUMP_DX: - svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); - break; - case NODE_GEOMETRY_BUMP_DY: - 
svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); - break; - case NODE_SET_DISPLACEMENT: - svm_node_set_displacement(kg, sd, stack, node.y); - break; - case NODE_DISPLACEMENT: - svm_node_displacement(kg, sd, stack, node); - break; - case NODE_VECTOR_DISPLACEMENT: - svm_node_vector_displacement(kg, sd, stack, node, &offset); - break; -# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ + case NODE_GEOMETRY_BUMP_DX: + svm_node_geometry_bump_dx(kg, sd, stack, node.y, node.z); + break; + case NODE_GEOMETRY_BUMP_DY: + svm_node_geometry_bump_dy(kg, sd, stack, node.y, node.z); + break; + case NODE_SET_DISPLACEMENT: + svm_node_set_displacement(kg, sd, stack, node.y); + break; + case NODE_DISPLACEMENT: + svm_node_displacement(kg, sd, stack, node); + break; + case NODE_VECTOR_DISPLACEMENT: + svm_node_vector_displacement(kg, sd, stack, node, &offset); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ # ifdef __TEXTURES__ - case NODE_TEX_IMAGE: - svm_node_tex_image(kg, sd, stack, node); - break; - case NODE_TEX_IMAGE_BOX: - svm_node_tex_image_box(kg, sd, stack, node); - break; - case NODE_TEX_NOISE: - svm_node_tex_noise(kg, sd, stack, node, &offset); - break; -# endif /* __TEXTURES__ */ + case NODE_TEX_IMAGE: + svm_node_tex_image(kg, sd, stack, node); + break; + case NODE_TEX_IMAGE_BOX: + svm_node_tex_image_box(kg, sd, stack, node); + break; + case NODE_TEX_NOISE: + svm_node_tex_noise(kg, sd, stack, node, &offset); + break; +# endif /* __TEXTURES__ */ # ifdef __EXTRA_NODES__ # if NODES_FEATURE(NODE_FEATURE_BUMP) - case NODE_SET_BUMP: - svm_node_set_bump(kg, sd, stack, node); - break; - case NODE_ATTR_BUMP_DX: - svm_node_attr_bump_dx(kg, sd, stack, node); - break; - case NODE_ATTR_BUMP_DY: - svm_node_attr_bump_dy(kg, sd, stack, node); - break; - case NODE_TEX_COORD_BUMP_DX: - svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset); - break; - case NODE_TEX_COORD_BUMP_DY: - svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset); - break; - case 
NODE_CLOSURE_SET_NORMAL: - svm_node_set_normal(kg, sd, stack, node.y, node.z); - break; + case NODE_SET_BUMP: + svm_node_set_bump(kg, sd, stack, node); + break; + case NODE_ATTR_BUMP_DX: + svm_node_attr_bump_dx(kg, sd, stack, node); + break; + case NODE_ATTR_BUMP_DY: + svm_node_attr_bump_dy(kg, sd, stack, node); + break; + case NODE_TEX_COORD_BUMP_DX: + svm_node_tex_coord_bump_dx(kg, sd, path_flag, stack, node, &offset); + break; + case NODE_TEX_COORD_BUMP_DY: + svm_node_tex_coord_bump_dy(kg, sd, path_flag, stack, node, &offset); + break; + case NODE_CLOSURE_SET_NORMAL: + svm_node_set_normal(kg, sd, stack, node.y, node.z); + break; # if NODES_FEATURE(NODE_FEATURE_BUMP_STATE) - case NODE_ENTER_BUMP_EVAL: - svm_node_enter_bump_eval(kg, sd, stack, node.y); - break; - case NODE_LEAVE_BUMP_EVAL: - svm_node_leave_bump_eval(kg, sd, stack, node.y); - break; -# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */ -# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ - case NODE_HSV: - svm_node_hsv(kg, sd, stack, node, &offset); - break; -# endif /* __EXTRA_NODES__ */ -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */ + case NODE_ENTER_BUMP_EVAL: + svm_node_enter_bump_eval(kg, sd, stack, node.y); + break; + case NODE_LEAVE_BUMP_EVAL: + svm_node_leave_bump_eval(kg, sd, stack, node.y); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_BUMP_STATE) */ +# endif /* NODES_FEATURE(NODE_FEATURE_BUMP) */ + case NODE_HSV: + svm_node_hsv(kg, sd, stack, node, &offset); + break; +# endif /* __EXTRA_NODES__ */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_0) */ #if NODES_GROUP(NODE_GROUP_LEVEL_1) - case NODE_CLOSURE_HOLDOUT: - svm_node_closure_holdout(sd, stack, node); - break; - case NODE_FRESNEL: - svm_node_fresnel(sd, stack, node.y, node.z, node.w); - break; - case NODE_LAYER_WEIGHT: - svm_node_layer_weight(sd, stack, node); - break; + case NODE_CLOSURE_HOLDOUT: + svm_node_closure_holdout(sd, stack, node); + break; + case NODE_FRESNEL: + svm_node_fresnel(sd, stack, node.y, node.z, node.w); + break; + case 
NODE_LAYER_WEIGHT: + svm_node_layer_weight(sd, stack, node); + break; # if NODES_FEATURE(NODE_FEATURE_VOLUME) - case NODE_CLOSURE_VOLUME: - svm_node_closure_volume(kg, sd, stack, node, type); - break; - case NODE_PRINCIPLED_VOLUME: - svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset); - break; -# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ + case NODE_CLOSURE_VOLUME: + svm_node_closure_volume(kg, sd, stack, node, type); + break; + case NODE_PRINCIPLED_VOLUME: + svm_node_principled_volume(kg, sd, stack, node, type, path_flag, &offset); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ # ifdef __EXTRA_NODES__ - case NODE_MATH: - svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset); - break; - case NODE_VECTOR_MATH: - svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset); - break; - case NODE_RGB_RAMP: - svm_node_rgb_ramp(kg, sd, stack, node, &offset); - break; - case NODE_GAMMA: - svm_node_gamma(sd, stack, node.y, node.z, node.w); - break; - case NODE_BRIGHTCONTRAST: - svm_node_brightness(sd, stack, node.y, node.z, node.w); - break; - case NODE_LIGHT_PATH: - svm_node_light_path(sd, state, stack, node.y, node.z, path_flag); - break; - case NODE_OBJECT_INFO: - svm_node_object_info(kg, sd, stack, node.y, node.z); - break; - case NODE_PARTICLE_INFO: - svm_node_particle_info(kg, sd, stack, node.y, node.z); - break; + case NODE_MATH: + svm_node_math(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_VECTOR_MATH: + svm_node_vector_math(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_RGB_RAMP: + svm_node_rgb_ramp(kg, sd, stack, node, &offset); + break; + case NODE_GAMMA: + svm_node_gamma(sd, stack, node.y, node.z, node.w); + break; + case NODE_BRIGHTCONTRAST: + svm_node_brightness(sd, stack, node.y, node.z, node.w); + break; + case NODE_LIGHT_PATH: + svm_node_light_path(sd, state, stack, node.y, node.z, path_flag); + break; + case NODE_OBJECT_INFO: + svm_node_object_info(kg, 
sd, stack, node.y, node.z); + break; + case NODE_PARTICLE_INFO: + svm_node_particle_info(kg, sd, stack, node.y, node.z); + break; # ifdef __HAIR__ # if NODES_FEATURE(NODE_FEATURE_HAIR) - case NODE_HAIR_INFO: - svm_node_hair_info(kg, sd, stack, node.y, node.z); - break; -# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */ -# endif /* __HAIR__ */ -# endif /* __EXTRA_NODES__ */ -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */ + case NODE_HAIR_INFO: + svm_node_hair_info(kg, sd, stack, node.y, node.z); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_HAIR) */ +# endif /* __HAIR__ */ +# endif /* __EXTRA_NODES__ */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_1) */ #if NODES_GROUP(NODE_GROUP_LEVEL_2) - case NODE_MAPPING: - svm_node_mapping(kg, sd, stack, node.y, node.z, &offset); - break; - case NODE_MIN_MAX: - svm_node_min_max(kg, sd, stack, node.y, node.z, &offset); - break; - case NODE_CAMERA: - svm_node_camera(kg, sd, stack, node.y, node.z, node.w); - break; + case NODE_MAPPING: + svm_node_mapping(kg, sd, stack, node.y, node.z, &offset); + break; + case NODE_MIN_MAX: + svm_node_min_max(kg, sd, stack, node.y, node.z, &offset); + break; + case NODE_CAMERA: + svm_node_camera(kg, sd, stack, node.y, node.z, node.w); + break; # ifdef __TEXTURES__ - case NODE_TEX_ENVIRONMENT: - svm_node_tex_environment(kg, sd, stack, node); - break; - case NODE_TEX_SKY: - svm_node_tex_sky(kg, sd, stack, node, &offset); - break; - case NODE_TEX_GRADIENT: - svm_node_tex_gradient(sd, stack, node); - break; - case NODE_TEX_VORONOI: - svm_node_tex_voronoi(kg, sd, stack, node, &offset); - break; - case NODE_TEX_MUSGRAVE: - svm_node_tex_musgrave(kg, sd, stack, node, &offset); - break; - case NODE_TEX_WAVE: - svm_node_tex_wave(kg, sd, stack, node, &offset); - break; - case NODE_TEX_MAGIC: - svm_node_tex_magic(kg, sd, stack, node, &offset); - break; - case NODE_TEX_CHECKER: - svm_node_tex_checker(kg, sd, stack, node); - break; - case NODE_TEX_BRICK: - svm_node_tex_brick(kg, sd, stack, node, &offset); - break; -# 
endif /* __TEXTURES__ */ + case NODE_TEX_ENVIRONMENT: + svm_node_tex_environment(kg, sd, stack, node); + break; + case NODE_TEX_SKY: + svm_node_tex_sky(kg, sd, stack, node, &offset); + break; + case NODE_TEX_GRADIENT: + svm_node_tex_gradient(sd, stack, node); + break; + case NODE_TEX_VORONOI: + svm_node_tex_voronoi(kg, sd, stack, node, &offset); + break; + case NODE_TEX_MUSGRAVE: + svm_node_tex_musgrave(kg, sd, stack, node, &offset); + break; + case NODE_TEX_WAVE: + svm_node_tex_wave(kg, sd, stack, node, &offset); + break; + case NODE_TEX_MAGIC: + svm_node_tex_magic(kg, sd, stack, node, &offset); + break; + case NODE_TEX_CHECKER: + svm_node_tex_checker(kg, sd, stack, node); + break; + case NODE_TEX_BRICK: + svm_node_tex_brick(kg, sd, stack, node, &offset); + break; +# endif /* __TEXTURES__ */ # ifdef __EXTRA_NODES__ - case NODE_NORMAL: - svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset); - break; - case NODE_LIGHT_FALLOFF: - svm_node_light_falloff(sd, stack, node); - break; - case NODE_IES: - svm_node_ies(kg, sd, stack, node, &offset); - break; -# endif /* __EXTRA_NODES__ */ -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */ + case NODE_NORMAL: + svm_node_normal(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_LIGHT_FALLOFF: + svm_node_light_falloff(sd, stack, node); + break; + case NODE_IES: + svm_node_ies(kg, sd, stack, node, &offset); + break; +# endif /* __EXTRA_NODES__ */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_2) */ #if NODES_GROUP(NODE_GROUP_LEVEL_3) - case NODE_RGB_CURVES: - case NODE_VECTOR_CURVES: - svm_node_curves(kg, sd, stack, node, &offset); - break; - case NODE_TANGENT: - svm_node_tangent(kg, sd, stack, node); - break; - case NODE_NORMAL_MAP: - svm_node_normal_map(kg, sd, stack, node); - break; + case NODE_RGB_CURVES: + case NODE_VECTOR_CURVES: + svm_node_curves(kg, sd, stack, node, &offset); + break; + case NODE_TANGENT: + svm_node_tangent(kg, sd, stack, node); + break; + case NODE_NORMAL_MAP: + svm_node_normal_map(kg, 
sd, stack, node); + break; # ifdef __EXTRA_NODES__ - case NODE_INVERT: - svm_node_invert(sd, stack, node.y, node.z, node.w); - break; - case NODE_MIX: - svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset); - break; - case NODE_SEPARATE_VECTOR: - svm_node_separate_vector(sd, stack, node.y, node.z, node.w); - break; - case NODE_COMBINE_VECTOR: - svm_node_combine_vector(sd, stack, node.y, node.z, node.w); - break; - case NODE_SEPARATE_HSV: - svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); - break; - case NODE_COMBINE_HSV: - svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); - break; - case NODE_VECTOR_TRANSFORM: - svm_node_vector_transform(kg, sd, stack, node); - break; - case NODE_WIREFRAME: - svm_node_wireframe(kg, sd, stack, node); - break; - case NODE_WAVELENGTH: - svm_node_wavelength(kg, sd, stack, node.y, node.z); - break; - case NODE_BLACKBODY: - svm_node_blackbody(kg, sd, stack, node.y, node.z); - break; -# endif /* __EXTRA_NODES__ */ + case NODE_INVERT: + svm_node_invert(sd, stack, node.y, node.z, node.w); + break; + case NODE_MIX: + svm_node_mix(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_SEPARATE_VECTOR: + svm_node_separate_vector(sd, stack, node.y, node.z, node.w); + break; + case NODE_COMBINE_VECTOR: + svm_node_combine_vector(sd, stack, node.y, node.z, node.w); + break; + case NODE_SEPARATE_HSV: + svm_node_separate_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_COMBINE_HSV: + svm_node_combine_hsv(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_VECTOR_TRANSFORM: + svm_node_vector_transform(kg, sd, stack, node); + break; + case NODE_WIREFRAME: + svm_node_wireframe(kg, sd, stack, node); + break; + case NODE_WAVELENGTH: + svm_node_wavelength(kg, sd, stack, node.y, node.z); + break; + case NODE_BLACKBODY: + svm_node_blackbody(kg, sd, stack, node.y, node.z); + break; +# endif /* __EXTRA_NODES__ */ # if NODES_FEATURE(NODE_FEATURE_VOLUME) 
- case NODE_TEX_VOXEL: - svm_node_tex_voxel(kg, sd, stack, node, &offset); - break; -# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ + case NODE_TEX_VOXEL: + svm_node_tex_voxel(kg, sd, stack, node, &offset); + break; +# endif /* NODES_FEATURE(NODE_FEATURE_VOLUME) */ # ifdef __SHADER_RAYTRACE__ - case NODE_BEVEL: - svm_node_bevel(kg, sd, state, stack, node); - break; - case NODE_AMBIENT_OCCLUSION: - svm_node_ao(kg, sd, state, stack, node); - break; -# endif /* __SHADER_RAYTRACE__ */ -#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */ - case NODE_END: - return; - default: - kernel_assert(!"Unknown node type was passed to the SVM machine"); - return; - } - } + case NODE_BEVEL: + svm_node_bevel(kg, sd, state, stack, node); + break; + case NODE_AMBIENT_OCCLUSION: + svm_node_ao(kg, sd, state, stack, node); + break; +# endif /* __SHADER_RAYTRACE__ */ +#endif /* NODES_GROUP(NODE_GROUP_LEVEL_3) */ + case NODE_END: + return; + default: + kernel_assert(!"Unknown node type was passed to the SVM machine"); + return; + } + } } #undef NODES_GROUP @@ -497,4 +515,4 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ccl_a CCL_NAMESPACE_END -#endif /* __SVM_H__ */ +#endif /* __SVM_H__ */ diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index 0744ec1768f..06076175c40 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -24,95 +24,82 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg, int num_samples, int flags) { - if(flags & NODE_AO_GLOBAL_RADIUS) { - max_dist = kernel_data.background.ao_distance; - } - - /* Early out if no sampling needed. */ - if(max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { - return 1.0f; - } - - /* Can't raytrace from shaders like displacement, before BVH exists. 
*/ - if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { - return 1.0f; - } - - if(flags & NODE_AO_INSIDE) { - N = -N; - } - - float3 T, B; - make_orthonormals(N, &T, &B); - - int unoccluded = 0; - for(int sample = 0; sample < num_samples; sample++) { - float disk_u, disk_v; - path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples, - PRNG_BEVEL_U, &disk_u, &disk_v); - - float2 d = concentric_sample_disk(disk_u, disk_v); - float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d))); - - /* Create ray. */ - Ray ray; - ray.P = ray_offset(sd->P, N); - ray.D = D.x*T + D.y*B + D.z*N; - ray.t = max_dist; - ray.time = sd->time; - ray.dP = sd->dP; - ray.dD = differential3_zero(); - - if(flags & NODE_AO_ONLY_LOCAL) { - if(!scene_intersect_local(kg, - ray, - NULL, - sd->object, - NULL, - 0)) { - unoccluded++; - } - } - else { - Intersection isect; - if(!scene_intersect(kg, - ray, - PATH_RAY_SHADOW_OPAQUE, - &isect, - NULL, - 0.0f, 0.0f)) { - unoccluded++; - } - } - } - - return ((float) unoccluded) / num_samples; + if (flags & NODE_AO_GLOBAL_RADIUS) { + max_dist = kernel_data.background.ao_distance; + } + + /* Early out if no sampling needed. */ + if (max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { + return 1.0f; + } + + /* Can't raytrace from shaders like displacement, before BVH exists. */ + if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { + return 1.0f; + } + + if (flags & NODE_AO_INSIDE) { + N = -N; + } + + float3 T, B; + make_orthonormals(N, &T, &B); + + int unoccluded = 0; + for (int sample = 0; sample < num_samples; sample++) { + float disk_u, disk_v; + path_branched_rng_2D( + kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); + + float2 d = concentric_sample_disk(disk_u, disk_v); + float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d))); + + /* Create ray. 
*/ + Ray ray; + ray.P = ray_offset(sd->P, N); + ray.D = D.x * T + D.y * B + D.z * N; + ray.t = max_dist; + ray.time = sd->time; + ray.dP = sd->dP; + ray.dD = differential3_zero(); + + if (flags & NODE_AO_ONLY_LOCAL) { + if (!scene_intersect_local(kg, ray, NULL, sd->object, NULL, 0)) { + unoccluded++; + } + } + else { + Intersection isect; + if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f)) { + unoccluded++; + } + } + } + + return ((float)unoccluded) / num_samples; } -ccl_device void svm_node_ao(KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space PathState *state, - float *stack, - uint4 node) +ccl_device void svm_node_ao( + KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node) { - uint flags, dist_offset, normal_offset, out_ao_offset; - decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset); + uint flags, dist_offset, normal_offset, out_ao_offset; + decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset); - uint color_offset, out_color_offset, samples; - decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL); + uint color_offset, out_color_offset, samples; + decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL); - float dist = stack_load_float_default(stack, dist_offset, node.w); - float3 normal = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; - float ao = svm_ao(kg, sd, normal, state, dist, samples, flags); + float dist = stack_load_float_default(stack, dist_offset, node.w); + float3 normal = stack_valid(normal_offset) ? 
stack_load_float3(stack, normal_offset) : sd->N; + float ao = svm_ao(kg, sd, normal, state, dist, samples, flags); - if(stack_valid(out_ao_offset)) { - stack_store_float(stack, out_ao_offset, ao); - } + if (stack_valid(out_ao_offset)) { + stack_store_float(stack, out_ao_offset, ao); + } - if(stack_valid(out_color_offset)) { - float3 color = stack_load_float3(stack, color_offset); - stack_store_float3(stack, out_color_offset, ao * color); - } + if (stack_valid(out_color_offset)) { + float3 color = stack_load_float3(stack, color_offset); + stack_store_float3(stack, out_color_offset, ao * color); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index c2366df71d0..a67cfe91a30 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -18,67 +18,66 @@ CCL_NAMESPACE_BEGIN /* Attribute Node */ -ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, - uint4 node, NodeAttributeType *type, - uint *out_offset) +ccl_device AttributeDescriptor svm_node_attr_init( + KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeType *type, uint *out_offset) { - *out_offset = node.z; - *type = (NodeAttributeType)node.w; + *out_offset = node.z; + *type = (NodeAttributeType)node.w; - AttributeDescriptor desc; + AttributeDescriptor desc; - if(sd->object != OBJECT_NONE) { - desc = find_attribute(kg, sd, node.y); - if(desc.offset == ATTR_STD_NOT_FOUND) { - desc = attribute_not_found(); - desc.offset = 0; - desc.type = (NodeAttributeType)node.w; - } - } - else { - /* background */ - desc = attribute_not_found(); - desc.offset = 0; - desc.type = (NodeAttributeType)node.w; - } + if (sd->object != OBJECT_NONE) { + desc = find_attribute(kg, sd, node.y); + if (desc.offset == ATTR_STD_NOT_FOUND) { + desc = attribute_not_found(); + desc.offset = 0; + desc.type = (NodeAttributeType)node.w; + } + } + else { + /* background */ + desc = 
attribute_not_found(); + desc.offset = 0; + desc.type = (NodeAttributeType)node.w; + } - return desc; + return desc; } ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type; - uint out_offset; - AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); + NodeAttributeType type; + uint out_offset; + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - /* fetch and store attribute */ - if(desc.type == NODE_ATTR_FLOAT) { - float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); - if(type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, f); - } - else { - stack_store_float3(stack, out_offset, make_float3(f, f, f)); - } - } - else if(desc.type == NODE_ATTR_FLOAT2) { - float2 f = primitive_attribute_float2(kg, sd, desc, NULL, NULL); - if(type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, f.x); - } - else { - stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f)); - } - } - else { - float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); - if(type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, average(f)); - } - else { - stack_store_float3(stack, out_offset, f); - } - } + /* fetch and store attribute */ + if (desc.type == NODE_ATTR_FLOAT) { + float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, f); + } + else { + stack_store_float3(stack, out_offset, make_float3(f, f, f)); + } + } + else if (desc.type == NODE_ATTR_FLOAT2) { + float2 f = primitive_attribute_float2(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, f.x); + } + else { + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, 0.0f)); + } + } + else { + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, 
average(f)); + } + else { + stack_store_float3(stack, out_offset, f); + } + } } #ifndef __KERNEL_CUDA__ @@ -86,43 +85,44 @@ ccl_device #else ccl_device_noinline #endif -void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) + void + svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type; - uint out_offset; - AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); + NodeAttributeType type; + uint out_offset; + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - /* fetch and store attribute */ - if(desc.type == NODE_ATTR_FLOAT) { - float dx; - float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL); - if(type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, f+dx); - } - else { - stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx)); - } - } - else if(desc.type == NODE_ATTR_FLOAT2) { - float2 dx; - float2 f = primitive_attribute_float2(kg, sd, desc, &dx, NULL); - if (type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, f.x + dx.x); - } - else { - stack_store_float3(stack, out_offset, make_float3(f.x+dx.x, f.y+dx.y, 0.0f)); - } - } - else { - float3 dx; - float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL); - if(type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, average(f+dx)); - } - else { - stack_store_float3(stack, out_offset, f+dx); - } - } + /* fetch and store attribute */ + if (desc.type == NODE_ATTR_FLOAT) { + float dx; + float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, f + dx); + } + else { + stack_store_float3(stack, out_offset, make_float3(f + dx, f + dx, f + dx)); + } + } + else if (desc.type == NODE_ATTR_FLOAT2) { + float2 dx; + float2 f = primitive_attribute_float2(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_FLOAT) { + 
stack_store_float(stack, out_offset, f.x + dx.x); + } + else { + stack_store_float3(stack, out_offset, make_float3(f.x + dx.x, f.y + dx.y, 0.0f)); + } + } + else { + float3 dx; + float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, average(f + dx)); + } + else { + stack_store_float3(stack, out_offset, f + dx); + } + } } #ifndef __KERNEL_CUDA__ @@ -130,46 +130,44 @@ ccl_device #else ccl_device_noinline #endif -void svm_node_attr_bump_dy(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node) + void + svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type; - uint out_offset; - AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); + NodeAttributeType type; + uint out_offset; + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); - /* fetch and store attribute */ - if(desc.type == NODE_ATTR_FLOAT) { - float dy; - float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy); - if(type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, f+dy); - } - else { - stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy)); - } - } - else if(desc.type == NODE_ATTR_FLOAT2) { - float2 dy; - float2 f = primitive_attribute_float2(kg, sd, desc, NULL, &dy); - if(type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, f.x + dy.x); - } - else { - stack_store_float3(stack, out_offset, make_float3(f.x+dy.x, f.y+dy.y, 0.0f)); - } - } - else { - float3 dy; - float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy); - if(type == NODE_ATTR_FLOAT) { - stack_store_float(stack, out_offset, average(f+dy)); - } - else { - stack_store_float3(stack, out_offset, f+dy); - } - } + /* fetch and store attribute */ + if (desc.type == NODE_ATTR_FLOAT) { + float dy; + float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy); + if (type 
== NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, f + dy); + } + else { + stack_store_float3(stack, out_offset, make_float3(f + dy, f + dy, f + dy)); + } + } + else if (desc.type == NODE_ATTR_FLOAT2) { + float2 dy; + float2 f = primitive_attribute_float2(kg, sd, desc, NULL, &dy); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, f.x + dy.x); + } + else { + stack_store_float3(stack, out_offset, make_float3(f.x + dy.x, f.y + dy.y, 0.0f)); + } + } + else { + float3 dy; + float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, average(f + dy)); + } + else { + stack_store_float3(stack, out_offset, f + dy); + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index b5bb9df422b..fcf28e96e98 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -22,215 +22,196 @@ CCL_NAMESPACE_BEGIN * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf */ -ccl_device_noinline float3 svm_bevel( - KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space PathState *state, - float radius, - int num_samples) +ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, + ShaderData *sd, + ccl_addr_space PathState *state, + float radius, + int num_samples) { - /* Early out if no sampling needed. */ - if(radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { - return sd->N; - } - - /* Can't raytrace from shaders like displacement, before BVH exists. */ - if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { - return sd->N; - } - - /* Don't bevel for blurry indirect rays. */ - if(state->min_ray_pdf < 8.0f) { - return sd->N; - } - - /* Setup for multi intersection. */ - LocalIntersection isect; - uint lcg_state = lcg_state_init_addrspace(state, 0x64c6a40e); - - /* Sample normals from surrounding points on surface. 
*/ - float3 sum_N = make_float3(0.0f, 0.0f, 0.0f); - - for(int sample = 0; sample < num_samples; sample++) { - float disk_u, disk_v; - path_branched_rng_2D(kg, state->rng_hash, state, sample, num_samples, - PRNG_BEVEL_U, &disk_u, &disk_v); - - /* Pick random axis in local frame and point on disk. */ - float3 disk_N, disk_T, disk_B; - float pick_pdf_N, pick_pdf_T, pick_pdf_B; - - disk_N = sd->Ng; - make_orthonormals(disk_N, &disk_T, &disk_B); - - float axisu = disk_u; - - if(axisu < 0.5f) { - pick_pdf_N = 0.5f; - pick_pdf_T = 0.25f; - pick_pdf_B = 0.25f; - disk_u *= 2.0f; - } - else if(axisu < 0.75f) { - float3 tmp = disk_N; - disk_N = disk_T; - disk_T = tmp; - pick_pdf_N = 0.25f; - pick_pdf_T = 0.5f; - pick_pdf_B = 0.25f; - disk_u = (disk_u - 0.5f)*4.0f; - } - else { - float3 tmp = disk_N; - disk_N = disk_B; - disk_B = tmp; - pick_pdf_N = 0.25f; - pick_pdf_T = 0.25f; - pick_pdf_B = 0.5f; - disk_u = (disk_u - 0.75f)*4.0f; - } - - /* Sample point on disk. */ - float phi = M_2PI_F * disk_u; - float disk_r = disk_v; - float disk_height; - - /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */ - bssrdf_cubic_sample(radius, 0.0f, disk_r, &disk_r, &disk_height); - - float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B; - - /* Create ray. */ - Ray *ray = &isect.ray; - ray->P = sd->P + disk_N*disk_height + disk_P; - ray->D = -disk_N; - ray->t = 2.0f*disk_height; - ray->dP = sd->dP; - ray->dD = differential3_zero(); - ray->time = sd->time; - - /* Intersect with the same object. if multiple intersections are found it - * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */ - scene_intersect_local(kg, - *ray, - &isect, - sd->object, - &lcg_state, - LOCAL_MAX_HITS); - - int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS); - - for(int hit = 0; hit < num_eval_hits; hit++) { - /* Quickly retrieve P and Ng without setting up ShaderData. 
*/ - float3 hit_P; - if(sd->type & PRIMITIVE_TRIANGLE) { - hit_P = triangle_refine_local(kg, - sd, - &isect.hits[hit], - ray); - } + /* Early out if no sampling needed. */ + if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) { + return sd->N; + } + + /* Can't raytrace from shaders like displacement, before BVH exists. */ + if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { + return sd->N; + } + + /* Don't bevel for blurry indirect rays. */ + if (state->min_ray_pdf < 8.0f) { + return sd->N; + } + + /* Setup for multi intersection. */ + LocalIntersection isect; + uint lcg_state = lcg_state_init_addrspace(state, 0x64c6a40e); + + /* Sample normals from surrounding points on surface. */ + float3 sum_N = make_float3(0.0f, 0.0f, 0.0f); + + for (int sample = 0; sample < num_samples; sample++) { + float disk_u, disk_v; + path_branched_rng_2D( + kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); + + /* Pick random axis in local frame and point on disk. */ + float3 disk_N, disk_T, disk_B; + float pick_pdf_N, pick_pdf_T, pick_pdf_B; + + disk_N = sd->Ng; + make_orthonormals(disk_N, &disk_T, &disk_B); + + float axisu = disk_u; + + if (axisu < 0.5f) { + pick_pdf_N = 0.5f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.25f; + disk_u *= 2.0f; + } + else if (axisu < 0.75f) { + float3 tmp = disk_N; + disk_N = disk_T; + disk_T = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.5f; + pick_pdf_B = 0.25f; + disk_u = (disk_u - 0.5f) * 4.0f; + } + else { + float3 tmp = disk_N; + disk_N = disk_B; + disk_B = tmp; + pick_pdf_N = 0.25f; + pick_pdf_T = 0.25f; + pick_pdf_B = 0.5f; + disk_u = (disk_u - 0.75f) * 4.0f; + } + + /* Sample point on disk. */ + float phi = M_2PI_F * disk_u; + float disk_r = disk_v; + float disk_height; + + /* Perhaps find something better than Cubic BSSRDF, but happens to work well. 
*/ + bssrdf_cubic_sample(radius, 0.0f, disk_r, &disk_r, &disk_height); + + float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B; + + /* Create ray. */ + Ray *ray = &isect.ray; + ray->P = sd->P + disk_N * disk_height + disk_P; + ray->D = -disk_N; + ray->t = 2.0f * disk_height; + ray->dP = sd->dP; + ray->dD = differential3_zero(); + ray->time = sd->time; + + /* Intersect with the same object. if multiple intersections are found it + * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */ + scene_intersect_local(kg, *ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS); + + int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS); + + for (int hit = 0; hit < num_eval_hits; hit++) { + /* Quickly retrieve P and Ng without setting up ShaderData. */ + float3 hit_P; + if (sd->type & PRIMITIVE_TRIANGLE) { + hit_P = triangle_refine_local(kg, sd, &isect.hits[hit], ray); + } #ifdef __OBJECT_MOTION__ - else if(sd->type & PRIMITIVE_MOTION_TRIANGLE) { - float3 verts[3]; - motion_triangle_vertices( - kg, - sd->object, - kernel_tex_fetch(__prim_index, isect.hits[hit].prim), - sd->time, - verts); - hit_P = motion_triangle_refine_local(kg, - sd, - &isect.hits[hit], - ray, - verts); - } -#endif /* __OBJECT_MOTION__ */ - - /* Get geometric normal. */ - float3 hit_Ng = isect.Ng[hit]; - int object = (isect.hits[hit].object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, isect.hits[hit].prim): isect.hits[hit].object; - int object_flag = kernel_tex_fetch(__object_flag, object); - if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { - hit_Ng = -hit_Ng; - } - - /* Compute smooth normal. 
*/ - float3 N = hit_Ng; - int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim); - int shader = kernel_tex_fetch(__tri_shader, prim); - - if(shader & SHADER_SMOOTH_NORMAL) { - float u = isect.hits[hit].u; - float v = isect.hits[hit].v; - - if(sd->type & PRIMITIVE_TRIANGLE) { - N = triangle_smooth_normal(kg, N, prim, u, v); - } + else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { + float3 verts[3]; + motion_triangle_vertices( + kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts); + hit_P = motion_triangle_refine_local(kg, sd, &isect.hits[hit], ray, verts); + } +#endif /* __OBJECT_MOTION__ */ + + /* Get geometric normal. */ + float3 hit_Ng = isect.Ng[hit]; + int object = (isect.hits[hit].object == OBJECT_NONE) ? + kernel_tex_fetch(__prim_object, isect.hits[hit].prim) : + isect.hits[hit].object; + int object_flag = kernel_tex_fetch(__object_flag, object); + if (object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) { + hit_Ng = -hit_Ng; + } + + /* Compute smooth normal. */ + float3 N = hit_Ng; + int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim); + int shader = kernel_tex_fetch(__tri_shader, prim); + + if (shader & SHADER_SMOOTH_NORMAL) { + float u = isect.hits[hit].u; + float v = isect.hits[hit].v; + + if (sd->type & PRIMITIVE_TRIANGLE) { + N = triangle_smooth_normal(kg, N, prim, u, v); + } #ifdef __OBJECT_MOTION__ - else if(sd->type & PRIMITIVE_MOTION_TRIANGLE) { - N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time); - } -#endif /* __OBJECT_MOTION__ */ - } - - /* Transform normals to world space. */ - if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_normal_transform(kg, sd, &N); - object_normal_transform(kg, sd, &hit_Ng); - } - - /* Probability densities for local frame axes. 
*/ - float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); - float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); - float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); - - /* Multiple importance sample between 3 axes, power heuristic - * found to be slightly better than balance heuristic. pdf_N - * in the MIS weight and denominator cancelled out. */ - float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); - if(isect.num_hits > LOCAL_MAX_HITS) { - w *= isect.num_hits/(float)LOCAL_MAX_HITS; - } - - /* Real distance to sampled point. */ - float r = len(hit_P - sd->P); - - /* Compute weight. */ - float pdf = bssrdf_cubic_pdf(radius, 0.0f, r); - float disk_pdf = bssrdf_cubic_pdf(radius, 0.0f, disk_r); - - w *= pdf / disk_pdf; - - /* Sum normal and weight. */ - sum_N += w * N; - } - } - - /* Normalize. */ - float3 N = safe_normalize(sum_N); - return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N; + else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { + N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time); + } +#endif /* __OBJECT_MOTION__ */ + } + + /* Transform normals to world space. */ + if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_normal_transform(kg, sd, &N); + object_normal_transform(kg, sd, &hit_Ng); + } + + /* Probability densities for local frame axes. */ + float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng)); + float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng)); + float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng)); + + /* Multiple importance sample between 3 axes, power heuristic + * found to be slightly better than balance heuristic. pdf_N + * in the MIS weight and denominator cancelled out. */ + float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B)); + if (isect.num_hits > LOCAL_MAX_HITS) { + w *= isect.num_hits / (float)LOCAL_MAX_HITS; + } + + /* Real distance to sampled point. */ + float r = len(hit_P - sd->P); + + /* Compute weight. 
*/ + float pdf = bssrdf_cubic_pdf(radius, 0.0f, r); + float disk_pdf = bssrdf_cubic_pdf(radius, 0.0f, disk_r); + + w *= pdf / disk_pdf; + + /* Sum normal and weight. */ + sum_N += w * N; + } + } + + /* Normalize. */ + float3 N = safe_normalize(sum_N); + return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N; } ccl_device void svm_node_bevel( - KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space PathState *state, - float *stack, - uint4 node) + KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node) { - uint num_samples, radius_offset, normal_offset, out_offset; - decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset); + uint num_samples, radius_offset, normal_offset, out_offset; + decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset); - float radius = stack_load_float(stack, radius_offset); - float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples); + float radius = stack_load_float(stack, radius_offset); + float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples); - if(stack_valid(normal_offset)) { - /* Preserve input normal. */ - float3 ref_N = stack_load_float3(stack, normal_offset); - bevel_N = normalize(ref_N + (bevel_N - sd->N)); - } + if (stack_valid(normal_offset)) { + /* Preserve input normal. 
*/ + float3 ref_N = stack_load_float3(stack, normal_offset); + bevel_N = normalize(ref_N + (bevel_N - sd->N)); + } - stack_store_float3(stack, out_offset, bevel_N); + stack_store_float3(stack, out_offset, bevel_N); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h index 51590b18505..adfc50d961e 100644 --- a/intern/cycles/kernel/svm/svm_blackbody.h +++ b/intern/cycles/kernel/svm/svm_blackbody.h @@ -34,14 +34,15 @@ CCL_NAMESPACE_BEGIN /* Blackbody Node */ -ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset) +ccl_device void svm_node_blackbody( + KernelGlobals *kg, ShaderData *sd, float *stack, uint temperature_offset, uint col_offset) { - /* Input */ - float temperature = stack_load_float(stack, temperature_offset); + /* Input */ + float temperature = stack_load_float(stack, temperature_offset); - float3 color_rgb = svm_math_blackbody_color(temperature); + float3 color_rgb = svm_math_blackbody_color(temperature); - stack_store_float3(stack, col_offset, color_rgb); + stack_store_float3(stack, col_offset, color_rgb); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h index 744d9ff16c5..b5cbfcc72df 100644 --- a/intern/cycles/kernel/svm/svm_brick.h +++ b/intern/cycles/kernel/svm/svm_brick.h @@ -20,101 +20,119 @@ CCL_NAMESPACE_BEGIN ccl_device_noinline float brick_noise(uint n) /* fast integer noise */ { - uint nn; - n = (n + 1013) & 0x7fffffff; - n = (n >> 13) ^ n; - nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff; - return 0.5f * ((float)nn / 1073741824.0f); + uint nn; + n = (n + 1013) & 0x7fffffff; + n = (n >> 13) ^ n; + nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff; + return 0.5f * ((float)nn / 1073741824.0f); } -ccl_device_noinline float2 svm_brick(float3 p, float mortar_size, float mortar_smooth, float bias, - float brick_width, float 
row_height, float offset_amount, int offset_frequency, - float squash_amount, int squash_frequency) +ccl_device_noinline float2 svm_brick(float3 p, + float mortar_size, + float mortar_smooth, + float bias, + float brick_width, + float row_height, + float offset_amount, + int offset_frequency, + float squash_amount, + int squash_frequency) { - int bricknum, rownum; - float offset = 0.0f; - float x, y; - - rownum = floor_to_int(p.y / row_height); - - if(offset_frequency && squash_frequency) { - brick_width *= (rownum % squash_frequency) ? 1.0f : squash_amount; /* squash */ - offset = (rownum % offset_frequency) ? 0.0f : (brick_width*offset_amount); /* offset */ - } - - bricknum = floor_to_int((p.x+offset) / brick_width); - - x = (p.x+offset) - brick_width*bricknum; - y = p.y - row_height*rownum; - - float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias)); - float min_dist = min(min(x, y), min(brick_width - x, row_height - y)); - - float mortar; - if(min_dist >= mortar_size) { - mortar = 0.0f; - } - else if(mortar_smooth == 0.0f) { - mortar = 1.0f; - } - else { - min_dist = 1.0f - min_dist/mortar_size; - mortar = (min_dist < mortar_smooth)? smoothstepf(min_dist / mortar_smooth) : 1.0f; - } - - return make_float2(tint, mortar); + int bricknum, rownum; + float offset = 0.0f; + float x, y; + + rownum = floor_to_int(p.y / row_height); + + if (offset_frequency && squash_frequency) { + brick_width *= (rownum % squash_frequency) ? 1.0f : squash_amount; /* squash */ + offset = (rownum % offset_frequency) ? 
0.0f : (brick_width * offset_amount); /* offset */ + } + + bricknum = floor_to_int((p.x + offset) / brick_width); + + x = (p.x + offset) - brick_width * bricknum; + y = p.y - row_height * rownum; + + float tint = saturate((brick_noise((rownum << 16) + (bricknum & 0xFFFF)) + bias)); + float min_dist = min(min(x, y), min(brick_width - x, row_height - y)); + + float mortar; + if (min_dist >= mortar_size) { + mortar = 0.0f; + } + else if (mortar_smooth == 0.0f) { + mortar = 1.0f; + } + else { + min_dist = 1.0f - min_dist / mortar_size; + mortar = (min_dist < mortar_smooth) ? smoothstepf(min_dist / mortar_smooth) : 1.0f; + } + + return make_float2(tint, mortar); } -ccl_device void svm_node_tex_brick(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_tex_brick( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint4 node2 = read_node(kg, offset); - uint4 node3 = read_node(kg, offset); - uint4 node4 = read_node(kg, offset); - - /* Input and Output Sockets */ - uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset; - uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset; - uint color_offset, fac_offset, mortar_smooth_offset; - - /* RNA properties */ - uint offset_frequency, squash_frequency; - - decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); - decode_node_uchar4(node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset); - decode_node_uchar4(node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset); - - decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL); - - float3 co = stack_load_float3(stack, co_offset); - - float3 color1 = stack_load_float3(stack, color1_offset); - float3 color2 = stack_load_float3(stack, color2_offset); - float3 mortar = stack_load_float3(stack, mortar_offset); - - float scale = stack_load_float_default(stack, scale_offset, 
node2.y); - float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z); - float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x); - float bias = stack_load_float_default(stack, bias_offset, node2.w); - float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x); - float row_height = stack_load_float_default(stack, row_height_offset, node3.y); - float offset_amount = __int_as_float(node3.z); - float squash_amount = __int_as_float(node3.w); - - float2 f2 = svm_brick(co*scale, mortar_size, mortar_smooth, bias, brick_width, row_height, - offset_amount, offset_frequency, squash_amount, squash_frequency); - - float tint = f2.x; - float f = f2.y; - - if(f != 1.0f) { - float facm = 1.0f - tint; - color1 = facm * color1 + tint * color2; - } - - if(stack_valid(color_offset)) - stack_store_float3(stack, color_offset, color1*(1.0f-f) + mortar*f); - if(stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, f); + uint4 node2 = read_node(kg, offset); + uint4 node3 = read_node(kg, offset); + uint4 node4 = read_node(kg, offset); + + /* Input and Output Sockets */ + uint co_offset, color1_offset, color2_offset, mortar_offset, scale_offset; + uint mortar_size_offset, bias_offset, brick_width_offset, row_height_offset; + uint color_offset, fac_offset, mortar_smooth_offset; + + /* RNA properties */ + uint offset_frequency, squash_frequency; + + decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); + decode_node_uchar4( + node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset); + decode_node_uchar4( + node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset); + + decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL); + + float3 co = stack_load_float3(stack, co_offset); + + float3 color1 = stack_load_float3(stack, color1_offset); + float3 color2 = stack_load_float3(stack, color2_offset); + float3 mortar = 
stack_load_float3(stack, mortar_offset); + + float scale = stack_load_float_default(stack, scale_offset, node2.y); + float mortar_size = stack_load_float_default(stack, mortar_size_offset, node2.z); + float mortar_smooth = stack_load_float_default(stack, mortar_smooth_offset, node4.x); + float bias = stack_load_float_default(stack, bias_offset, node2.w); + float brick_width = stack_load_float_default(stack, brick_width_offset, node3.x); + float row_height = stack_load_float_default(stack, row_height_offset, node3.y); + float offset_amount = __int_as_float(node3.z); + float squash_amount = __int_as_float(node3.w); + + float2 f2 = svm_brick(co * scale, + mortar_size, + mortar_smooth, + bias, + brick_width, + row_height, + offset_amount, + offset_frequency, + squash_amount, + squash_frequency); + + float tint = f2.x; + float f = f2.y; + + if (f != 1.0f) { + float facm = 1.0f - tint; + color1 = facm * color1 + tint * color2; + } + + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, color1 * (1.0f - f) + mortar * f); + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h index d71b0ee0b61..dcd75a2fe8f 100644 --- a/intern/cycles/kernel/svm/svm_brightness.h +++ b/intern/cycles/kernel/svm/svm_brightness.h @@ -16,19 +16,20 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_brightness(ShaderData *sd, float *stack, uint in_color, uint out_color, uint node) +ccl_device void svm_node_brightness( + ShaderData *sd, float *stack, uint in_color, uint out_color, uint node) { - uint bright_offset, contrast_offset; - float3 color = stack_load_float3(stack, in_color); + uint bright_offset, contrast_offset; + float3 color = stack_load_float3(stack, in_color); - decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL); - float brightness = stack_load_float(stack, bright_offset); - float contrast = 
stack_load_float(stack, contrast_offset); + decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL); + float brightness = stack_load_float(stack, bright_offset); + float contrast = stack_load_float(stack, contrast_offset); - color = svm_brightness_contrast(color, brightness, contrast); + color = svm_brightness_contrast(color, brightness, contrast); - if(stack_valid(out_color)) - stack_store_float3(stack, out_color, color); + if (stack_valid(out_color)) + stack_store_float3(stack, out_color, color); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h index 35aac174409..c9d430a2bba 100644 --- a/intern/cycles/kernel/svm/svm_bump.h +++ b/intern/cycles/kernel/svm/svm_bump.h @@ -18,36 +18,42 @@ CCL_NAMESPACE_BEGIN /* Bump Eval Nodes */ -ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, ShaderData *sd, float *stack, uint offset) +ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint offset) { - /* save state */ - stack_store_float3(stack, offset+0, sd->P); - stack_store_float3(stack, offset+3, sd->dP.dx); - stack_store_float3(stack, offset+6, sd->dP.dy); - - /* set state as if undisplaced */ - const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); - - if(desc.offset != ATTR_STD_NOT_FOUND) { - float3 P, dPdx, dPdy; - P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy); - - object_position_transform(kg, sd, &P); - object_dir_transform(kg, sd, &dPdx); - object_dir_transform(kg, sd, &dPdy); - - sd->P = P; - sd->dP.dx = dPdx; - sd->dP.dy = dPdy; - } + /* save state */ + stack_store_float3(stack, offset + 0, sd->P); + stack_store_float3(stack, offset + 3, sd->dP.dx); + stack_store_float3(stack, offset + 6, sd->dP.dy); + + /* set state as if undisplaced */ + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); + + if (desc.offset != ATTR_STD_NOT_FOUND) { + float3 P, dPdx, 
dPdy; + P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy); + + object_position_transform(kg, sd, &P); + object_dir_transform(kg, sd, &dPdx); + object_dir_transform(kg, sd, &dPdy); + + sd->P = P; + sd->dP.dx = dPdx; + sd->dP.dy = dPdy; + } } -ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg, ShaderData *sd, float *stack, uint offset) +ccl_device void svm_node_leave_bump_eval(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint offset) { - /* restore state */ - sd->P = stack_load_float3(stack, offset+0); - sd->dP.dx = stack_load_float3(stack, offset+3); - sd->dP.dy = stack_load_float3(stack, offset+6); + /* restore state */ + sd->P = stack_load_float3(stack, offset + 0); + sd->dP.dx = stack_load_float3(stack, offset + 3); + sd->dP.dy = stack_load_float3(stack, offset + 6); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_camera.h b/intern/cycles/kernel/svm/svm_camera.h index cf90229b53b..21a17acf5f1 100644 --- a/intern/cycles/kernel/svm/svm_camera.h +++ b/intern/cycles/kernel/svm/svm_camera.h @@ -16,25 +16,30 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_camera(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_vector, uint out_zdepth, uint out_distance) +ccl_device void svm_node_camera(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint out_vector, + uint out_zdepth, + uint out_distance) { - float distance; - float zdepth; - float3 vector; + float distance; + float zdepth; + float3 vector; - Transform tfm = kernel_data.cam.worldtocamera; - vector = transform_point(&tfm, sd->P); - zdepth = vector.z; - distance = len(vector); + Transform tfm = kernel_data.cam.worldtocamera; + vector = transform_point(&tfm, sd->P); + zdepth = vector.z; + distance = len(vector); - if(stack_valid(out_vector)) - stack_store_float3(stack, out_vector, normalize(vector)); + if (stack_valid(out_vector)) + stack_store_float3(stack, out_vector, normalize(vector)); - if(stack_valid(out_zdepth)) - stack_store_float(stack, 
out_zdepth, zdepth); + if (stack_valid(out_zdepth)) + stack_store_float(stack, out_zdepth, zdepth); - if(stack_valid(out_distance)) - stack_store_float(stack, out_distance, distance); + if (stack_valid(out_distance)) + stack_store_float(stack, out_distance, distance); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h index 45e6c181e9e..63b4d1e149b 100644 --- a/intern/cycles/kernel/svm/svm_checker.h +++ b/intern/cycles/kernel/svm/svm_checker.h @@ -20,37 +20,37 @@ CCL_NAMESPACE_BEGIN ccl_device_noinline float svm_checker(float3 p) { - /* avoid precision issues on unit coordinates */ - p.x = (p.x + 0.000001f)*0.999999f; - p.y = (p.y + 0.000001f)*0.999999f; - p.z = (p.z + 0.000001f)*0.999999f; + /* avoid precision issues on unit coordinates */ + p.x = (p.x + 0.000001f) * 0.999999f; + p.y = (p.y + 0.000001f) * 0.999999f; + p.z = (p.z + 0.000001f) * 0.999999f; - int xi = abs(float_to_int(floorf(p.x))); - int yi = abs(float_to_int(floorf(p.y))); - int zi = abs(float_to_int(floorf(p.z))); + int xi = abs(float_to_int(floorf(p.x))); + int yi = abs(float_to_int(floorf(p.y))); + int zi = abs(float_to_int(floorf(p.z))); - return ((xi % 2 == yi % 2) == (zi % 2))? 1.0f: 0.0f; + return ((xi % 2 == yi % 2) == (zi % 2)) ? 
1.0f : 0.0f; } ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - uint co_offset, color1_offset, color2_offset, scale_offset; - uint color_offset, fac_offset; + uint co_offset, color1_offset, color2_offset, scale_offset; + uint color_offset, fac_offset; - decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset); - decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL); + decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset); + decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL); - float3 co = stack_load_float3(stack, co_offset); - float3 color1 = stack_load_float3(stack, color1_offset); - float3 color2 = stack_load_float3(stack, color2_offset); - float scale = stack_load_float_default(stack, scale_offset, node.w); + float3 co = stack_load_float3(stack, co_offset); + float3 color1 = stack_load_float3(stack, color1_offset); + float3 color2 = stack_load_float3(stack, color2_offset); + float scale = stack_load_float_default(stack, scale_offset, node.w); - float f = svm_checker(co*scale); + float f = svm_checker(co * scale); - if(stack_valid(color_offset)) - stack_store_float3(stack, color_offset, (f == 1.0f)? color1: color2); - if(stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, f); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, (f == 1.0f) ? 
color1 : color2); + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index a7e87715ed4..270fe4c8615 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -20,1140 +20,1237 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float3 sigma_from_concentration(float eumelanin, float pheomelanin) { - return eumelanin*make_float3(0.506f, 0.841f, 1.653f) + pheomelanin*make_float3(0.343f, 0.733f, 1.924f); + return eumelanin * make_float3(0.506f, 0.841f, 1.653f) + + pheomelanin * make_float3(0.343f, 0.733f, 1.924f); } ccl_device_inline float3 sigma_from_reflectance(float3 color, float azimuthal_roughness) { - float x = azimuthal_roughness; - float roughness_fac = (((((0.245f*x) + 5.574f)*x - 10.73f)*x + 2.532f)*x - 0.215f)*x + 5.969f; - float3 sigma = log3(color) / roughness_fac; - return sigma * sigma; + float x = azimuthal_roughness; + float roughness_fac = (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + + 5.969f; + float3 sigma = log3(color) / roughness_fac; + return sigma * sigma; } /* Closure Nodes */ -ccl_device void svm_node_glass_setup(ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract) +ccl_device void svm_node_glass_setup( + ShaderData *sd, MicrofacetBsdf *bsdf, int type, float eta, float roughness, bool refract) { - if(type == CLOSURE_BSDF_SHARP_GLASS_ID) { - if(refract) { - bsdf->alpha_y = 0.0f; - bsdf->alpha_x = 0.0f; - bsdf->ior = eta; - sd->flag |= bsdf_refraction_setup(bsdf); - } - else { - bsdf->alpha_y = 0.0f; - bsdf->alpha_x = 0.0f; - bsdf->ior = 0.0f; - sd->flag |= bsdf_reflection_setup(bsdf); - } - } - else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) { - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - bsdf->ior = eta; - - if(refract) - sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); - else 
- sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); - } - else { - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - bsdf->ior = eta; - - if(refract) - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - else - sd->flag |= bsdf_microfacet_ggx_setup(bsdf); - } + if (type == CLOSURE_BSDF_SHARP_GLASS_ID) { + if (refract) { + bsdf->alpha_y = 0.0f; + bsdf->alpha_x = 0.0f; + bsdf->ior = eta; + sd->flag |= bsdf_refraction_setup(bsdf); + } + else { + bsdf->alpha_y = 0.0f; + bsdf->alpha_x = 0.0f; + bsdf->ior = 0.0f; + sd->flag |= bsdf_reflection_setup(bsdf); + } + } + else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID) { + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = eta; + + if (refract) + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); + else + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); + } + else { + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = eta; + + if (refract) + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); + else + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); + } } -ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset) +ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node, + ShaderType shader_type, + int path_flag, + int *offset) { - uint type, param1_offset, param2_offset; + uint type, param1_offset, param2_offset; - uint mix_weight_offset; - decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset); - float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f); + uint mix_weight_offset; + decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset); + float mix_weight = (stack_valid(mix_weight_offset) ?
stack_load_float(stack, mix_weight_offset) : + 1.0f); - /* note we read this extra node before weight check, so offset is added */ - uint4 data_node = read_node(kg, offset); + /* note we read this extra node before weight check, so offset is added */ + uint4 data_node = read_node(kg, offset); - /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */ - if(mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) { - if(type == CLOSURE_BSDF_PRINCIPLED_ID) { - /* Read all principled BSDF extra data to get the right offset. */ - read_node(kg, offset); - read_node(kg, offset); - read_node(kg, offset); - read_node(kg, offset); - } + /* Only compute BSDF for surfaces, transparent variable is shared with volume extinction. */ + if (mix_weight == 0.0f || shader_type != SHADER_TYPE_SURFACE) { + if (type == CLOSURE_BSDF_PRINCIPLED_ID) { + /* Read all principled BSDF extra data to get the right offset. */ + read_node(kg, offset); + read_node(kg, offset); + read_node(kg, offset); + read_node(kg, offset); + } - return; - } + return; + } - float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N; + float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N; - float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z); - float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w); + float param1 = (stack_valid(param1_offset)) ? stack_load_float(stack, param1_offset) : + __uint_as_float(node.z); + float param2 = (stack_valid(param2_offset)) ? 
stack_load_float(stack, param2_offset) : + __uint_as_float(node.w); - switch(type) { + switch (type) { #ifdef __PRINCIPLED__ - case CLOSURE_BSDF_PRINCIPLED_ID: { - uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset, - sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, eta_offset, transmission_offset, - anisotropic_rotation_offset, transmission_roughness_offset; - uint4 data_node2 = read_node(kg, offset); - - float3 T = stack_load_float3(stack, data_node.y); - decode_node_uchar4(data_node.z, &specular_offset, &roughness_offset, &specular_tint_offset, &anisotropic_offset); - decode_node_uchar4(data_node.w, &sheen_offset, &sheen_tint_offset, &clearcoat_offset, &clearcoat_roughness_offset); - decode_node_uchar4(data_node2.x, &eta_offset, &transmission_offset, &anisotropic_rotation_offset, &transmission_roughness_offset); - - // get Disney principled parameters - float metallic = param1; - float subsurface = param2; - float specular = stack_load_float(stack, specular_offset); - float roughness = stack_load_float(stack, roughness_offset); - float specular_tint = stack_load_float(stack, specular_tint_offset); - float anisotropic = stack_load_float(stack, anisotropic_offset); - float sheen = stack_load_float(stack, sheen_offset); - float sheen_tint = stack_load_float(stack, sheen_tint_offset); - float clearcoat = stack_load_float(stack, clearcoat_offset); - float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset); - float transmission = stack_load_float(stack, transmission_offset); - float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset); - float transmission_roughness = stack_load_float(stack, transmission_roughness_offset); - float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f); - - ClosureType distribution = (ClosureType) data_node2.y; - ClosureType subsurface_method = (ClosureType) data_node2.z; - - /* rotate tangent */ - if(anisotropic_rotation != 0.0f) 
- T = rotate_around_axis(T, N, anisotropic_rotation * M_2PI_F); - - /* calculate ior */ - float ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; - - // calculate fresnel for refraction - float cosNO = dot(N, sd->I); - float fresnel = fresnel_dielectric_cos(cosNO, ior); - - // calculate weights of the diffuse and specular part - float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission)); - - float final_transmission = saturate(transmission) * (1.0f - saturate(metallic)); - float specular_weight = (1.0f - final_transmission); - - // get the base color - uint4 data_base_color = read_node(kg, offset); - float3 base_color = stack_valid(data_base_color.x) ? stack_load_float3(stack, data_base_color.x) : - make_float3(__uint_as_float(data_base_color.y), __uint_as_float(data_base_color.z), __uint_as_float(data_base_color.w)); - - // get the additional clearcoat normal and subsurface scattering radius - uint4 data_cn_ssr = read_node(kg, offset); - float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ? stack_load_float3(stack, data_cn_ssr.x) : sd->N; - float3 subsurface_radius = stack_valid(data_cn_ssr.y) ? stack_load_float3(stack, data_cn_ssr.y) : make_float3(1.0f, 1.0f, 1.0f); - - // get the subsurface color - uint4 data_subsurface_color = read_node(kg, offset); - float3 subsurface_color = stack_valid(data_subsurface_color.x) ? 
stack_load_float3(stack, data_subsurface_color.x) : - make_float3(__uint_as_float(data_subsurface_color.y), __uint_as_float(data_subsurface_color.z), __uint_as_float(data_subsurface_color.w)); - - float3 weight = sd->svm_closure_weight * mix_weight; - -#ifdef __SUBSURFACE__ - float3 mixed_ss_base_color = subsurface_color * subsurface + base_color * (1.0f - subsurface); - float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight; - - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { - subsurface = 0.0f; - - /* need to set the base color in this case such that the - * rays get the correctly mixed color after transmitting - * the object */ - base_color = mixed_ss_base_color; - } - - /* diffuse */ - if(fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) { - if(subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { - float3 diff_weight = weight * base_color * diffuse_weight; - - PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight); - - if(bsdf) { - bsdf->N = N; - bsdf->roughness = roughness; - - /* setup bsdf */ - sd->flag |= bsdf_principled_diffuse_setup(bsdf); - } - } - else if(subsurface > CLOSURE_WEIGHT_CUTOFF) { - Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight); - - if(bssrdf) { - bssrdf->radius = subsurface_radius * subsurface; - bssrdf->albedo = (subsurface_method == CLOSURE_BSSRDF_PRINCIPLED_ID)? 
subsurface_color: mixed_ss_base_color; - bssrdf->texture_blur = 0.0f; - bssrdf->sharpness = 0.0f; - bssrdf->N = N; - bssrdf->roughness = roughness; - - /* setup bsdf */ - sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method); - } - } - } -#else - /* diffuse */ - if(diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { - float3 diff_weight = weight * base_color * diffuse_weight; - - PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), diff_weight); - - if(bsdf) { - bsdf->N = N; - bsdf->roughness = roughness; - - /* setup bsdf */ - sd->flag |= bsdf_principled_diffuse_setup(bsdf); - } - } -#endif - - /* sheen */ - if(diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) { - float m_cdlum = linear_rgb_to_gray(kg, base_color); - float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : make_float3(1.0f, 1.0f, 1.0f); // normalize lum. to isolate hue+sat - - /* color of the sheen component */ - float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) + m_ctint * sheen_tint; - - float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight; - - PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf*)bsdf_alloc(sd, sizeof(PrincipledSheenBsdf), sheen_weight); - - if(bsdf) { - bsdf->N = N; - - /* setup bsdf */ - sd->flag |= bsdf_principled_sheen_setup(bsdf); - } - } - - /* specular reflection */ + case CLOSURE_BSDF_PRINCIPLED_ID: { + uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, + sheen_offset, sheen_tint_offset, clearcoat_offset, clearcoat_roughness_offset, + eta_offset, transmission_offset, anisotropic_rotation_offset, + transmission_roughness_offset; + uint4 data_node2 = read_node(kg, offset); + + float3 T = stack_load_float3(stack, data_node.y); + decode_node_uchar4(data_node.z, + &specular_offset, + &roughness_offset, + &specular_tint_offset, + &anisotropic_offset); + decode_node_uchar4(data_node.w, + &sheen_offset, + &sheen_tint_offset, + &clearcoat_offset, + 
&clearcoat_roughness_offset); + decode_node_uchar4(data_node2.x, + &eta_offset, + &transmission_offset, + &anisotropic_rotation_offset, + &transmission_roughness_offset); + + // get Disney principled parameters + float metallic = param1; + float subsurface = param2; + float specular = stack_load_float(stack, specular_offset); + float roughness = stack_load_float(stack, roughness_offset); + float specular_tint = stack_load_float(stack, specular_tint_offset); + float anisotropic = stack_load_float(stack, anisotropic_offset); + float sheen = stack_load_float(stack, sheen_offset); + float sheen_tint = stack_load_float(stack, sheen_tint_offset); + float clearcoat = stack_load_float(stack, clearcoat_offset); + float clearcoat_roughness = stack_load_float(stack, clearcoat_roughness_offset); + float transmission = stack_load_float(stack, transmission_offset); + float anisotropic_rotation = stack_load_float(stack, anisotropic_rotation_offset); + float transmission_roughness = stack_load_float(stack, transmission_roughness_offset); + float eta = fmaxf(stack_load_float(stack, eta_offset), 1e-5f); + + ClosureType distribution = (ClosureType)data_node2.y; + ClosureType subsurface_method = (ClosureType)data_node2.z; + + /* rotate tangent */ + if (anisotropic_rotation != 0.0f) + T = rotate_around_axis(T, N, anisotropic_rotation * M_2PI_F); + + /* calculate ior */ + float ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; + + // calculate fresnel for refraction + float cosNO = dot(N, sd->I); + float fresnel = fresnel_dielectric_cos(cosNO, ior); + + // calculate weights of the diffuse and specular part + float diffuse_weight = (1.0f - saturate(metallic)) * (1.0f - saturate(transmission)); + + float final_transmission = saturate(transmission) * (1.0f - saturate(metallic)); + float specular_weight = (1.0f - final_transmission); + + // get the base color + uint4 data_base_color = read_node(kg, offset); + float3 base_color = stack_valid(data_base_color.x) ? 
+ stack_load_float3(stack, data_base_color.x) : + make_float3(__uint_as_float(data_base_color.y), + __uint_as_float(data_base_color.z), + __uint_as_float(data_base_color.w)); + + // get the additional clearcoat normal and subsurface scattering radius + uint4 data_cn_ssr = read_node(kg, offset); + float3 clearcoat_normal = stack_valid(data_cn_ssr.x) ? + stack_load_float3(stack, data_cn_ssr.x) : + sd->N; + float3 subsurface_radius = stack_valid(data_cn_ssr.y) ? + stack_load_float3(stack, data_cn_ssr.y) : + make_float3(1.0f, 1.0f, 1.0f); + + // get the subsurface color + uint4 data_subsurface_color = read_node(kg, offset); + float3 subsurface_color = stack_valid(data_subsurface_color.x) ? + stack_load_float3(stack, data_subsurface_color.x) : + make_float3(__uint_as_float(data_subsurface_color.y), + __uint_as_float(data_subsurface_color.z), + __uint_as_float(data_subsurface_color.w)); + + float3 weight = sd->svm_closure_weight * mix_weight; + +# ifdef __SUBSURFACE__ + float3 mixed_ss_base_color = subsurface_color * subsurface + + base_color * (1.0f - subsurface); + float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight; + + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) { + subsurface = 0.0f; + + /* need to set the base color in this case such that the + * rays get the correctly mixed color after transmitting + * the object */ + base_color = mixed_ss_base_color; + } + + /* diffuse */ + if (fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) { + if (subsurface <= CLOSURE_WEIGHT_CUTOFF && diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { + float3 diff_weight = weight * base_color * diffuse_weight; + + PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc( + sd, sizeof(PrincipledDiffuseBsdf), diff_weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness = roughness; + + /* setup 
bsdf */ + sd->flag |= bsdf_principled_diffuse_setup(bsdf); + } + } + else if (subsurface > CLOSURE_WEIGHT_CUTOFF) { + Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight); + + if (bssrdf) { + bssrdf->radius = subsurface_radius * subsurface; + bssrdf->albedo = (subsurface_method == CLOSURE_BSSRDF_PRINCIPLED_ID) ? + subsurface_color : + mixed_ss_base_color; + bssrdf->texture_blur = 0.0f; + bssrdf->sharpness = 0.0f; + bssrdf->N = N; + bssrdf->roughness = roughness; + + /* setup bsdf */ + sd->flag |= bssrdf_setup(sd, bssrdf, subsurface_method); + } + } + } +# else + /* diffuse */ + if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF) { + float3 diff_weight = weight * base_color * diffuse_weight; + + PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf *)bsdf_alloc( + sd, sizeof(PrincipledDiffuseBsdf), diff_weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness = roughness; + + /* setup bsdf */ + sd->flag |= bsdf_principled_diffuse_setup(bsdf); + } + } +# endif + + /* sheen */ + if (diffuse_weight > CLOSURE_WEIGHT_CUTOFF && sheen > CLOSURE_WEIGHT_CUTOFF) { + float m_cdlum = linear_rgb_to_gray(kg, base_color); + float3 m_ctint = m_cdlum > 0.0f ? + base_color / m_cdlum : + make_float3(1.0f, 1.0f, 1.0f); // normalize lum. 
to isolate hue+sat + + /* color of the sheen component */ + float3 sheen_color = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - sheen_tint) + + m_ctint * sheen_tint; + + float3 sheen_weight = weight * sheen * sheen_color * diffuse_weight; + + PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc( + sd, sizeof(PrincipledSheenBsdf), sheen_weight); + + if (bsdf) { + bsdf->N = N; + + /* setup bsdf */ + sd->flag |= bsdf_principled_sheen_setup(bsdf); + } + } + + /* specular reflection */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { +# endif + if (specular_weight > CLOSURE_WEIGHT_CUTOFF && + (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) { + float3 spec_weight = weight * specular_weight; + + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), spec_weight); + MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)) : + NULL; + + if (bsdf && extra) { + bsdf->N = N; + bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f; + bsdf->T = T; + bsdf->extra = extra; + + float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f); + float r2 = roughness * roughness; + + bsdf->alpha_x = r2 / aspect; + bsdf->alpha_y = r2 * aspect; + + float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + + 0.1f * base_color.z; // luminance approx. + float3 m_ctint = m_cdlum > 0.0f ? + base_color / m_cdlum : + make_float3( + 0.0f, 0.0f, 0.0f); // normalize lum. 
to isolate hue+sat + float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + + m_ctint * specular_tint; + + bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + + base_color * metallic; + bsdf->extra->color = base_color; + bsdf->extra->clearcoat = 0.0f; + + /* setup bsdf */ + if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || + roughness <= 0.075f) /* use single-scatter GGX */ + sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd); + else /* use multi-scatter GGX */ + sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd); + } + } +# ifdef __CAUSTICS_TRICKS__ + } +# endif + + /* BSDF */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || + kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) { +# endif + if (final_transmission > CLOSURE_WEIGHT_CUTOFF) { + float3 glass_weight = weight * final_transmission; + float3 cspec0 = base_color * specular_tint + + make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint); + + if (roughness <= 5e-2f || + distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */ + float refl_roughness = roughness; + + /* reflection */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) +# endif + { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), glass_weight * fresnel); + MicrofacetExtra *extra = (bsdf != NULL) ? 
(MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)) : + NULL; + + if (bsdf && extra) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = extra; + + bsdf->alpha_x = refl_roughness * refl_roughness; + bsdf->alpha_y = refl_roughness * refl_roughness; + bsdf->ior = ior; + + bsdf->extra->color = base_color; + bsdf->extra->cspec0 = cspec0; + bsdf->extra->clearcoat = 0.0f; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); + } + } + + /* refraction */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) +# endif + { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), base_color * glass_weight * (1.0f - fresnel)); + if (bsdf) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = NULL; + + if (distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) + transmission_roughness = 1.0f - (1.0f - refl_roughness) * + (1.0f - transmission_roughness); + else + transmission_roughness = refl_roughness; + + bsdf->alpha_x = transmission_roughness * transmission_roughness; + bsdf->alpha_y = transmission_roughness * transmission_roughness; + bsdf->ior = ior; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); + } + } + } + else { /* use multi-scatter GGX */ + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), glass_weight); + MicrofacetExtra *extra = (bsdf != NULL) ? 
(MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)) : + NULL; + + if (bsdf && extra) { + bsdf->N = N; + bsdf->extra = extra; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + + bsdf->alpha_x = roughness * roughness; + bsdf->alpha_y = roughness * roughness; + bsdf->ior = ior; + + bsdf->extra->color = base_color; + bsdf->extra->cspec0 = cspec0; + bsdf->extra->clearcoat = 0.0f; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); + } + } + } +# ifdef __CAUSTICS_TRICKS__ + } +# endif + + /* clearcoat */ +# ifdef __CAUSTICS_TRICKS__ + if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { +# endif + if (clearcoat > CLOSURE_WEIGHT_CUTOFF) { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); + MicrofacetExtra *extra = (bsdf != NULL) ? (MicrofacetExtra *)closure_alloc_extra( + sd, sizeof(MicrofacetExtra)) : + NULL; + + if (bsdf && extra) { + bsdf->N = clearcoat_normal; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->ior = 1.5f; + bsdf->extra = extra; + + bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; + + bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); + bsdf->extra->clearcoat = clearcoat; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); + } + } +# ifdef __CAUSTICS_TRICKS__ + } +# endif + + break; + } +#endif /* __PRINCIPLED__ */ + case CLOSURE_BSDF_DIFFUSE_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + OrenNayarBsdf *bsdf = (OrenNayarBsdf *)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight); + + if (bsdf) { + bsdf->N = N; + + float roughness = param1; + + if (roughness == 0.0f) { + sd->flag |= bsdf_diffuse_setup((DiffuseBsdf *)bsdf); + } + else { + bsdf->roughness = roughness; + sd->flag |= bsdf_oren_nayar_setup(bsdf); + } + } + break; + } + case 
CLOSURE_BSDF_TRANSLUCENT_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + DiffuseBsdf *bsdf = (DiffuseBsdf *)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight); + + if (bsdf) { + bsdf->N = N; + sd->flag |= bsdf_translucent_setup(bsdf); + } + break; + } + case CLOSURE_BSDF_TRANSPARENT_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + bsdf_transparent_setup(sd, weight, path_flag); + break; + } + case CLOSURE_BSDF_REFLECTION_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: { #ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { + if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) + break; #endif - if(specular_weight > CLOSURE_WEIGHT_CUTOFF && (specular > CLOSURE_WEIGHT_CUTOFF || metallic > CLOSURE_WEIGHT_CUTOFF)) { - float3 spec_weight = weight * specular_weight; - - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), spec_weight); - MicrofacetExtra *extra = (bsdf != NULL) - ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) - : NULL; - - if(bsdf && extra) { - bsdf->N = N; - bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f; - bsdf->T = T; - bsdf->extra = extra; - - float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f); - float r2 = roughness * roughness; - - bsdf->alpha_x = r2 / aspect; - bsdf->alpha_y = r2 * aspect; - - float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + 0.1f * base_color.z; // luminance approx. - float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : make_float3(0.0f, 0.0f, 0.0f); // normalize lum. 
to isolate hue+sat - float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + m_ctint * specular_tint; - - bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + base_color * metallic; - bsdf->extra->color = base_color; - bsdf->extra->clearcoat = 0.0f; - - /* setup bsdf */ - if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */ - sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd); - else /* use multi-scatter GGX */ - sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd); - } - } + float3 weight = sd->svm_closure_weight * mix_weight; + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); + + if (!bsdf) { + break; + } + + float roughness = sqr(param1); + + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = 0.0f; + bsdf->extra = NULL; + + /* setup bsdf */ + if (type == CLOSURE_BSDF_REFLECTION_ID) + sd->flag |= bsdf_reflection_setup(bsdf); + else if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); + else if (type == CLOSURE_BSDF_MICROFACET_GGX_ID) + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); + else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) { + kernel_assert(stack_valid(data_node.z)); + bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (bsdf->extra) { + bsdf->extra->color = stack_load_float3(stack, data_node.z); + bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->clearcoat = 0.0f; + sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); + } + } + else { + sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf); + } + + break; + } + case CLOSURE_BSDF_REFRACTION_ID: + case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: { #ifdef __CAUSTICS_TRICKS__ - } + if 
(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) + break; #endif - - /* BSDF */ + float3 weight = sd->svm_closure_weight * mix_weight; + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = NULL; + + float eta = fmaxf(param2, 1e-5f); + eta = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; + + /* setup bsdf */ + if (type == CLOSURE_BSDF_REFRACTION_ID) { + bsdf->alpha_x = 0.0f; + bsdf->alpha_y = 0.0f; + bsdf->ior = eta; + + sd->flag |= bsdf_refraction_setup(bsdf); + } + else { + float roughness = sqr(param1); + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + bsdf->ior = eta; + + if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) + sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); + else + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); + } + } + + break; + } + case CLOSURE_BSDF_SHARP_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID: + case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: { #ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.caustics_reflective || kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) { + if (!kernel_data.integrator.caustics_reflective && + !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) { + break; + } #endif - if(final_transmission > CLOSURE_WEIGHT_CUTOFF) { - float3 glass_weight = weight * final_transmission; - float3 cspec0 = base_color * specular_tint + make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint); + float3 weight = sd->svm_closure_weight * mix_weight; - if(roughness <= 5e-2f || distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) { /* use single-scatter GGX */ - float refl_roughness = roughness; + /* index of refraction */ + float eta = fmaxf(param2, 1e-5f); + eta = (sd->flag & SD_BACKFACING) ? 
1.0f / eta : eta; - /* reflection */ -#ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) -#endif - { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), glass_weight*fresnel); - MicrofacetExtra *extra = (bsdf != NULL) - ? (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) - : NULL; - - if(bsdf && extra) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = extra; - - bsdf->alpha_x = refl_roughness * refl_roughness; - bsdf->alpha_y = refl_roughness * refl_roughness; - bsdf->ior = ior; - - bsdf->extra->color = base_color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); - } - } - - /* refraction */ -#ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) -#endif - { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), base_color*glass_weight*(1.0f - fresnel)); - if(bsdf) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = NULL; - - if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) - transmission_roughness = 1.0f - (1.0f - refl_roughness) * (1.0f - transmission_roughness); - else - transmission_roughness = refl_roughness; - - bsdf->alpha_x = transmission_roughness * transmission_roughness; - bsdf->alpha_y = transmission_roughness * transmission_roughness; - bsdf->ior = ior; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - } - } - } - else { /* use multi-scatter GGX */ - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), glass_weight); - MicrofacetExtra *extra = (bsdf != NULL) - ? 
(MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) - : NULL; - - if(bsdf && extra) { - bsdf->N = N; - bsdf->extra = extra; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - - bsdf->alpha_x = roughness * roughness; - bsdf->alpha_y = roughness * roughness; - bsdf->ior = ior; - - bsdf->extra->color = base_color; - bsdf->extra->cspec0 = cspec0; - bsdf->extra->clearcoat = 0.0f; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); - } - } - } -#ifdef __CAUSTICS_TRICKS__ - } -#endif + /* fresnel */ + float cosNO = dot(N, sd->I); + float fresnel = fresnel_dielectric_cos(cosNO, eta); + float roughness = sqr(param1); - /* clearcoat */ + /* reflection */ #ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) { + if (kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) #endif - if(clearcoat > CLOSURE_WEIGHT_CUTOFF) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - MicrofacetExtra *extra = (bsdf != NULL) - ? 
(MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)) - : NULL; - - if(bsdf && extra) { - bsdf->N = clearcoat_normal; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->ior = 1.5f; - bsdf->extra = extra; - - bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; - bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; - - bsdf->extra->color = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); - bsdf->extra->clearcoat = clearcoat; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); - } - } + { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), weight * fresnel); + + if (bsdf) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = NULL; + svm_node_glass_setup(sd, bsdf, type, eta, roughness, false); + } + } + + /* refraction */ #ifdef __CAUSTICS_TRICKS__ - } + if (kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) #endif - - break; - } -#endif /* __PRINCIPLED__ */ - case CLOSURE_BSDF_DIFFUSE_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - OrenNayarBsdf *bsdf = (OrenNayarBsdf*)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight); - - if(bsdf) { - bsdf->N = N; - - float roughness = param1; - - if(roughness == 0.0f) { - sd->flag |= bsdf_diffuse_setup((DiffuseBsdf*)bsdf); - } - else { - bsdf->roughness = roughness; - sd->flag |= bsdf_oren_nayar_setup(bsdf); - } - } - break; - } - case CLOSURE_BSDF_TRANSLUCENT_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight); - - if(bsdf) { - bsdf->N = N; - sd->flag |= bsdf_translucent_setup(bsdf); - } - break; - } - case CLOSURE_BSDF_TRANSPARENT_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - bsdf_transparent_setup(sd, weight, path_flag); - break; - } - case CLOSURE_BSDF_REFLECTION_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ID: - case 
CLOSURE_BSDF_MICROFACET_BECKMANN_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID: { + { + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc( + sd, sizeof(MicrofacetBsdf), weight * (1.0f - fresnel)); + + if (bsdf) { + bsdf->N = N; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra = NULL; + svm_node_glass_setup(sd, bsdf, type, eta, roughness, true); + } + } + + break; + } + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: { #ifdef __CAUSTICS_TRICKS__ - if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) - break; + if (!kernel_data.integrator.caustics_reflective && + !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) + break; #endif - float3 weight = sd->svm_closure_weight * mix_weight; - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - - if(!bsdf) { - break; - } - - float roughness = sqr(param1); - - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - bsdf->ior = 0.0f; - bsdf->extra = NULL; - - /* setup bsdf */ - if(type == CLOSURE_BSDF_REFLECTION_ID) - sd->flag |= bsdf_reflection_setup(bsdf); - else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) - sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); - else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID) - sd->flag |= bsdf_microfacet_ggx_setup(bsdf); - else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) { - kernel_assert(stack_valid(data_node.z)); - bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(bsdf->extra) { - bsdf->extra->color = stack_load_float3(stack, data_node.z); - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->clearcoat = 0.0f; - sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); - } - } - else { - sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf); - } - - break; - } - case CLOSURE_BSDF_REFRACTION_ID: - case 
CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); + if (!bsdf) { + break; + } + + MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (!extra) { + break; + } + + bsdf->N = N; + bsdf->extra = extra; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + + float roughness = sqr(param1); + bsdf->alpha_x = roughness; + bsdf->alpha_y = roughness; + float eta = fmaxf(param2, 1e-5f); + bsdf->ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta; + + kernel_assert(stack_valid(data_node.z)); + bsdf->extra->color = stack_load_float3(stack, data_node.z); + bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->clearcoat = 0.0f; + + /* setup bsdf */ + sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); + break; + } + case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: + case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID: + case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: { #ifdef __CAUSTICS_TRICKS__ - if(!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) - break; + if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) + break; #endif - float3 weight = sd->svm_closure_weight * mix_weight; - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - - if(bsdf) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = NULL; - - float eta = fmaxf(param2, 1e-5f); - eta = (sd->flag & SD_BACKFACING)? 
1.0f/eta: eta; - - /* setup bsdf */ - if(type == CLOSURE_BSDF_REFRACTION_ID) { - bsdf->alpha_x = 0.0f; - bsdf->alpha_y = 0.0f; - bsdf->ior = eta; - - sd->flag |= bsdf_refraction_setup(bsdf); - } - else { - float roughness = sqr(param1); - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - bsdf->ior = eta; - - if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID) - sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf); - else - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - } - } - - break; - } - case CLOSURE_BSDF_SHARP_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID: - case CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->extra = NULL; + bsdf->T = stack_load_float3(stack, data_node.y); + + /* rotate tangent */ + float rotation = stack_load_float(stack, data_node.z); + + if (rotation != 0.0f) + bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F); + + /* compute roughness */ + float roughness = sqr(param1); + float anisotropy = clamp(param2, -0.99f, 0.99f); + + if (anisotropy < 0.0f) { + bsdf->alpha_x = roughness / (1.0f + anisotropy); + bsdf->alpha_y = roughness * (1.0f + anisotropy); + } + else { + bsdf->alpha_x = roughness * (1.0f - anisotropy); + bsdf->alpha_y = roughness / (1.0f - anisotropy); + } + + bsdf->ior = 0.0f; + + if (type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) { + sd->flag |= bsdf_microfacet_beckmann_aniso_setup(bsdf); + } + else if (type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) { + sd->flag |= bsdf_microfacet_ggx_aniso_setup(bsdf); + } + else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) { + kernel_assert(stack_valid(data_node.w)); + bsdf->extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if (bsdf->extra) { + bsdf->extra->color = stack_load_float3(stack, data_node.w); + 
bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); + bsdf->extra->clearcoat = 0.0f; + sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf); + } + } + else + sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(bsdf); + } + break; + } + case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + VelvetBsdf *bsdf = (VelvetBsdf *)bsdf_alloc(sd, sizeof(VelvetBsdf), weight); + + if (bsdf) { + bsdf->N = N; + + bsdf->sigma = saturate(param1); + sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf); + } + break; + } + case CLOSURE_BSDF_GLOSSY_TOON_ID: #ifdef __CAUSTICS_TRICKS__ - if(!kernel_data.integrator.caustics_reflective && - !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) - { - break; - } + if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) + break; + ATTR_FALLTHROUGH; #endif - float3 weight = sd->svm_closure_weight * mix_weight; - - /* index of refraction */ - float eta = fmaxf(param2, 1e-5f); - eta = (sd->flag & SD_BACKFACING)? 
1.0f/eta: eta; - - /* fresnel */ - float cosNO = dot(N, sd->I); - float fresnel = fresnel_dielectric_cos(cosNO, eta); - float roughness = sqr(param1); - - /* reflection */ -#ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.caustics_reflective || (path_flag & PATH_RAY_DIFFUSE) == 0) -#endif - { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight*fresnel); - - if(bsdf) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = NULL; - svm_node_glass_setup(sd, bsdf, type, eta, roughness, false); - } - } - - /* refraction */ -#ifdef __CAUSTICS_TRICKS__ - if(kernel_data.integrator.caustics_refractive || (path_flag & PATH_RAY_DIFFUSE) == 0) -#endif - { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight*(1.0f - fresnel)); - - if(bsdf) { - bsdf->N = N; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra = NULL; - svm_node_glass_setup(sd, bsdf, type, eta, roughness, true); - } - } - - break; - } - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID: { -#ifdef __CAUSTICS_TRICKS__ - if(!kernel_data.integrator.caustics_reflective && !kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE)) - break; -#endif - float3 weight = sd->svm_closure_weight * mix_weight; - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - if(!bsdf) { - break; - } - - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(!extra) { - break; - } - - bsdf->N = N; - bsdf->extra = extra; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - - float roughness = sqr(param1); - bsdf->alpha_x = roughness; - bsdf->alpha_y = roughness; - float eta = fmaxf(param2, 1e-5f); - bsdf->ior = (sd->flag & SD_BACKFACING)? 
1.0f/eta: eta; - - kernel_assert(stack_valid(data_node.z)); - bsdf->extra->color = stack_load_float3(stack, data_node.z); - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); - bsdf->extra->clearcoat = 0.0f; - - /* setup bsdf */ - sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); - break; - } - case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID: - case CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID: - case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID: { -#ifdef __CAUSTICS_TRICKS__ - if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) - break; -#endif - float3 weight = sd->svm_closure_weight * mix_weight; - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - - if(bsdf) { - bsdf->N = N; - bsdf->extra = NULL; - bsdf->T = stack_load_float3(stack, data_node.y); - - /* rotate tangent */ - float rotation = stack_load_float(stack, data_node.z); - - if(rotation != 0.0f) - bsdf->T = rotate_around_axis(bsdf->T, bsdf->N, rotation * M_2PI_F); - - /* compute roughness */ - float roughness = sqr(param1); - float anisotropy = clamp(param2, -0.99f, 0.99f); - - if(anisotropy < 0.0f) { - bsdf->alpha_x = roughness/(1.0f + anisotropy); - bsdf->alpha_y = roughness*(1.0f + anisotropy); - } - else { - bsdf->alpha_x = roughness*(1.0f - anisotropy); - bsdf->alpha_y = roughness/(1.0f - anisotropy); - } - - bsdf->ior = 0.0f; - - if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID) { - sd->flag |= bsdf_microfacet_beckmann_aniso_setup(bsdf); - } - else if(type == CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID) { - sd->flag |= bsdf_microfacet_ggx_aniso_setup(bsdf); - } - else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID) { - kernel_assert(stack_valid(data_node.w)); - bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(bsdf->extra) { - bsdf->extra->color = stack_load_float3(stack, data_node.w); - bsdf->extra->cspec0 = make_float3(0.0f, 0.0f, 0.0f); - 
bsdf->extra->clearcoat = 0.0f; - sd->flag |= bsdf_microfacet_multi_ggx_aniso_setup(bsdf); - } - } - else - sd->flag |= bsdf_ashikhmin_shirley_aniso_setup(bsdf); - } - break; - } - case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - VelvetBsdf *bsdf = (VelvetBsdf*)bsdf_alloc(sd, sizeof(VelvetBsdf), weight); - - if(bsdf) { - bsdf->N = N; - - bsdf->sigma = saturate(param1); - sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf); - } - break; - } - case CLOSURE_BSDF_GLOSSY_TOON_ID: -#ifdef __CAUSTICS_TRICKS__ - if(!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE)) - break; - ATTR_FALLTHROUGH; -#endif - case CLOSURE_BSDF_DIFFUSE_TOON_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - ToonBsdf *bsdf = (ToonBsdf*)bsdf_alloc(sd, sizeof(ToonBsdf), weight); - - if(bsdf) { - bsdf->N = N; - bsdf->size = param1; - bsdf->smooth = param2; - - if(type == CLOSURE_BSDF_DIFFUSE_TOON_ID) - sd->flag |= bsdf_diffuse_toon_setup(bsdf); - else - sd->flag |= bsdf_glossy_toon_setup(bsdf); - } - break; - } + case CLOSURE_BSDF_DIFFUSE_TOON_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + ToonBsdf *bsdf = (ToonBsdf *)bsdf_alloc(sd, sizeof(ToonBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->size = param1; + bsdf->smooth = param2; + + if (type == CLOSURE_BSDF_DIFFUSE_TOON_ID) + sd->flag |= bsdf_diffuse_toon_setup(bsdf); + else + sd->flag |= bsdf_glossy_toon_setup(bsdf); + } + break; + } #ifdef __HAIR__ - case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: { - uint4 data_node2 = read_node(kg, offset); - uint4 data_node3 = read_node(kg, offset); - uint4 data_node4 = read_node(kg, offset); - - float3 weight = sd->svm_closure_weight * mix_weight; - - uint offset_ofs, ior_ofs, color_ofs, parametrization; - decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, ¶metrization); - float alpha = stack_load_float_default(stack, offset_ofs, data_node.z); - float ior = stack_load_float_default(stack, ior_ofs, 
data_node.w); - - uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs; - decode_node_uchar4(data_node2.x, &coat_ofs, &melanin_ofs, &melanin_redness_ofs, &absorption_coefficient_ofs); - - uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs; - decode_node_uchar4(data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs); - - const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y); - float random = 0.0f; - if(attr_descr_random.offset != ATTR_STD_NOT_FOUND) { - random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL); - } - else { - random = stack_load_float_default(stack, random_ofs, data_node3.y); - } - - - PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)bsdf_alloc(sd, sizeof(PrincipledHairBSDF), weight); - if(bsdf) { - PrincipledHairExtra *extra = (PrincipledHairExtra*)closure_alloc_extra(sd, sizeof(PrincipledHairExtra)); - - if(!extra) - break; - - /* Random factors range: [-randomization/2, +randomization/2]. */ - float random_roughness = stack_load_float_default(stack, random_roughness_ofs, data_node3.w); - float factor_random_roughness = 1.0f + 2.0f*(random - 0.5f)*random_roughness; - float roughness = param1 * factor_random_roughness; - float radial_roughness = param2 * factor_random_roughness; - - /* Remap Coat value to [0, 100]% of Roughness. 
*/ - float coat = stack_load_float_default(stack, coat_ofs, data_node2.y); - float m0_roughness = 1.0f - clamp(coat, 0.0f, 1.0f); - - bsdf->N = N; - bsdf->v = roughness; - bsdf->s = radial_roughness; - bsdf->m0_roughness = m0_roughness; - bsdf->alpha = alpha; - bsdf->eta = ior; - bsdf->extra = extra; - - switch(parametrization) { - case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: { - float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs); - bsdf->sigma = absorption_coefficient; - break; - } - case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: { - float melanin = stack_load_float_default(stack, melanin_ofs, data_node2.z); - float melanin_redness = stack_load_float_default(stack, melanin_redness_ofs, data_node2.w); - - /* Randomize melanin. */ - float random_color = stack_load_float_default(stack, random_color_ofs, data_node3.z); - random_color = clamp(random_color, 0.0f, 1.0f); - float factor_random_color = 1.0f + 2.0f * (random - 0.5f) * random_color; - melanin *= factor_random_color; - - /* Map melanin 0..inf from more perceptually linear 0..1. */ - melanin = -logf(fmaxf(1.0f - melanin, 0.0001f)); - - /* Benedikt Bitterli's melanin ratio remapping. */ - float eumelanin = melanin * (1.0f - melanin_redness); - float pheomelanin = melanin * melanin_redness; - float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin); - - /* Optional tint. */ - float3 tint = stack_load_float3(stack, tint_ofs); - float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness); - - bsdf->sigma = melanin_sigma + tint_sigma; - break; - } - case NODE_PRINCIPLED_HAIR_REFLECTANCE: { - float3 color = stack_load_float3(stack, color_ofs); - bsdf->sigma = sigma_from_reflectance(color, radial_roughness); - break; - } - default: { - /* Fallback to brownish hair, same as defaults for melanin. 
*/ - kernel_assert(!"Invalid Principled Hair parametrization!"); - bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f); - break; - } - } - - sd->flag |= bsdf_principled_hair_setup(sd, bsdf); - } - break; - } - case CLOSURE_BSDF_HAIR_REFLECTION_ID: - case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - - if(sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) { - /* todo: giving a fixed weight here will cause issues when - * mixing multiple BSDFS. energy will not be conserved and - * the throughput can blow up after multiple bounces. we - * better figure out a way to skip backfaces from rays - * spawned by transmission from the front */ - bsdf_transparent_setup(sd, make_float3(1.0f, 1.0f, 1.0f), path_flag); - } - else { - HairBsdf *bsdf = (HairBsdf*)bsdf_alloc(sd, sizeof(HairBsdf), weight); - - if(bsdf) { - bsdf->N = N; - bsdf->roughness1 = param1; - bsdf->roughness2 = param2; - bsdf->offset = -stack_load_float(stack, data_node.z); - - if(stack_valid(data_node.y)) { - bsdf->T = normalize(stack_load_float3(stack, data_node.y)); - } - else if(!(sd->type & PRIMITIVE_ALL_CURVE)) { - bsdf->T = normalize(sd->dPdv); - bsdf->offset = 0.0f; - } - else - bsdf->T = normalize(sd->dPdu); - - if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) { - sd->flag |= bsdf_hair_reflection_setup(bsdf); - } - else { - sd->flag |= bsdf_hair_transmission_setup(bsdf); - } - } - } - - break; - } -#endif /* __HAIR__ */ + case CLOSURE_BSDF_HAIR_PRINCIPLED_ID: { + uint4 data_node2 = read_node(kg, offset); + uint4 data_node3 = read_node(kg, offset); + uint4 data_node4 = read_node(kg, offset); + + float3 weight = sd->svm_closure_weight * mix_weight; + + uint offset_ofs, ior_ofs, color_ofs, parametrization; + decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, ¶metrization); + float alpha = stack_load_float_default(stack, offset_ofs, data_node.z); + float ior = stack_load_float_default(stack, ior_ofs, data_node.w); + + uint coat_ofs, 
melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs; + decode_node_uchar4(data_node2.x, + &coat_ofs, + &melanin_ofs, + &melanin_redness_ofs, + &absorption_coefficient_ofs); + + uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs; + decode_node_uchar4( + data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs); + + const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y); + float random = 0.0f; + if (attr_descr_random.offset != ATTR_STD_NOT_FOUND) { + random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL); + } + else { + random = stack_load_float_default(stack, random_ofs, data_node3.y); + } + + PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc( + sd, sizeof(PrincipledHairBSDF), weight); + if (bsdf) { + PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra( + sd, sizeof(PrincipledHairExtra)); + + if (!extra) + break; + + /* Random factors range: [-randomization/2, +randomization/2]. */ + float random_roughness = stack_load_float_default( + stack, random_roughness_ofs, data_node3.w); + float factor_random_roughness = 1.0f + 2.0f * (random - 0.5f) * random_roughness; + float roughness = param1 * factor_random_roughness; + float radial_roughness = param2 * factor_random_roughness; + + /* Remap Coat value to [0, 100]% of Roughness. 
*/ + float coat = stack_load_float_default(stack, coat_ofs, data_node2.y); + float m0_roughness = 1.0f - clamp(coat, 0.0f, 1.0f); + + bsdf->N = N; + bsdf->v = roughness; + bsdf->s = radial_roughness; + bsdf->m0_roughness = m0_roughness; + bsdf->alpha = alpha; + bsdf->eta = ior; + bsdf->extra = extra; + + switch (parametrization) { + case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: { + float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs); + bsdf->sigma = absorption_coefficient; + break; + } + case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: { + float melanin = stack_load_float_default(stack, melanin_ofs, data_node2.z); + float melanin_redness = stack_load_float_default( + stack, melanin_redness_ofs, data_node2.w); + + /* Randomize melanin. */ + float random_color = stack_load_float_default(stack, random_color_ofs, data_node3.z); + random_color = clamp(random_color, 0.0f, 1.0f); + float factor_random_color = 1.0f + 2.0f * (random - 0.5f) * random_color; + melanin *= factor_random_color; + + /* Map melanin 0..inf from more perceptually linear 0..1. */ + melanin = -logf(fmaxf(1.0f - melanin, 0.0001f)); + + /* Benedikt Bitterli's melanin ratio remapping. */ + float eumelanin = melanin * (1.0f - melanin_redness); + float pheomelanin = melanin * melanin_redness; + float3 melanin_sigma = sigma_from_concentration(eumelanin, pheomelanin); + + /* Optional tint. */ + float3 tint = stack_load_float3(stack, tint_ofs); + float3 tint_sigma = sigma_from_reflectance(tint, radial_roughness); + + bsdf->sigma = melanin_sigma + tint_sigma; + break; + } + case NODE_PRINCIPLED_HAIR_REFLECTANCE: { + float3 color = stack_load_float3(stack, color_ofs); + bsdf->sigma = sigma_from_reflectance(color, radial_roughness); + break; + } + default: { + /* Fallback to brownish hair, same as defaults for melanin. 
*/ + kernel_assert(!"Invalid Principled Hair parametrization!"); + bsdf->sigma = sigma_from_concentration(0.0f, 0.8054375f); + break; + } + } + + sd->flag |= bsdf_principled_hair_setup(sd, bsdf); + } + break; + } + case CLOSURE_BSDF_HAIR_REFLECTION_ID: + case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + + if (sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) { + /* todo: giving a fixed weight here will cause issues when + * mixing multiple BSDFS. energy will not be conserved and + * the throughput can blow up after multiple bounces. we + * better figure out a way to skip backfaces from rays + * spawned by transmission from the front */ + bsdf_transparent_setup(sd, make_float3(1.0f, 1.0f, 1.0f), path_flag); + } + else { + HairBsdf *bsdf = (HairBsdf *)bsdf_alloc(sd, sizeof(HairBsdf), weight); + + if (bsdf) { + bsdf->N = N; + bsdf->roughness1 = param1; + bsdf->roughness2 = param2; + bsdf->offset = -stack_load_float(stack, data_node.z); + + if (stack_valid(data_node.y)) { + bsdf->T = normalize(stack_load_float3(stack, data_node.y)); + } + else if (!(sd->type & PRIMITIVE_ALL_CURVE)) { + bsdf->T = normalize(sd->dPdv); + bsdf->offset = 0.0f; + } + else + bsdf->T = normalize(sd->dPdu); + + if (type == CLOSURE_BSDF_HAIR_REFLECTION_ID) { + sd->flag |= bsdf_hair_reflection_setup(bsdf); + } + else { + sd->flag |= bsdf_hair_transmission_setup(bsdf); + } + } + } + + break; + } +#endif /* __HAIR__ */ #ifdef __SUBSURFACE__ - case CLOSURE_BSSRDF_CUBIC_ID: - case CLOSURE_BSSRDF_GAUSSIAN_ID: - case CLOSURE_BSSRDF_BURLEY_ID: - case CLOSURE_BSSRDF_RANDOM_WALK_ID: { - float3 weight = sd->svm_closure_weight * mix_weight; - Bssrdf *bssrdf = bssrdf_alloc(sd, weight); - - if(bssrdf) { - /* disable in case of diffuse ancestor, can't see it well then and - * adds considerably noise due to probabilities of continuing path - * getting lower and lower */ - if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) - param1 = 0.0f; - - bssrdf->radius = 
stack_load_float3(stack, data_node.z)*param1; - bssrdf->albedo = sd->svm_closure_weight; - bssrdf->texture_blur = param2; - bssrdf->sharpness = stack_load_float(stack, data_node.w); - bssrdf->N = N; - bssrdf->roughness = 0.0f; - sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type); - } - - break; - } + case CLOSURE_BSSRDF_CUBIC_ID: + case CLOSURE_BSSRDF_GAUSSIAN_ID: + case CLOSURE_BSSRDF_BURLEY_ID: + case CLOSURE_BSSRDF_RANDOM_WALK_ID: { + float3 weight = sd->svm_closure_weight * mix_weight; + Bssrdf *bssrdf = bssrdf_alloc(sd, weight); + + if (bssrdf) { + /* disable in case of diffuse ancestor, can't see it well then and + * adds considerably noise due to probabilities of continuing path + * getting lower and lower */ + if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) + param1 = 0.0f; + + bssrdf->radius = stack_load_float3(stack, data_node.z) * param1; + bssrdf->albedo = sd->svm_closure_weight; + bssrdf->texture_blur = param2; + bssrdf->sharpness = stack_load_float(stack, data_node.w); + bssrdf->N = N; + bssrdf->roughness = 0.0f; + sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type); + } + + break; + } #endif - default: - break; - } + default: + break; + } } -ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type) +ccl_device void svm_node_closure_volume( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type) { #ifdef __VOLUME__ - /* Only sum extinction for volumes, variable is shared with surface transparency. */ - if(shader_type != SHADER_TYPE_VOLUME) { - return; - } - - uint type, density_offset, anisotropy_offset; - - uint mix_weight_offset; - decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset); - float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f); - - if(mix_weight == 0.0f) { - return; - } - - float density = (stack_valid(density_offset))? 
stack_load_float(stack, density_offset): __uint_as_float(node.z); - density = mix_weight * fmaxf(density, 0.0f); - - /* Compute scattering coefficient. */ - float3 weight = sd->svm_closure_weight; - - if(type == CLOSURE_VOLUME_ABSORPTION_ID) { - weight = make_float3(1.0f, 1.0f, 1.0f) - weight; - } - - weight *= density; - - /* Add closure for volume scattering. */ - if(type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { - HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc(sd, sizeof(HenyeyGreensteinVolume), weight); - - if(volume) { - float anisotropy = (stack_valid(anisotropy_offset))? stack_load_float(stack, anisotropy_offset): __uint_as_float(node.w); - volume->g = anisotropy; /* g */ - sd->flag |= volume_henyey_greenstein_setup(volume); - } - } - - /* Sum total extinction weight. */ - volume_extinction_setup(sd, weight); + /* Only sum extinction for volumes, variable is shared with surface transparency. */ + if (shader_type != SHADER_TYPE_VOLUME) { + return; + } + + uint type, density_offset, anisotropy_offset; + + uint mix_weight_offset; + decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset); + float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : + 1.0f); + + if (mix_weight == 0.0f) { + return; + } + + float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) : + __uint_as_float(node.z); + density = mix_weight * fmaxf(density, 0.0f); + + /* Compute scattering coefficient. */ + float3 weight = sd->svm_closure_weight; + + if (type == CLOSURE_VOLUME_ABSORPTION_ID) { + weight = make_float3(1.0f, 1.0f, 1.0f) - weight; + } + + weight *= density; + + /* Add closure for volume scattering. */ + if (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) { + HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc( + sd, sizeof(HenyeyGreensteinVolume), weight); + + if (volume) { + float anisotropy = (stack_valid(anisotropy_offset)) ? 
+ stack_load_float(stack, anisotropy_offset) : + __uint_as_float(node.w); + volume->g = anisotropy; /* g */ + sd->flag |= volume_henyey_greenstein_setup(volume); + } + } + + /* Sum total extinction weight. */ + volume_extinction_setup(sd, weight); #endif } -ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, ShaderType shader_type, int path_flag, int *offset) +ccl_device void svm_node_principled_volume(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node, + ShaderType shader_type, + int path_flag, + int *offset) { #ifdef __VOLUME__ - uint4 value_node = read_node(kg, offset); - uint4 attr_node = read_node(kg, offset); - - /* Only sum extinction for volumes, variable is shared with surface transparency. */ - if(shader_type != SHADER_TYPE_VOLUME) { - return; - } - - uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset; - decode_node_uchar4(node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset); - float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f); - - if(mix_weight == 0.0f) { - return; - } - - /* Compute density. */ - float primitive_density = 1.0f; - float density = (stack_valid(density_offset))? stack_load_float(stack, density_offset): __uint_as_float(value_node.x); - density = mix_weight * fmaxf(density, 0.0f); - - if(density > CLOSURE_WEIGHT_CUTOFF) { - /* Density and color attribute lookup if available. */ - const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x); - if(attr_density.offset != ATTR_STD_NOT_FOUND) { - primitive_density = primitive_volume_attribute_float(kg, sd, attr_density); - density = fmaxf(density * primitive_density, 0.0f); - } - } - - if(density > CLOSURE_WEIGHT_CUTOFF) { - /* Compute scattering color. 
*/ - float3 color = sd->svm_closure_weight; - - const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y); - if(attr_color.offset != ATTR_STD_NOT_FOUND) { - color *= primitive_volume_attribute_float3(kg, sd, attr_color); - } - - /* Add closure for volume scattering. */ - HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume*)bsdf_alloc(sd, sizeof(HenyeyGreensteinVolume), color * density); - if(volume) { - float anisotropy = (stack_valid(anisotropy_offset))? stack_load_float(stack, anisotropy_offset): __uint_as_float(value_node.y); - volume->g = anisotropy; - sd->flag |= volume_henyey_greenstein_setup(volume); - } - - /* Add extinction weight. */ - float3 zero = make_float3(0.0f, 0.0f, 0.0f); - float3 one = make_float3(1.0f, 1.0f, 1.0f); - float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero); - float3 absorption = max(one - color, zero) * max(one - absorption_color, zero); - volume_extinction_setup(sd, (color + absorption) * density); - } - - /* Compute emission. */ - if(path_flag & PATH_RAY_SHADOW) { - /* Don't need emission for shadows. */ - return; - } - - uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset; - decode_node_uchar4(node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset); - float emission = (stack_valid(emission_offset))? stack_load_float(stack, emission_offset): __uint_as_float(value_node.z); - float blackbody = (stack_valid(blackbody_offset))? stack_load_float(stack, blackbody_offset): __uint_as_float(value_node.w); - - if(emission > CLOSURE_WEIGHT_CUTOFF) { - float3 emission_color = stack_load_float3(stack, emission_color_offset); - emission_setup(sd, emission * emission_color); - } - - if(blackbody > CLOSURE_WEIGHT_CUTOFF) { - float T = stack_load_float(stack, temperature_offset); - - /* Add flame temperature from attribute if available. 
*/ - const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z); - if(attr_temperature.offset != ATTR_STD_NOT_FOUND) { - float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature); - T *= fmaxf(temperature, 0.0f); - } - - T = fmaxf(T, 0.0f); - - /* Stefan-Boltzmann law. */ - float T4 = sqr(sqr(T)); - float sigma = 5.670373e-8f * 1e-6f / M_PI_F; - float intensity = sigma * mix(1.0f, T4, blackbody); - - if(intensity > CLOSURE_WEIGHT_CUTOFF) { - float3 blackbody_tint = stack_load_float3(stack, node.w); - float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T); - emission_setup(sd, bb); - } - } + uint4 value_node = read_node(kg, offset); + uint4 attr_node = read_node(kg, offset); + + /* Only sum extinction for volumes, variable is shared with surface transparency. */ + if (shader_type != SHADER_TYPE_VOLUME) { + return; + } + + uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset; + decode_node_uchar4( + node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset); + float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : + 1.0f); + + if (mix_weight == 0.0f) { + return; + } + + /* Compute density. */ + float primitive_density = 1.0f; + float density = (stack_valid(density_offset)) ? stack_load_float(stack, density_offset) : + __uint_as_float(value_node.x); + density = mix_weight * fmaxf(density, 0.0f); + + if (density > CLOSURE_WEIGHT_CUTOFF) { + /* Density and color attribute lookup if available. */ + const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x); + if (attr_density.offset != ATTR_STD_NOT_FOUND) { + primitive_density = primitive_volume_attribute_float(kg, sd, attr_density); + density = fmaxf(density * primitive_density, 0.0f); + } + } + + if (density > CLOSURE_WEIGHT_CUTOFF) { + /* Compute scattering color. 
*/ + float3 color = sd->svm_closure_weight; + + const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y); + if (attr_color.offset != ATTR_STD_NOT_FOUND) { + color *= primitive_volume_attribute_float3(kg, sd, attr_color); + } + + /* Add closure for volume scattering. */ + HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc( + sd, sizeof(HenyeyGreensteinVolume), color * density); + if (volume) { + float anisotropy = (stack_valid(anisotropy_offset)) ? + stack_load_float(stack, anisotropy_offset) : + __uint_as_float(value_node.y); + volume->g = anisotropy; + sd->flag |= volume_henyey_greenstein_setup(volume); + } + + /* Add extinction weight. */ + float3 zero = make_float3(0.0f, 0.0f, 0.0f); + float3 one = make_float3(1.0f, 1.0f, 1.0f); + float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero); + float3 absorption = max(one - color, zero) * max(one - absorption_color, zero); + volume_extinction_setup(sd, (color + absorption) * density); + } + + /* Compute emission. */ + if (path_flag & PATH_RAY_SHADOW) { + /* Don't need emission for shadows. */ + return; + } + + uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset; + decode_node_uchar4( + node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset); + float emission = (stack_valid(emission_offset)) ? stack_load_float(stack, emission_offset) : + __uint_as_float(value_node.z); + float blackbody = (stack_valid(blackbody_offset)) ? stack_load_float(stack, blackbody_offset) : + __uint_as_float(value_node.w); + + if (emission > CLOSURE_WEIGHT_CUTOFF) { + float3 emission_color = stack_load_float3(stack, emission_color_offset); + emission_setup(sd, emission * emission_color); + } + + if (blackbody > CLOSURE_WEIGHT_CUTOFF) { + float T = stack_load_float(stack, temperature_offset); + + /* Add flame temperature from attribute if available. 
*/ + const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z); + if (attr_temperature.offset != ATTR_STD_NOT_FOUND) { + float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature); + T *= fmaxf(temperature, 0.0f); + } + + T = fmaxf(T, 0.0f); + + /* Stefan-Boltzmann law. */ + float T4 = sqr(sqr(T)); + float sigma = 5.670373e-8f * 1e-6f / M_PI_F; + float intensity = sigma * mix(1.0f, T4, blackbody); + + if (intensity > CLOSURE_WEIGHT_CUTOFF) { + float3 blackbody_tint = stack_load_float3(stack, node.w); + float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T); + emission_setup(sd, bb); + } + } #endif } ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node) { - uint mix_weight_offset = node.y; - float3 weight = sd->svm_closure_weight; + uint mix_weight_offset = node.y; + float3 weight = sd->svm_closure_weight; - if(stack_valid(mix_weight_offset)) { - float mix_weight = stack_load_float(stack, mix_weight_offset); + if (stack_valid(mix_weight_offset)) { + float mix_weight = stack_load_float(stack, mix_weight_offset); - if(mix_weight == 0.0f) - return; + if (mix_weight == 0.0f) + return; - weight *= mix_weight; - } + weight *= mix_weight; + } - emission_setup(sd, weight); + emission_setup(sd, weight); } ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node) { - uint mix_weight_offset = node.y; - float3 weight = sd->svm_closure_weight; + uint mix_weight_offset = node.y; + float3 weight = sd->svm_closure_weight; - if(stack_valid(mix_weight_offset)) { - float mix_weight = stack_load_float(stack, mix_weight_offset); + if (stack_valid(mix_weight_offset)) { + float mix_weight = stack_load_float(stack, mix_weight_offset); - if(mix_weight == 0.0f) - return; + if (mix_weight == 0.0f) + return; - weight *= mix_weight; - } + weight *= mix_weight; + } - background_setup(sd, weight); + background_setup(sd, weight); } ccl_device void svm_node_closure_holdout(ShaderData 
*sd, float *stack, uint4 node) { - uint mix_weight_offset = node.y; + uint mix_weight_offset = node.y; - if(stack_valid(mix_weight_offset)) { - float mix_weight = stack_load_float(stack, mix_weight_offset); + if (stack_valid(mix_weight_offset)) { + float mix_weight = stack_load_float(stack, mix_weight_offset); - if(mix_weight == 0.0f) - return; + if (mix_weight == 0.0f) + return; - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight); - } - else - closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight); + closure_alloc( + sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight * mix_weight); + } + else + closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, sd->svm_closure_weight); - sd->flag |= SD_HOLDOUT; + sd->flag |= SD_HOLDOUT; } /* Closure Nodes */ ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight) { - sd->svm_closure_weight = weight; + sd->svm_closure_weight = weight; } ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b) { - float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)); - svm_node_closure_store_weight(sd, weight); + float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)); + svm_node_closure_store_weight(sd, weight); } ccl_device void svm_node_closure_weight(ShaderData *sd, float *stack, uint weight_offset) { - float3 weight = stack_load_float3(stack, weight_offset); + float3 weight = stack_load_float3(stack, weight_offset); - svm_node_closure_store_weight(sd, weight); + svm_node_closure_store_weight(sd, weight); } -ccl_device void svm_node_emission_weight(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device void svm_node_emission_weight(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { - uint color_offset = node.y; - uint strength_offset = node.z; + uint color_offset = node.y; + uint 
strength_offset = node.z; - float strength = stack_load_float(stack, strength_offset); - float3 weight = stack_load_float3(stack, color_offset)*strength; + float strength = stack_load_float(stack, strength_offset); + float3 weight = stack_load_float3(stack, color_offset) * strength; - svm_node_closure_store_weight(sd, weight); + svm_node_closure_store_weight(sd, weight); } ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) { - /* fetch weight from blend input, previous mix closures, - * and write to stack to be used by closure nodes later */ - uint weight_offset, in_weight_offset, weight1_offset, weight2_offset; - decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset); + /* fetch weight from blend input, previous mix closures, + * and write to stack to be used by closure nodes later */ + uint weight_offset, in_weight_offset, weight1_offset, weight2_offset; + decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset); - float weight = stack_load_float(stack, weight_offset); - weight = saturate(weight); + float weight = stack_load_float(stack, weight_offset); + weight = saturate(weight); - float in_weight = (stack_valid(in_weight_offset))? stack_load_float(stack, in_weight_offset): 1.0f; + float in_weight = (stack_valid(in_weight_offset)) ? 
stack_load_float(stack, in_weight_offset) : + 1.0f; - if(stack_valid(weight1_offset)) - stack_store_float(stack, weight1_offset, in_weight*(1.0f - weight)); - if(stack_valid(weight2_offset)) - stack_store_float(stack, weight2_offset, in_weight*weight); + if (stack_valid(weight1_offset)) + stack_store_float(stack, weight1_offset, in_weight * (1.0f - weight)); + if (stack_valid(weight2_offset)) + stack_store_float(stack, weight2_offset, in_weight * weight); } /* (Bump) normal */ -ccl_device void svm_node_set_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal) +ccl_device void svm_node_set_normal( + KernelGlobals *kg, ShaderData *sd, float *stack, uint in_direction, uint out_normal) { - float3 normal = stack_load_float3(stack, in_direction); - sd->N = normal; - stack_store_float3(stack, out_normal, normal); + float3 normal = stack_load_float3(stack, in_direction); + sd->N = normal; + stack_store_float3(stack, out_normal, normal); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h index d5945f915c6..12b59d2616b 100644 --- a/intern/cycles/kernel/svm/svm_color_util.h +++ b/intern/cycles/kernel/svm/svm_color_util.h @@ -18,288 +18,310 @@ CCL_NAMESPACE_BEGIN ccl_device float3 svm_mix_blend(float t, float3 col1, float3 col2) { - return interp(col1, col2, t); + return interp(col1, col2, t); } ccl_device float3 svm_mix_add(float t, float3 col1, float3 col2) { - return interp(col1, col1 + col2, t); + return interp(col1, col1 + col2, t); } ccl_device float3 svm_mix_mul(float t, float3 col1, float3 col2) { - return interp(col1, col1 * col2, t); + return interp(col1, col1 * col2, t); } ccl_device float3 svm_mix_screen(float t, float3 col1, float3 col2) { - float tm = 1.0f - t; - float3 one = make_float3(1.0f, 1.0f, 1.0f); - float3 tm3 = make_float3(tm, tm, tm); + float tm = 1.0f - t; + float3 one = make_float3(1.0f, 1.0f, 1.0f); + float3 tm3 = make_float3(tm, tm, tm); - 
return one - (tm3 + t*(one - col2))*(one - col1); + return one - (tm3 + t * (one - col2)) * (one - col1); } ccl_device float3 svm_mix_overlay(float t, float3 col1, float3 col2) { - float tm = 1.0f - t; + float tm = 1.0f - t; - float3 outcol = col1; + float3 outcol = col1; - if(outcol.x < 0.5f) - outcol.x *= tm + 2.0f*t*col2.x; - else - outcol.x = 1.0f - (tm + 2.0f*t*(1.0f - col2.x))*(1.0f - outcol.x); + if (outcol.x < 0.5f) + outcol.x *= tm + 2.0f * t * col2.x; + else + outcol.x = 1.0f - (tm + 2.0f * t * (1.0f - col2.x)) * (1.0f - outcol.x); - if(outcol.y < 0.5f) - outcol.y *= tm + 2.0f*t*col2.y; - else - outcol.y = 1.0f - (tm + 2.0f*t*(1.0f - col2.y))*(1.0f - outcol.y); + if (outcol.y < 0.5f) + outcol.y *= tm + 2.0f * t * col2.y; + else + outcol.y = 1.0f - (tm + 2.0f * t * (1.0f - col2.y)) * (1.0f - outcol.y); - if(outcol.z < 0.5f) - outcol.z *= tm + 2.0f*t*col2.z; - else - outcol.z = 1.0f - (tm + 2.0f*t*(1.0f - col2.z))*(1.0f - outcol.z); + if (outcol.z < 0.5f) + outcol.z *= tm + 2.0f * t * col2.z; + else + outcol.z = 1.0f - (tm + 2.0f * t * (1.0f - col2.z)) * (1.0f - outcol.z); - return outcol; + return outcol; } ccl_device float3 svm_mix_sub(float t, float3 col1, float3 col2) { - return interp(col1, col1 - col2, t); + return interp(col1, col1 - col2, t); } ccl_device float3 svm_mix_div(float t, float3 col1, float3 col2) { - float tm = 1.0f - t; + float tm = 1.0f - t; - float3 outcol = col1; + float3 outcol = col1; - if(col2.x != 0.0f) outcol.x = tm*outcol.x + t*outcol.x/col2.x; - if(col2.y != 0.0f) outcol.y = tm*outcol.y + t*outcol.y/col2.y; - if(col2.z != 0.0f) outcol.z = tm*outcol.z + t*outcol.z/col2.z; + if (col2.x != 0.0f) + outcol.x = tm * outcol.x + t * outcol.x / col2.x; + if (col2.y != 0.0f) + outcol.y = tm * outcol.y + t * outcol.y / col2.y; + if (col2.z != 0.0f) + outcol.z = tm * outcol.z + t * outcol.z / col2.z; - return outcol; + return outcol; } ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2) { - return interp(col1, fabs(col1 - 
col2), t); + return interp(col1, fabs(col1 - col2), t); } ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2) { - return min(col1, col2)*t + col1*(1.0f - t); + return min(col1, col2) * t + col1 * (1.0f - t); } ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2) { - return max(col1, col2*t); + return max(col1, col2 * t); } ccl_device float3 svm_mix_dodge(float t, float3 col1, float3 col2) { - float3 outcol = col1; - - if(outcol.x != 0.0f) { - float tmp = 1.0f - t*col2.x; - if(tmp <= 0.0f) - outcol.x = 1.0f; - else if((tmp = outcol.x/tmp) > 1.0f) - outcol.x = 1.0f; - else - outcol.x = tmp; - } - if(outcol.y != 0.0f) { - float tmp = 1.0f - t*col2.y; - if(tmp <= 0.0f) - outcol.y = 1.0f; - else if((tmp = outcol.y/tmp) > 1.0f) - outcol.y = 1.0f; - else - outcol.y = tmp; - } - if(outcol.z != 0.0f) { - float tmp = 1.0f - t*col2.z; - if(tmp <= 0.0f) - outcol.z = 1.0f; - else if((tmp = outcol.z/tmp) > 1.0f) - outcol.z = 1.0f; - else - outcol.z = tmp; - } - - return outcol; + float3 outcol = col1; + + if (outcol.x != 0.0f) { + float tmp = 1.0f - t * col2.x; + if (tmp <= 0.0f) + outcol.x = 1.0f; + else if ((tmp = outcol.x / tmp) > 1.0f) + outcol.x = 1.0f; + else + outcol.x = tmp; + } + if (outcol.y != 0.0f) { + float tmp = 1.0f - t * col2.y; + if (tmp <= 0.0f) + outcol.y = 1.0f; + else if ((tmp = outcol.y / tmp) > 1.0f) + outcol.y = 1.0f; + else + outcol.y = tmp; + } + if (outcol.z != 0.0f) { + float tmp = 1.0f - t * col2.z; + if (tmp <= 0.0f) + outcol.z = 1.0f; + else if ((tmp = outcol.z / tmp) > 1.0f) + outcol.z = 1.0f; + else + outcol.z = tmp; + } + + return outcol; } ccl_device float3 svm_mix_burn(float t, float3 col1, float3 col2) { - float tmp, tm = 1.0f - t; - - float3 outcol = col1; - - tmp = tm + t*col2.x; - if(tmp <= 0.0f) - outcol.x = 0.0f; - else if((tmp = (1.0f - (1.0f - outcol.x)/tmp)) < 0.0f) - outcol.x = 0.0f; - else if(tmp > 1.0f) - outcol.x = 1.0f; - else - outcol.x = tmp; - - tmp = tm + t*col2.y; - if(tmp <= 0.0f) - outcol.y = 
0.0f; - else if((tmp = (1.0f - (1.0f - outcol.y)/tmp)) < 0.0f) - outcol.y = 0.0f; - else if(tmp > 1.0f) - outcol.y = 1.0f; - else - outcol.y = tmp; - - tmp = tm + t*col2.z; - if(tmp <= 0.0f) - outcol.z = 0.0f; - else if((tmp = (1.0f - (1.0f - outcol.z)/tmp)) < 0.0f) - outcol.z = 0.0f; - else if(tmp > 1.0f) - outcol.z = 1.0f; - else - outcol.z = tmp; - - return outcol; + float tmp, tm = 1.0f - t; + + float3 outcol = col1; + + tmp = tm + t * col2.x; + if (tmp <= 0.0f) + outcol.x = 0.0f; + else if ((tmp = (1.0f - (1.0f - outcol.x) / tmp)) < 0.0f) + outcol.x = 0.0f; + else if (tmp > 1.0f) + outcol.x = 1.0f; + else + outcol.x = tmp; + + tmp = tm + t * col2.y; + if (tmp <= 0.0f) + outcol.y = 0.0f; + else if ((tmp = (1.0f - (1.0f - outcol.y) / tmp)) < 0.0f) + outcol.y = 0.0f; + else if (tmp > 1.0f) + outcol.y = 1.0f; + else + outcol.y = tmp; + + tmp = tm + t * col2.z; + if (tmp <= 0.0f) + outcol.z = 0.0f; + else if ((tmp = (1.0f - (1.0f - outcol.z) / tmp)) < 0.0f) + outcol.z = 0.0f; + else if (tmp > 1.0f) + outcol.z = 1.0f; + else + outcol.z = tmp; + + return outcol; } ccl_device float3 svm_mix_hue(float t, float3 col1, float3 col2) { - float3 outcol = col1; + float3 outcol = col1; - float3 hsv2 = rgb_to_hsv(col2); + float3 hsv2 = rgb_to_hsv(col2); - if(hsv2.y != 0.0f) { - float3 hsv = rgb_to_hsv(outcol); - hsv.x = hsv2.x; - float3 tmp = hsv_to_rgb(hsv); + if (hsv2.y != 0.0f) { + float3 hsv = rgb_to_hsv(outcol); + hsv.x = hsv2.x; + float3 tmp = hsv_to_rgb(hsv); - outcol = interp(outcol, tmp, t); - } + outcol = interp(outcol, tmp, t); + } - return outcol; + return outcol; } ccl_device float3 svm_mix_sat(float t, float3 col1, float3 col2) { - float tm = 1.0f - t; + float tm = 1.0f - t; - float3 outcol = col1; + float3 outcol = col1; - float3 hsv = rgb_to_hsv(outcol); + float3 hsv = rgb_to_hsv(outcol); - if(hsv.y != 0.0f) { - float3 hsv2 = rgb_to_hsv(col2); + if (hsv.y != 0.0f) { + float3 hsv2 = rgb_to_hsv(col2); - hsv.y = tm*hsv.y + t*hsv2.y; - outcol = hsv_to_rgb(hsv); - } 
+ hsv.y = tm * hsv.y + t * hsv2.y; + outcol = hsv_to_rgb(hsv); + } - return outcol; + return outcol; } ccl_device float3 svm_mix_val(float t, float3 col1, float3 col2) { - float tm = 1.0f - t; + float tm = 1.0f - t; - float3 hsv = rgb_to_hsv(col1); - float3 hsv2 = rgb_to_hsv(col2); + float3 hsv = rgb_to_hsv(col1); + float3 hsv2 = rgb_to_hsv(col2); - hsv.z = tm*hsv.z + t*hsv2.z; + hsv.z = tm * hsv.z + t * hsv2.z; - return hsv_to_rgb(hsv); + return hsv_to_rgb(hsv); } ccl_device float3 svm_mix_color(float t, float3 col1, float3 col2) { - float3 outcol = col1; - float3 hsv2 = rgb_to_hsv(col2); + float3 outcol = col1; + float3 hsv2 = rgb_to_hsv(col2); - if(hsv2.y != 0.0f) { - float3 hsv = rgb_to_hsv(outcol); - hsv.x = hsv2.x; - hsv.y = hsv2.y; - float3 tmp = hsv_to_rgb(hsv); + if (hsv2.y != 0.0f) { + float3 hsv = rgb_to_hsv(outcol); + hsv.x = hsv2.x; + hsv.y = hsv2.y; + float3 tmp = hsv_to_rgb(hsv); - outcol = interp(outcol, tmp, t); - } + outcol = interp(outcol, tmp, t); + } - return outcol; + return outcol; } ccl_device float3 svm_mix_soft(float t, float3 col1, float3 col2) { - float tm = 1.0f - t; + float tm = 1.0f - t; - float3 one = make_float3(1.0f, 1.0f, 1.0f); - float3 scr = one - (one - col2)*(one - col1); + float3 one = make_float3(1.0f, 1.0f, 1.0f); + float3 scr = one - (one - col2) * (one - col1); - return tm*col1 + t*((one - col1)*col2*col1 + col1*scr); + return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr); } ccl_device float3 svm_mix_linear(float t, float3 col1, float3 col2) { - return col1 + t*(2.0f*col2 + make_float3(-1.0f, -1.0f, -1.0f)); + return col1 + t * (2.0f * col2 + make_float3(-1.0f, -1.0f, -1.0f)); } ccl_device float3 svm_mix_clamp(float3 col) { - float3 outcol = col; + float3 outcol = col; - outcol.x = saturate(col.x); - outcol.y = saturate(col.y); - outcol.z = saturate(col.z); + outcol.x = saturate(col.x); + outcol.y = saturate(col.y); + outcol.z = saturate(col.z); - return outcol; + return outcol; } ccl_device_noinline float3 
svm_mix(NodeMix type, float fac, float3 c1, float3 c2) { - float t = saturate(fac); - - switch(type) { - case NODE_MIX_BLEND: return svm_mix_blend(t, c1, c2); - case NODE_MIX_ADD: return svm_mix_add(t, c1, c2); - case NODE_MIX_MUL: return svm_mix_mul(t, c1, c2); - case NODE_MIX_SCREEN: return svm_mix_screen(t, c1, c2); - case NODE_MIX_OVERLAY: return svm_mix_overlay(t, c1, c2); - case NODE_MIX_SUB: return svm_mix_sub(t, c1, c2); - case NODE_MIX_DIV: return svm_mix_div(t, c1, c2); - case NODE_MIX_DIFF: return svm_mix_diff(t, c1, c2); - case NODE_MIX_DARK: return svm_mix_dark(t, c1, c2); - case NODE_MIX_LIGHT: return svm_mix_light(t, c1, c2); - case NODE_MIX_DODGE: return svm_mix_dodge(t, c1, c2); - case NODE_MIX_BURN: return svm_mix_burn(t, c1, c2); - case NODE_MIX_HUE: return svm_mix_hue(t, c1, c2); - case NODE_MIX_SAT: return svm_mix_sat(t, c1, c2); - case NODE_MIX_VAL: return svm_mix_val (t, c1, c2); - case NODE_MIX_COLOR: return svm_mix_color(t, c1, c2); - case NODE_MIX_SOFT: return svm_mix_soft(t, c1, c2); - case NODE_MIX_LINEAR: return svm_mix_linear(t, c1, c2); - case NODE_MIX_CLAMP: return svm_mix_clamp(c1); - } - - return make_float3(0.0f, 0.0f, 0.0f); + float t = saturate(fac); + + switch (type) { + case NODE_MIX_BLEND: + return svm_mix_blend(t, c1, c2); + case NODE_MIX_ADD: + return svm_mix_add(t, c1, c2); + case NODE_MIX_MUL: + return svm_mix_mul(t, c1, c2); + case NODE_MIX_SCREEN: + return svm_mix_screen(t, c1, c2); + case NODE_MIX_OVERLAY: + return svm_mix_overlay(t, c1, c2); + case NODE_MIX_SUB: + return svm_mix_sub(t, c1, c2); + case NODE_MIX_DIV: + return svm_mix_div(t, c1, c2); + case NODE_MIX_DIFF: + return svm_mix_diff(t, c1, c2); + case NODE_MIX_DARK: + return svm_mix_dark(t, c1, c2); + case NODE_MIX_LIGHT: + return svm_mix_light(t, c1, c2); + case NODE_MIX_DODGE: + return svm_mix_dodge(t, c1, c2); + case NODE_MIX_BURN: + return svm_mix_burn(t, c1, c2); + case NODE_MIX_HUE: + return svm_mix_hue(t, c1, c2); + case NODE_MIX_SAT: + return 
svm_mix_sat(t, c1, c2); + case NODE_MIX_VAL: + return svm_mix_val(t, c1, c2); + case NODE_MIX_COLOR: + return svm_mix_color(t, c1, c2); + case NODE_MIX_SOFT: + return svm_mix_soft(t, c1, c2); + case NODE_MIX_LINEAR: + return svm_mix_linear(t, c1, c2); + case NODE_MIX_CLAMP: + return svm_mix_clamp(c1); + } + + return make_float3(0.0f, 0.0f, 0.0f); } ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, float contrast) { - float a = 1.0f + contrast; - float b = brightness - contrast*0.5f; + float a = 1.0f + contrast; + float b = brightness - contrast * 0.5f; - color.x = max(a*color.x + b, 0.0f); - color.y = max(a*color.y + b, 0.0f); - color.z = max(a*color.z + b, 0.0f); + color.x = max(a * color.x + b, 0.0f); + color.y = max(a * color.y + b, 0.0f); + color.z = max(a * color.z + b, 0.0f); - return color; + return color; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_convert.h b/intern/cycles/kernel/svm/svm_convert.h index 63b1dc6865e..5df6c9fb755 100644 --- a/intern/cycles/kernel/svm/svm_convert.h +++ b/intern/cycles/kernel/svm/svm_convert.h @@ -18,54 +18,55 @@ CCL_NAMESPACE_BEGIN /* Conversion Nodes */ -ccl_device void svm_node_convert(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to) +ccl_device void svm_node_convert( + KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint from, uint to) { - switch(type) { - case NODE_CONVERT_FI: { - float f = stack_load_float(stack, from); - stack_store_int(stack, to, float_to_int(f)); - break; - } - case NODE_CONVERT_FV: { - float f = stack_load_float(stack, from); - stack_store_float3(stack, to, make_float3(f, f, f)); - break; - } - case NODE_CONVERT_CF: { - float3 f = stack_load_float3(stack, from); - float g = linear_rgb_to_gray(kg, f); - stack_store_float(stack, to, g); - break; - } - case NODE_CONVERT_CI: { - float3 f = stack_load_float3(stack, from); - int i = (int)linear_rgb_to_gray(kg, f); - stack_store_int(stack, to, i); - break; - } - case 
NODE_CONVERT_VF: { - float3 f = stack_load_float3(stack, from); - float g = average(f); - stack_store_float(stack, to, g); - break; - } - case NODE_CONVERT_VI: { - float3 f = stack_load_float3(stack, from); - int i = (int)average(f); - stack_store_int(stack, to, i); - break; - } - case NODE_CONVERT_IF: { - float f = (float)stack_load_int(stack, from); - stack_store_float(stack, to, f); - break; - } - case NODE_CONVERT_IV: { - float f = (float)stack_load_int(stack, from); - stack_store_float3(stack, to, make_float3(f, f, f)); - break; - } - } + switch (type) { + case NODE_CONVERT_FI: { + float f = stack_load_float(stack, from); + stack_store_int(stack, to, float_to_int(f)); + break; + } + case NODE_CONVERT_FV: { + float f = stack_load_float(stack, from); + stack_store_float3(stack, to, make_float3(f, f, f)); + break; + } + case NODE_CONVERT_CF: { + float3 f = stack_load_float3(stack, from); + float g = linear_rgb_to_gray(kg, f); + stack_store_float(stack, to, g); + break; + } + case NODE_CONVERT_CI: { + float3 f = stack_load_float3(stack, from); + int i = (int)linear_rgb_to_gray(kg, f); + stack_store_int(stack, to, i); + break; + } + case NODE_CONVERT_VF: { + float3 f = stack_load_float3(stack, from); + float g = average(f); + stack_store_float(stack, to, g); + break; + } + case NODE_CONVERT_VI: { + float3 f = stack_load_float3(stack, from); + int i = (int)average(f); + stack_store_int(stack, to, i); + break; + } + case NODE_CONVERT_IF: { + float f = (float)stack_load_int(stack, from); + stack_store_float(stack, to, f); + break; + } + case NODE_CONVERT_IV: { + float f = (float)stack_load_int(stack, from); + stack_store_float3(stack, to, make_float3(f, f, f)); + break; + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h index a69c9fe81f9..f16664a684c 100644 --- a/intern/cycles/kernel/svm/svm_displace.h +++ b/intern/cycles/kernel/svm/svm_displace.h @@ -21,144 +21,149 @@ CCL_NAMESPACE_BEGIN 
ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { #ifdef __RAY_DIFFERENTIALS__ - /* get normal input */ - uint normal_offset, scale_offset, invert, use_object_space; - decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space); + /* get normal input */ + uint normal_offset, scale_offset, invert, use_object_space; + decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space); - float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; + float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; - float3 dPdx = sd->dP.dx; - float3 dPdy = sd->dP.dy; + float3 dPdx = sd->dP.dx; + float3 dPdy = sd->dP.dy; - if(use_object_space) { - object_inverse_normal_transform(kg, sd, &normal_in); - object_inverse_dir_transform(kg, sd, &dPdx); - object_inverse_dir_transform(kg, sd, &dPdy); - } + if (use_object_space) { + object_inverse_normal_transform(kg, sd, &normal_in); + object_inverse_dir_transform(kg, sd, &dPdx); + object_inverse_dir_transform(kg, sd, &dPdy); + } - /* get surface tangents from normal */ - float3 Rx = cross(dPdy, normal_in); - float3 Ry = cross(normal_in, dPdx); + /* get surface tangents from normal */ + float3 Rx = cross(dPdy, normal_in); + float3 Ry = cross(normal_in, dPdx); - /* get bump values */ - uint c_offset, x_offset, y_offset, strength_offset; - decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset); + /* get bump values */ + uint c_offset, x_offset, y_offset, strength_offset; + decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset); - float h_c = stack_load_float(stack, c_offset); - float h_x = stack_load_float(stack, x_offset); - float h_y = stack_load_float(stack, y_offset); + float h_c = stack_load_float(stack, c_offset); + float h_x = stack_load_float(stack, x_offset); + float h_y = stack_load_float(stack, y_offset); - /* compute 
surface gradient and determinant */ - float det = dot(dPdx, Rx); - float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry; + /* compute surface gradient and determinant */ + float det = dot(dPdx, Rx); + float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry; - float absdet = fabsf(det); + float absdet = fabsf(det); - float strength = stack_load_float(stack, strength_offset); - float scale = stack_load_float(stack, scale_offset); + float strength = stack_load_float(stack, strength_offset); + float scale = stack_load_float(stack, scale_offset); - if(invert) - scale *= -1.0f; + if (invert) + scale *= -1.0f; - strength = max(strength, 0.0f); + strength = max(strength, 0.0f); - /* compute and output perturbed normal */ - float3 normal_out = safe_normalize(absdet*normal_in - scale*signf(det)*surfgrad); - if(is_zero(normal_out)) { - normal_out = normal_in; - } - else { - normal_out = normalize(strength*normal_out + (1.0f - strength)*normal_in); - } + /* compute and output perturbed normal */ + float3 normal_out = safe_normalize(absdet * normal_in - scale * signf(det) * surfgrad); + if (is_zero(normal_out)) { + normal_out = normal_in; + } + else { + normal_out = normalize(strength * normal_out + (1.0f - strength) * normal_in); + } - if(use_object_space) { - object_normal_transform(kg, sd, &normal_out); - } + if (use_object_space) { + object_normal_transform(kg, sd, &normal_out); + } - normal_out = ensure_valid_reflection(sd->Ng, sd->I, normal_out); + normal_out = ensure_valid_reflection(sd->Ng, sd->I, normal_out); - stack_store_float3(stack, node.w, normal_out); + stack_store_float3(stack, node.w, normal_out); #endif } /* Displacement Node */ -ccl_device void svm_node_set_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint fac_offset) +ccl_device void svm_node_set_displacement(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint fac_offset) { - float3 dP = stack_load_float3(stack, fac_offset); - sd->P += dP; + float3 dP = stack_load_float3(stack, 
fac_offset); + sd->P += dP; } ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - uint height_offset, midlevel_offset, scale_offset, normal_offset; - decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset); - - float height = stack_load_float(stack, height_offset); - float midlevel = stack_load_float(stack, midlevel_offset); - float scale = stack_load_float(stack, scale_offset); - float3 normal = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; - uint space = node.w; - - float3 dP = normal; - - if(space == NODE_NORMAL_MAP_OBJECT) { - /* Object space. */ - object_inverse_normal_transform(kg, sd, &dP); - dP *= (height - midlevel) * scale; - object_dir_transform(kg, sd, &dP); - } - else { - /* World space. */ - dP *= (height - midlevel) * scale; - } - - stack_store_float3(stack, node.z, dP); + uint height_offset, midlevel_offset, scale_offset, normal_offset; + decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset); + + float height = stack_load_float(stack, height_offset); + float midlevel = stack_load_float(stack, midlevel_offset); + float scale = stack_load_float(stack, scale_offset); + float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; + uint space = node.w; + + float3 dP = normal; + + if (space == NODE_NORMAL_MAP_OBJECT) { + /* Object space. */ + object_inverse_normal_transform(kg, sd, &dP); + dP *= (height - midlevel) * scale; + object_dir_transform(kg, sd, &dP); + } + else { + /* World space. 
*/ + dP *= (height - midlevel) * scale; + } + + stack_store_float3(stack, node.z, dP); } -ccl_device void svm_node_vector_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_vector_displacement( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint4 data_node = read_node(kg, offset); - uint space = data_node.x; - - uint vector_offset, midlevel_offset,scale_offset, displacement_offset; - decode_node_uchar4(node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset); - - float3 vector = stack_load_float3(stack, vector_offset); - float midlevel = stack_load_float(stack, midlevel_offset); - float scale = stack_load_float(stack, scale_offset); - float3 dP = (vector - make_float3(midlevel, midlevel, midlevel)) * scale; - - if(space == NODE_NORMAL_MAP_TANGENT) { - /* Tangent space. */ - float3 normal = sd->N; - object_inverse_normal_transform(kg, sd, &normal); - - const AttributeDescriptor attr = find_attribute(kg, sd, node.z); - float3 tangent; - if(attr.offset != ATTR_STD_NOT_FOUND) { - tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); - } - else { - tangent = normalize(sd->dPdu); - } - - float3 bitangent = normalize(cross(normal, tangent)); - const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); - if(attr_sign.offset != ATTR_STD_NOT_FOUND) { - float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); - bitangent *= sign; - } - - dP = tangent*dP.x + normal*dP.y + bitangent*dP.z; - } - - if(space != NODE_NORMAL_MAP_WORLD) { - /* Tangent or object space. 
*/ - object_dir_transform(kg, sd, &dP); - } - - stack_store_float3(stack, displacement_offset, dP); + uint4 data_node = read_node(kg, offset); + uint space = data_node.x; + + uint vector_offset, midlevel_offset, scale_offset, displacement_offset; + decode_node_uchar4( + node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset); + + float3 vector = stack_load_float3(stack, vector_offset); + float midlevel = stack_load_float(stack, midlevel_offset); + float scale = stack_load_float(stack, scale_offset); + float3 dP = (vector - make_float3(midlevel, midlevel, midlevel)) * scale; + + if (space == NODE_NORMAL_MAP_TANGENT) { + /* Tangent space. */ + float3 normal = sd->N; + object_inverse_normal_transform(kg, sd, &normal); + + const AttributeDescriptor attr = find_attribute(kg, sd, node.z); + float3 tangent; + if (attr.offset != ATTR_STD_NOT_FOUND) { + tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); + } + else { + tangent = normalize(sd->dPdu); + } + + float3 bitangent = normalize(cross(normal, tangent)); + const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); + if (attr_sign.offset != ATTR_STD_NOT_FOUND) { + float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); + bitangent *= sign; + } + + dP = tangent * dP.x + normal * dP.y + bitangent * dP.z; + } + + if (space != NODE_NORMAL_MAP_WORLD) { + /* Tangent or object space. 
*/ + object_dir_transform(kg, sd, &dP); + } + + stack_store_float3(stack, displacement_offset, dP); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h index 99dda5fb170..03119991597 100644 --- a/intern/cycles/kernel/svm/svm_fresnel.h +++ b/intern/cycles/kernel/svm/svm_fresnel.h @@ -18,56 +18,60 @@ CCL_NAMESPACE_BEGIN /* Fresnel Node */ -ccl_device void svm_node_fresnel(ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node) +ccl_device void svm_node_fresnel( + ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node) { - uint normal_offset, out_offset; - decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL); - float eta = (stack_valid(ior_offset))? stack_load_float(stack, ior_offset): __uint_as_float(ior_value); - float3 normal_in = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; + uint normal_offset, out_offset; + decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL); + float eta = (stack_valid(ior_offset)) ? stack_load_float(stack, ior_offset) : + __uint_as_float(ior_value); + float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; - eta = fmaxf(eta, 1e-5f); - eta = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; + eta = fmaxf(eta, 1e-5f); + eta = (sd->flag & SD_BACKFACING) ? 
1.0f / eta : eta; - float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); + float f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); - stack_store_float(stack, out_offset, f); + stack_store_float(stack, out_offset, f); } /* Layer Weight Node */ ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node) { - uint blend_offset = node.y; - uint blend_value = node.z; + uint blend_offset = node.y; + uint blend_value = node.z; - uint type, normal_offset, out_offset; - decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL); + uint type, normal_offset, out_offset; + decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL); - float blend = (stack_valid(blend_offset))? stack_load_float(stack, blend_offset): __uint_as_float(blend_value); - float3 normal_in = (stack_valid(normal_offset))? stack_load_float3(stack, normal_offset): sd->N; + float blend = (stack_valid(blend_offset)) ? stack_load_float(stack, blend_offset) : + __uint_as_float(blend_value); + float3 normal_in = (stack_valid(normal_offset)) ? stack_load_float3(stack, normal_offset) : + sd->N; - float f; + float f; - if(type == NODE_LAYER_WEIGHT_FRESNEL) { - float eta = fmaxf(1.0f - blend, 1e-5f); - eta = (sd->flag & SD_BACKFACING)? eta: 1.0f/eta; + if (type == NODE_LAYER_WEIGHT_FRESNEL) { + float eta = fmaxf(1.0f - blend, 1e-5f); + eta = (sd->flag & SD_BACKFACING) ? eta : 1.0f / eta; - f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); - } - else { - f = fabsf(dot(sd->I, normal_in)); + f = fresnel_dielectric_cos(dot(sd->I, normal_in), eta); + } + else { + f = fabsf(dot(sd->I, normal_in)); - if(blend != 0.5f) { - blend = clamp(blend, 0.0f, 1.0f-1e-5f); - blend = (blend < 0.5f)? 2.0f*blend: 0.5f/(1.0f - blend); + if (blend != 0.5f) { + blend = clamp(blend, 0.0f, 1.0f - 1e-5f); + blend = (blend < 0.5f) ? 
2.0f * blend : 0.5f / (1.0f - blend); - f = powf(f, blend); - } + f = powf(f, blend); + } - f = 1.0f - f; - } + f = 1.0f - f; + } - stack_store_float(stack, out_offset, f); + stack_store_float(stack, out_offset, f); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_gamma.h b/intern/cycles/kernel/svm/svm_gamma.h index 171945a60bc..65eb08eb0eb 100644 --- a/intern/cycles/kernel/svm/svm_gamma.h +++ b/intern/cycles/kernel/svm/svm_gamma.h @@ -16,15 +16,16 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_gamma(ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color) +ccl_device void svm_node_gamma( + ShaderData *sd, float *stack, uint in_gamma, uint in_color, uint out_color) { - float3 color = stack_load_float3(stack, in_color); - float gamma = stack_load_float(stack, in_gamma); + float3 color = stack_load_float3(stack, in_color); + float gamma = stack_load_float(stack, in_gamma); - color = svm_math_gamma_color(color, gamma); + color = svm_math_gamma_color(color, gamma); - if(stack_valid(out_color)) - stack_store_float3(stack, out_color, color); + if (stack_valid(out_color)) + stack_store_float3(stack, out_color, color); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index 05443772505..a9104643299 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -18,192 +18,217 @@ CCL_NAMESPACE_BEGIN /* Geometry Node */ -ccl_device_inline void svm_node_geometry(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint type, - uint out_offset) +ccl_device_inline void svm_node_geometry( + KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { - float3 data; - - switch(type) { - case NODE_GEOM_P: data = sd->P; break; - case NODE_GEOM_N: data = sd->N; break; + float3 data; + + switch (type) { + case NODE_GEOM_P: + data = sd->P; + break; + case NODE_GEOM_N: + data = sd->N; + break; #ifdef __DPDU__ - case NODE_GEOM_T: 
data = primitive_tangent(kg, sd); break; + case NODE_GEOM_T: + data = primitive_tangent(kg, sd); + break; #endif - case NODE_GEOM_I: data = sd->I; break; - case NODE_GEOM_Ng: data = sd->Ng; break; + case NODE_GEOM_I: + data = sd->I; + break; + case NODE_GEOM_Ng: + data = sd->Ng; + break; #ifdef __UV__ - case NODE_GEOM_uv: data = make_float3(sd->u, sd->v, 0.0f); break; + case NODE_GEOM_uv: + data = make_float3(sd->u, sd->v, 0.0f); + break; #endif - default: data = make_float3(0.0f, 0.0f, 0.0f); - } + default: + data = make_float3(0.0f, 0.0f, 0.0f); + } - stack_store_float3(stack, out_offset, data); + stack_store_float3(stack, out_offset, data); } -ccl_device void svm_node_geometry_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device void svm_node_geometry_bump_dx( + KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { #ifdef __RAY_DIFFERENTIALS__ - float3 data; - - switch(type) { - case NODE_GEOM_P: data = sd->P + sd->dP.dx; break; - case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); break; - default: svm_node_geometry(kg, sd, stack, type, out_offset); return; - } - - stack_store_float3(stack, out_offset, data); + float3 data; + + switch (type) { + case NODE_GEOM_P: + data = sd->P + sd->dP.dx; + break; + case NODE_GEOM_uv: + data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f); + break; + default: + svm_node_geometry(kg, sd, stack, type, out_offset); + return; + } + + stack_store_float3(stack, out_offset, data); #else - svm_node_geometry(kg, sd, stack, type, out_offset); + svm_node_geometry(kg, sd, stack, type, out_offset); #endif } -ccl_device void svm_node_geometry_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device void svm_node_geometry_bump_dy( + KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { #ifdef __RAY_DIFFERENTIALS__ - float3 data; - - switch(type) { - case NODE_GEOM_P: 
data = sd->P + sd->dP.dy; break; - case NODE_GEOM_uv: data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); break; - default: svm_node_geometry(kg, sd, stack, type, out_offset); return; - } - - stack_store_float3(stack, out_offset, data); + float3 data; + + switch (type) { + case NODE_GEOM_P: + data = sd->P + sd->dP.dy; + break; + case NODE_GEOM_uv: + data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f); + break; + default: + svm_node_geometry(kg, sd, stack, type, out_offset); + return; + } + + stack_store_float3(stack, out_offset, data); #else - svm_node_geometry(kg, sd, stack, type, out_offset); + svm_node_geometry(kg, sd, stack, type, out_offset); #endif } /* Object Info */ -ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) +ccl_device void svm_node_object_info( + KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { - float data; - - switch(type) { - case NODE_INFO_OB_LOCATION: { - stack_store_float3(stack, out_offset, object_location(kg, sd)); - return; - } - case NODE_INFO_OB_INDEX: data = object_pass_id(kg, sd->object); break; - case NODE_INFO_MAT_INDEX: data = shader_pass_id(kg, sd); break; - case NODE_INFO_OB_RANDOM: { - if(sd->lamp != LAMP_NONE) { - data = lamp_random_number(kg, sd->lamp); - } - else { - data = object_random_number(kg, sd->object); - } - break; - } - default: data = 0.0f; break; - } - - stack_store_float(stack, out_offset, data); + float data; + + switch (type) { + case NODE_INFO_OB_LOCATION: { + stack_store_float3(stack, out_offset, object_location(kg, sd)); + return; + } + case NODE_INFO_OB_INDEX: + data = object_pass_id(kg, sd->object); + break; + case NODE_INFO_MAT_INDEX: + data = shader_pass_id(kg, sd); + break; + case NODE_INFO_OB_RANDOM: { + if (sd->lamp != LAMP_NONE) { + data = lamp_random_number(kg, sd->lamp); + } + else { + data = object_random_number(kg, sd->object); + } + break; + } + default: + data = 0.0f; + break; + 
} + + stack_store_float(stack, out_offset, data); } /* Particle Info */ -ccl_device void svm_node_particle_info(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint type, - uint out_offset) +ccl_device void svm_node_particle_info( + KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset) { - switch(type) { - case NODE_INFO_PAR_INDEX: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float(stack, out_offset, particle_index(kg, particle_id)); - break; - } - case NODE_INFO_PAR_RANDOM: { - int particle_id = object_particle_id(kg, sd->object); - float random = hash_int_01(particle_index(kg, particle_id)); - stack_store_float(stack, out_offset, random); - break; - } - case NODE_INFO_PAR_AGE: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float(stack, out_offset, particle_age(kg, particle_id)); - break; - } - case NODE_INFO_PAR_LIFETIME: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id)); - break; - } - case NODE_INFO_PAR_LOCATION: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float3(stack, out_offset, particle_location(kg, particle_id)); - break; - } -#if 0 /* XXX float4 currently not supported in SVM stack */ - case NODE_INFO_PAR_ROTATION: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id)); - break; - } + switch (type) { + case NODE_INFO_PAR_INDEX: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float(stack, out_offset, particle_index(kg, particle_id)); + break; + } + case NODE_INFO_PAR_RANDOM: { + int particle_id = object_particle_id(kg, sd->object); + float random = hash_int_01(particle_index(kg, particle_id)); + stack_store_float(stack, out_offset, random); + break; + } + case NODE_INFO_PAR_AGE: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float(stack, 
out_offset, particle_age(kg, particle_id)); + break; + } + case NODE_INFO_PAR_LIFETIME: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id)); + break; + } + case NODE_INFO_PAR_LOCATION: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float3(stack, out_offset, particle_location(kg, particle_id)); + break; + } +#if 0 /* XXX float4 currently not supported in SVM stack */ + case NODE_INFO_PAR_ROTATION: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id)); + break; + } #endif - case NODE_INFO_PAR_SIZE: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float(stack, out_offset, particle_size(kg, particle_id)); - break; - } - case NODE_INFO_PAR_VELOCITY: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id)); - break; - } - case NODE_INFO_PAR_ANGULAR_VELOCITY: { - int particle_id = object_particle_id(kg, sd->object); - stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id)); - break; - } - } + case NODE_INFO_PAR_SIZE: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float(stack, out_offset, particle_size(kg, particle_id)); + break; + } + case NODE_INFO_PAR_VELOCITY: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id)); + break; + } + case NODE_INFO_PAR_ANGULAR_VELOCITY: { + int particle_id = object_particle_id(kg, sd->object); + stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id)); + break; + } + } } #ifdef __HAIR__ /* Hair Info */ -ccl_device void svm_node_hair_info(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint type, - uint out_offset) +ccl_device void svm_node_hair_info( + KernelGlobals *kg, ShaderData *sd, float *stack, 
uint type, uint out_offset) { - float data; - float3 data3; - - switch(type) { - case NODE_INFO_CURVE_IS_STRAND: { - data = (sd->type & PRIMITIVE_ALL_CURVE) != 0; - stack_store_float(stack, out_offset, data); - break; - } - case NODE_INFO_CURVE_INTERCEPT: - break; /* handled as attribute */ - case NODE_INFO_CURVE_RANDOM: - break; /* handled as attribute */ - case NODE_INFO_CURVE_THICKNESS: { - data = curve_thickness(kg, sd); - stack_store_float(stack, out_offset, data); - break; - } - /*case NODE_INFO_CURVE_FADE: { - data = sd->curve_transparency; - stack_store_float(stack, out_offset, data); - break; - }*/ - case NODE_INFO_CURVE_TANGENT_NORMAL: { - data3 = curve_tangent_normal(kg, sd); - stack_store_float3(stack, out_offset, data3); - break; - } - } + float data; + float3 data3; + + switch (type) { + case NODE_INFO_CURVE_IS_STRAND: { + data = (sd->type & PRIMITIVE_ALL_CURVE) != 0; + stack_store_float(stack, out_offset, data); + break; + } + case NODE_INFO_CURVE_INTERCEPT: + break; /* handled as attribute */ + case NODE_INFO_CURVE_RANDOM: + break; /* handled as attribute */ + case NODE_INFO_CURVE_THICKNESS: { + data = curve_thickness(kg, sd); + stack_store_float(stack, out_offset, data); + break; + } + /*case NODE_INFO_CURVE_FADE: { + data = sd->curve_transparency; + stack_store_float(stack, out_offset, data); + break; + }*/ + case NODE_INFO_CURVE_TANGENT_NORMAL: { + data3 = curve_tangent_normal(kg, sd); + stack_store_float3(stack, out_offset, data3); + break; + } + } } #endif diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h index 177e0506dee..c315564fbc2 100644 --- a/intern/cycles/kernel/svm/svm_gradient.h +++ b/intern/cycles/kernel/svm/svm_gradient.h @@ -20,61 +20,61 @@ CCL_NAMESPACE_BEGIN ccl_device float svm_gradient(float3 p, NodeGradientType type) { - float x, y, z; + float x, y, z; - x = p.x; - y = p.y; - z = p.z; + x = p.x; + y = p.y; + z = p.z; - if(type == NODE_BLEND_LINEAR) { - return x; - } - else if(type == 
NODE_BLEND_QUADRATIC) { - float r = fmaxf(x, 0.0f); - return r*r; - } - else if(type == NODE_BLEND_EASING) { - float r = fminf(fmaxf(x, 0.0f), 1.0f); - float t = r*r; + if (type == NODE_BLEND_LINEAR) { + return x; + } + else if (type == NODE_BLEND_QUADRATIC) { + float r = fmaxf(x, 0.0f); + return r * r; + } + else if (type == NODE_BLEND_EASING) { + float r = fminf(fmaxf(x, 0.0f), 1.0f); + float t = r * r; - return (3.0f*t - 2.0f*t*r); - } - else if(type == NODE_BLEND_DIAGONAL) { - return (x + y) * 0.5f; - } - else if(type == NODE_BLEND_RADIAL) { - return atan2f(y, x) / M_2PI_F + 0.5f; - } - else { - /* Bias a little bit for the case where p is a unit length vector, - * to get exactly zero instead of a small random value depending - * on float precision. */ - float r = fmaxf(0.999999f - sqrtf(x*x + y*y + z*z), 0.0f); + return (3.0f * t - 2.0f * t * r); + } + else if (type == NODE_BLEND_DIAGONAL) { + return (x + y) * 0.5f; + } + else if (type == NODE_BLEND_RADIAL) { + return atan2f(y, x) / M_2PI_F + 0.5f; + } + else { + /* Bias a little bit for the case where p is a unit length vector, + * to get exactly zero instead of a small random value depending + * on float precision. 
*/ + float r = fmaxf(0.999999f - sqrtf(x * x + y * y + z * z), 0.0f); - if(type == NODE_BLEND_QUADRATIC_SPHERE) - return r*r; - else if(type == NODE_BLEND_SPHERICAL) - return r; - } + if (type == NODE_BLEND_QUADRATIC_SPHERE) + return r * r; + else if (type == NODE_BLEND_SPHERICAL) + return r; + } - return 0.0f; + return 0.0f; } ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node) { - uint type, co_offset, color_offset, fac_offset; + uint type, co_offset, color_offset, fac_offset; - decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset); + decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset); - float3 co = stack_load_float3(stack, co_offset); + float3 co = stack_load_float3(stack, co_offset); - float f = svm_gradient(co, (NodeGradientType)type); - f = saturate(f); + float f = svm_gradient(co, (NodeGradientType)type); + f = saturate(f); - if(stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, f); - if(stack_valid(color_offset)) - stack_store_float3(stack, color_offset, make_float3(f, f, f)); + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, make_float3(f, f, f)); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h index 6f3efa639e2..72379fba870 100644 --- a/intern/cycles/kernel/svm/svm_hsv.h +++ b/intern/cycles/kernel/svm/svm_hsv.h @@ -19,43 +19,44 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_hsv( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint in_color_offset, fac_offset, out_color_offset; - uint hue_offset, sat_offset, val_offset; - decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL); - decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL); + uint 
in_color_offset, fac_offset, out_color_offset; + uint hue_offset, sat_offset, val_offset; + decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL); + decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL); - float fac = stack_load_float(stack, fac_offset); - float3 in_color = stack_load_float3(stack, in_color_offset); - float3 color = in_color; + float fac = stack_load_float(stack, fac_offset); + float3 in_color = stack_load_float3(stack, in_color_offset); + float3 color = in_color; - float hue = stack_load_float(stack, hue_offset); - float sat = stack_load_float(stack, sat_offset); - float val = stack_load_float(stack, val_offset); + float hue = stack_load_float(stack, hue_offset); + float sat = stack_load_float(stack, sat_offset); + float val = stack_load_float(stack, val_offset); - color = rgb_to_hsv(color); + color = rgb_to_hsv(color); - /* remember: fmod doesn't work for negative numbers here */ - color.x = fmodf(color.x + hue + 0.5f, 1.0f); - color.y = saturate(color.y * sat); - color.z *= val; + /* remember: fmod doesn't work for negative numbers here */ + color.x = fmodf(color.x + hue + 0.5f, 1.0f); + color.y = saturate(color.y * sat); + color.z *= val; - color = hsv_to_rgb(color); + color = hsv_to_rgb(color); - color.x = fac*color.x + (1.0f - fac)*in_color.x; - color.y = fac*color.y + (1.0f - fac)*in_color.y; - color.z = fac*color.z + (1.0f - fac)*in_color.z; + color.x = fac * color.x + (1.0f - fac) * in_color.x; + color.y = fac * color.y + (1.0f - fac) * in_color.y; + color.z = fac * color.z + (1.0f - fac) * in_color.z; - /* Clamp color to prevent negative values caused by oversaturation. */ - color.x = max(color.x, 0.0f); - color.y = max(color.y, 0.0f); - color.z = max(color.z, 0.0f); + /* Clamp color to prevent negative values caused by oversaturation. 
*/ + color.x = max(color.x, 0.0f); + color.y = max(color.y, 0.0f); + color.z = max(color.z, 0.0f); - if(stack_valid(out_color_offset)) - stack_store_float3(stack, out_color_offset, color); + if (stack_valid(out_color_offset)) + stack_store_float3(stack, out_color_offset, color); } CCL_NAMESPACE_END -#endif /* __SVM_HSV_H__ */ +#endif /* __SVM_HSV_H__ */ diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h index 6130c3348b0..9434c0c5505 100644 --- a/intern/cycles/kernel/svm/svm_ies.h +++ b/intern/cycles/kernel/svm/svm_ies.h @@ -18,93 +18,102 @@ CCL_NAMESPACE_BEGIN /* IES Light */ -ccl_device_inline float interpolate_ies_vertical(KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h) +ccl_device_inline float interpolate_ies_vertical( + KernelGlobals *kg, int ofs, int v, int v_num, float v_frac, int h) { - /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v - * (corresponding to the north pole) would result in artifacts. - * The proper way of dealing with this would be to lookup the corresponding value on the other side of the pole, - * but since the horizontal coordinates might be nonuniform, this would require yet another interpolation. - * Therefore, the assumtion is made that the light is going to be symmetrical, which means that we can just take - * the corresponding value at the current horizontal coordinate. */ - -#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs+h*v_num+(v)) - /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */ - float a = IES_LOOKUP((v == 0)? 1 : v-1); - float b = IES_LOOKUP(v); - float c = IES_LOOKUP(v+1); - float d = IES_LOOKUP(min(v+2, v_num-1)); + /* Since lookups are performed in spherical coordinates, clamping the coordinates at the low end of v + * (corresponding to the north pole) would result in artifacts. 
+ * The proper way of dealing with this would be to lookup the corresponding value on the other side of the pole, + * but since the horizontal coordinates might be nonuniform, this would require yet another interpolation. + * Therefore, the assumtion is made that the light is going to be symmetrical, which means that we can just take + * the corresponding value at the current horizontal coordinate. */ + +#define IES_LOOKUP(v) kernel_tex_fetch(__ies, ofs + h * v_num + (v)) + /* If v is zero, assume symmetry and read at v=1 instead of v=-1. */ + float a = IES_LOOKUP((v == 0) ? 1 : v - 1); + float b = IES_LOOKUP(v); + float c = IES_LOOKUP(v + 1); + float d = IES_LOOKUP(min(v + 2, v_num - 1)); #undef IES_LOOKUP - return cubic_interp(a, b, c, d, v_frac); + return cubic_interp(a, b, c, d, v_frac); } -ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, int slot, float h_angle, float v_angle) +ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, + int slot, + float h_angle, + float v_angle) { - /* Find offset of the IES data in the table. */ - int ofs = __float_as_int(kernel_tex_fetch(__ies, slot)); - if(ofs == -1) { - return 100.0f; - } - - int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); - int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); - -#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs+(h)) -#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs+h_num+(v)) - - /* Check whether the angle is within the bounds of the IES texture. */ - if(v_angle >= IES_LOOKUP_ANGLE_V(v_num-1)) { - return 0.0f; - } - kernel_assert(v_angle >= IES_LOOKUP_ANGLE_V(0)); - kernel_assert(h_angle >= IES_LOOKUP_ANGLE_H(0)); - kernel_assert(h_angle <= IES_LOOKUP_ANGLE_H(h_num-1)); - - /* Lookup the angles to find the table position. */ - int h_i, v_i; - /* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. 
*/ - for(h_i = 0; IES_LOOKUP_ANGLE_H(h_i+1) < h_angle; h_i++); - for(v_i = 0; IES_LOOKUP_ANGLE_V(v_i+1) < v_angle; v_i++); - - float h_frac = inverse_lerp(IES_LOOKUP_ANGLE_H(h_i), IES_LOOKUP_ANGLE_H(h_i+1), h_angle); - float v_frac = inverse_lerp(IES_LOOKUP_ANGLE_V(v_i), IES_LOOKUP_ANGLE_V(v_i+1), v_angle); + /* Find offset of the IES data in the table. */ + int ofs = __float_as_int(kernel_tex_fetch(__ies, slot)); + if (ofs == -1) { + return 100.0f; + } + + int h_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); + int v_num = __float_as_int(kernel_tex_fetch(__ies, ofs++)); + +#define IES_LOOKUP_ANGLE_H(h) kernel_tex_fetch(__ies, ofs + (h)) +#define IES_LOOKUP_ANGLE_V(v) kernel_tex_fetch(__ies, ofs + h_num + (v)) + + /* Check whether the angle is within the bounds of the IES texture. */ + if (v_angle >= IES_LOOKUP_ANGLE_V(v_num - 1)) { + return 0.0f; + } + kernel_assert(v_angle >= IES_LOOKUP_ANGLE_V(0)); + kernel_assert(h_angle >= IES_LOOKUP_ANGLE_H(0)); + kernel_assert(h_angle <= IES_LOOKUP_ANGLE_H(h_num - 1)); + + /* Lookup the angles to find the table position. */ + int h_i, v_i; + /* TODO(lukas): Consider using bisection. Probably not worth it for the vast majority of IES files. */ + for (h_i = 0; IES_LOOKUP_ANGLE_H(h_i + 1) < h_angle; h_i++) + ; + for (v_i = 0; IES_LOOKUP_ANGLE_V(v_i + 1) < v_angle; v_i++) + ; + + float h_frac = inverse_lerp(IES_LOOKUP_ANGLE_H(h_i), IES_LOOKUP_ANGLE_H(h_i + 1), h_angle); + float v_frac = inverse_lerp(IES_LOOKUP_ANGLE_V(v_i), IES_LOOKUP_ANGLE_V(v_i + 1), v_angle); #undef IES_LOOKUP_ANGLE_H #undef IES_LOOKUP_ANGLE_V - /* Skip forward to the actual intensity data. */ - ofs += h_num+v_num; - - /* Perform cubic interpolation along the horizontal coordinate to get the intensity value. - * If h_i is zero, just wrap around since the horizontal angles always go over the full circle. - * However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. 
*/ - float a = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, (h_i == 0)? h_num-2 : h_i-1); - float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i); - float c = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i+1); - /* Same logic here, wrap around to the second element if necessary. */ - float d = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, (h_i+2 == h_num)? 1 : h_i+2); - - /* Cubic interpolation can result in negative values, so get rid of them. */ - return max(cubic_interp(a, b, c, d, h_frac), 0.0f); + /* Skip forward to the actual intensity data. */ + ofs += h_num + v_num; + + /* Perform cubic interpolation along the horizontal coordinate to get the intensity value. + * If h_i is zero, just wrap around since the horizontal angles always go over the full circle. + * However, the last entry (360°) equals the first one, so we need to wrap around to the one before that. */ + float a = interpolate_ies_vertical( + kg, ofs, v_i, v_num, v_frac, (h_i == 0) ? h_num - 2 : h_i - 1); + float b = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i); + float c = interpolate_ies_vertical(kg, ofs, v_i, v_num, v_frac, h_i + 1); + /* Same logic here, wrap around to the second element if necessary. */ + float d = interpolate_ies_vertical( + kg, ofs, v_i, v_num, v_frac, (h_i + 2 == h_num) ? 1 : h_i + 2); + + /* Cubic interpolation can result in negative values, so get rid of them. 
*/ + return max(cubic_interp(a, b, c, d, h_frac), 0.0f); } -ccl_device void svm_node_ies(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_ies( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z; - decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy); + uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z; + decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy); - float3 vector = stack_load_float3(stack, vector_offset); - float strength = stack_load_float_default(stack, strength_offset, node.w); + float3 vector = stack_load_float3(stack, vector_offset); + float strength = stack_load_float_default(stack, strength_offset, node.w); - vector = normalize(vector); - float v_angle = safe_acosf(-vector.z); - float h_angle = atan2f(vector.x, vector.y) + M_PI_F; + vector = normalize(vector); + float v_angle = safe_acosf(-vector.z); + float h_angle = atan2f(vector.x, vector.y) + M_PI_F; - float fac = strength * kernel_ies_interp(kg, slot, h_angle, v_angle); + float fac = strength * kernel_ies_interp(kg, slot, h_angle, v_angle); - if(stack_valid(fac_offset)) { - stack_store_float(stack, fac_offset, fac); - } + if (stack_valid(fac_offset)) { + stack_store_float(stack, fac_offset, fac); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 81ee79c984e..ee4b8b6e50c 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -16,190 +16,192 @@ CCL_NAMESPACE_BEGIN -ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha) +ccl_device float4 +svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha) { - float4 r = kernel_tex_image_interp(kg, id, x, y); - const float alpha = r.w; - - 
if(use_alpha && alpha != 1.0f && alpha != 0.0f) { - r /= alpha; - const int texture_type = kernel_tex_type(id); - if(texture_type == IMAGE_DATA_TYPE_BYTE4 || - texture_type == IMAGE_DATA_TYPE_BYTE) - { - r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f)); - } - r.w = alpha; - } - - if(srgb) { - /* TODO(lukas): Implement proper conversion for image textures. */ - r = color_srgb_to_linear_v4(r); - } - - return r; + float4 r = kernel_tex_image_interp(kg, id, x, y); + const float alpha = r.w; + + if (use_alpha && alpha != 1.0f && alpha != 0.0f) { + r /= alpha; + const int texture_type = kernel_tex_type(id); + if (texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) { + r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f)); + } + r.w = alpha; + } + + if (srgb) { + /* TODO(lukas): Implement proper conversion for image textures. */ + r = color_srgb_to_linear_v4(r); + } + + return r; } /* Remap coordnate from 0..1 box to -1..-1 */ ccl_device_inline float3 texco_remap_square(float3 co) { - return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f; + return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f; } ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - uint id = node.y; - uint co_offset, out_offset, alpha_offset, srgb; - - decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); - - float3 co = stack_load_float3(stack, co_offset); - float2 tex_co; - uint use_alpha = stack_valid(alpha_offset); - if(node.w == NODE_IMAGE_PROJ_SPHERE) { - co = texco_remap_square(co); - tex_co = map_to_sphere(co); - } - else if(node.w == NODE_IMAGE_PROJ_TUBE) { - co = texco_remap_square(co); - tex_co = map_to_tube(co); - } - else { - tex_co = make_float2(co.x, co.y); - } - float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha); - - if(stack_valid(out_offset)) - stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); - if(stack_valid(alpha_offset)) - stack_store_float(stack, alpha_offset, 
f.w); + uint id = node.y; + uint co_offset, out_offset, alpha_offset, srgb; + + decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); + + float3 co = stack_load_float3(stack, co_offset); + float2 tex_co; + uint use_alpha = stack_valid(alpha_offset); + if (node.w == NODE_IMAGE_PROJ_SPHERE) { + co = texco_remap_square(co); + tex_co = map_to_sphere(co); + } + else if (node.w == NODE_IMAGE_PROJ_TUBE) { + co = texco_remap_square(co); + tex_co = map_to_tube(co); + } + else { + tex_co = make_float2(co.x, co.y); + } + float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha); + + if (stack_valid(out_offset)) + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); + if (stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, f.w); } ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - /* get object space normal */ - float3 N = sd->N; - - N = sd->N; - object_inverse_normal_transform(kg, sd, &N); - - /* project from direction vector to barycentric coordinates in triangles */ - float3 signed_N = N; - - N.x = fabsf(N.x); - N.y = fabsf(N.y); - N.z = fabsf(N.z); - - N /= (N.x + N.y + N.z); - - /* basic idea is to think of this as a triangle, each corner representing - * one of the 3 faces of the cube. in the corners we have single textures, - * in between we blend between two textures, and in the middle we a blend - * between three textures. - * - * the Nxyz values are the barycentric coordinates in an equilateral - * triangle, which in case of blending, in the middle has a smaller - * equilateral triangle where 3 textures blend. 
this divides things into - * 7 zones, with an if() test for each zone */ - - float3 weight = make_float3(0.0f, 0.0f, 0.0f); - float blend = __int_as_float(node.w); - float limit = 0.5f*(1.0f + blend); - - /* first test for corners with single texture */ - if(N.x > limit*(N.x + N.y) && N.x > limit*(N.x + N.z)) { - weight.x = 1.0f; - } - else if(N.y > limit*(N.x + N.y) && N.y > limit*(N.y + N.z)) { - weight.y = 1.0f; - } - else if(N.z > limit*(N.x + N.z) && N.z > limit*(N.y + N.z)) { - weight.z = 1.0f; - } - else if(blend > 0.0f) { - /* in case of blending, test for mixes between two textures */ - if(N.z < (1.0f - limit)*(N.y + N.x)) { - weight.x = N.x/(N.x + N.y); - weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend); - weight.y = 1.0f - weight.x; - } - else if(N.x < (1.0f - limit)*(N.y + N.z)) { - weight.y = N.y/(N.y + N.z); - weight.y = saturate((weight.y - 0.5f*(1.0f - blend))/blend); - weight.z = 1.0f - weight.y; - } - else if(N.y < (1.0f - limit)*(N.x + N.z)) { - weight.x = N.x/(N.x + N.z); - weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend); - weight.z = 1.0f - weight.x; - } - else { - /* last case, we have a mix between three */ - weight.x = ((2.0f - limit)*N.x + (limit - 1.0f))/(2.0f*limit - 1.0f); - weight.y = ((2.0f - limit)*N.y + (limit - 1.0f))/(2.0f*limit - 1.0f); - weight.z = ((2.0f - limit)*N.z + (limit - 1.0f))/(2.0f*limit - 1.0f); - } - } - else { - /* Desperate mode, no valid choice anyway, fallback to one side.*/ - weight.x = 1.0f; - } - - /* now fetch textures */ - uint co_offset, out_offset, alpha_offset, srgb; - decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); - - float3 co = stack_load_float3(stack, co_offset); - uint id = node.y; - - float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - uint use_alpha = stack_valid(alpha_offset); - - /* Map so that no textures are flipped, rotation is somewhat arbitrary. */ - if(weight.x > 0.0f) { - float2 uv = make_float2((signed_N.x < 0.0f)? 
1.0f - co.y: co.y, co.z); - f += weight.x*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha); - } - if(weight.y > 0.0f) { - float2 uv = make_float2((signed_N.y > 0.0f)? 1.0f - co.x: co.x, co.z); - f += weight.y*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha); - } - if(weight.z > 0.0f) { - float2 uv = make_float2((signed_N.z > 0.0f)? 1.0f - co.y: co.y, co.x); - f += weight.z*svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha); - } - - if(stack_valid(out_offset)) - stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); - if(stack_valid(alpha_offset)) - stack_store_float(stack, alpha_offset, f.w); + /* get object space normal */ + float3 N = sd->N; + + N = sd->N; + object_inverse_normal_transform(kg, sd, &N); + + /* project from direction vector to barycentric coordinates in triangles */ + float3 signed_N = N; + + N.x = fabsf(N.x); + N.y = fabsf(N.y); + N.z = fabsf(N.z); + + N /= (N.x + N.y + N.z); + + /* basic idea is to think of this as a triangle, each corner representing + * one of the 3 faces of the cube. in the corners we have single textures, + * in between we blend between two textures, and in the middle we a blend + * between three textures. + * + * the Nxyz values are the barycentric coordinates in an equilateral + * triangle, which in case of blending, in the middle has a smaller + * equilateral triangle where 3 textures blend. 
this divides things into + * 7 zones, with an if() test for each zone */ + + float3 weight = make_float3(0.0f, 0.0f, 0.0f); + float blend = __int_as_float(node.w); + float limit = 0.5f * (1.0f + blend); + + /* first test for corners with single texture */ + if (N.x > limit * (N.x + N.y) && N.x > limit * (N.x + N.z)) { + weight.x = 1.0f; + } + else if (N.y > limit * (N.x + N.y) && N.y > limit * (N.y + N.z)) { + weight.y = 1.0f; + } + else if (N.z > limit * (N.x + N.z) && N.z > limit * (N.y + N.z)) { + weight.z = 1.0f; + } + else if (blend > 0.0f) { + /* in case of blending, test for mixes between two textures */ + if (N.z < (1.0f - limit) * (N.y + N.x)) { + weight.x = N.x / (N.x + N.y); + weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend); + weight.y = 1.0f - weight.x; + } + else if (N.x < (1.0f - limit) * (N.y + N.z)) { + weight.y = N.y / (N.y + N.z); + weight.y = saturate((weight.y - 0.5f * (1.0f - blend)) / blend); + weight.z = 1.0f - weight.y; + } + else if (N.y < (1.0f - limit) * (N.x + N.z)) { + weight.x = N.x / (N.x + N.z); + weight.x = saturate((weight.x - 0.5f * (1.0f - blend)) / blend); + weight.z = 1.0f - weight.x; + } + else { + /* last case, we have a mix between three */ + weight.x = ((2.0f - limit) * N.x + (limit - 1.0f)) / (2.0f * limit - 1.0f); + weight.y = ((2.0f - limit) * N.y + (limit - 1.0f)) / (2.0f * limit - 1.0f); + weight.z = ((2.0f - limit) * N.z + (limit - 1.0f)) / (2.0f * limit - 1.0f); + } + } + else { + /* Desperate mode, no valid choice anyway, fallback to one side.*/ + weight.x = 1.0f; + } + + /* now fetch textures */ + uint co_offset, out_offset, alpha_offset, srgb; + decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); + + float3 co = stack_load_float3(stack, co_offset); + uint id = node.y; + + float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + uint use_alpha = stack_valid(alpha_offset); + + /* Map so that no textures are flipped, rotation is somewhat arbitrary. 
*/ + if (weight.x > 0.0f) { + float2 uv = make_float2((signed_N.x < 0.0f) ? 1.0f - co.y : co.y, co.z); + f += weight.x * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha); + } + if (weight.y > 0.0f) { + float2 uv = make_float2((signed_N.y > 0.0f) ? 1.0f - co.x : co.x, co.z); + f += weight.y * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha); + } + if (weight.z > 0.0f) { + float2 uv = make_float2((signed_N.z > 0.0f) ? 1.0f - co.y : co.y, co.x); + f += weight.z * svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha); + } + + if (stack_valid(out_offset)) + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); + if (stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, f.w); } -ccl_device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device void svm_node_tex_environment(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { - uint id = node.y; - uint co_offset, out_offset, alpha_offset, srgb; - uint projection = node.w; + uint id = node.y; + uint co_offset, out_offset, alpha_offset, srgb; + uint projection = node.w; - decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); + decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb); - float3 co = stack_load_float3(stack, co_offset); - float2 uv; + float3 co = stack_load_float3(stack, co_offset); + float2 uv; - co = safe_normalize(co); + co = safe_normalize(co); - if(projection == 0) - uv = direction_to_equirectangular(co); - else - uv = direction_to_mirrorball(co); + if (projection == 0) + uv = direction_to_equirectangular(co); + else + uv = direction_to_mirrorball(co); - uint use_alpha = stack_valid(alpha_offset); - float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha); + uint use_alpha = stack_valid(alpha_offset); + float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha); - if(stack_valid(out_offset)) - stack_store_float3(stack, out_offset, make_float3(f.x, 
f.y, f.z)); - if(stack_valid(alpha_offset)) - stack_store_float(stack, alpha_offset, f.w); + if (stack_valid(out_offset)) + stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z)); + if (stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, f.w); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_invert.h b/intern/cycles/kernel/svm/svm_invert.h index 57cc4281101..02024742b13 100644 --- a/intern/cycles/kernel/svm/svm_invert.h +++ b/intern/cycles/kernel/svm/svm_invert.h @@ -18,20 +18,21 @@ CCL_NAMESPACE_BEGIN ccl_device float invert(float color, float factor) { - return factor*(1.0f - color) + (1.0f - factor) * color; + return factor * (1.0f - color) + (1.0f - factor) * color; } -ccl_device void svm_node_invert(ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color) +ccl_device void svm_node_invert( + ShaderData *sd, float *stack, uint in_fac, uint in_color, uint out_color) { - float factor = stack_load_float(stack, in_fac); - float3 color = stack_load_float3(stack, in_color); + float factor = stack_load_float(stack, in_fac); + float3 color = stack_load_float3(stack, in_color); - color.x = invert(color.x, factor); - color.y = invert(color.y, factor); - color.z = invert(color.z, factor); + color.x = invert(color.x, factor); + color.y = invert(color.y, factor); + color.z = invert(color.z, factor); - if(stack_valid(out_color)) - stack_store_float3(stack, out_color, color); + if (stack_valid(out_color)) + stack_store_float3(stack, out_color, color); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index dd4390057cf..65a9a284a17 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -18,59 +18,99 @@ CCL_NAMESPACE_BEGIN /* Light Path Node */ -ccl_device void svm_node_light_path(ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint type, uint out_offset, int path_flag) +ccl_device void 
svm_node_light_path(ShaderData *sd, + ccl_addr_space PathState *state, + float *stack, + uint type, + uint out_offset, + int path_flag) { - float info = 0.0f; + float info = 0.0f; - switch(type) { - case NODE_LP_camera: info = (path_flag & PATH_RAY_CAMERA)? 1.0f: 0.0f; break; - case NODE_LP_shadow: info = (path_flag & PATH_RAY_SHADOW)? 1.0f: 0.0f; break; - case NODE_LP_diffuse: info = (path_flag & PATH_RAY_DIFFUSE)? 1.0f: 0.0f; break; - case NODE_LP_glossy: info = (path_flag & PATH_RAY_GLOSSY)? 1.0f: 0.0f; break; - case NODE_LP_singular: info = (path_flag & PATH_RAY_SINGULAR)? 1.0f: 0.0f; break; - case NODE_LP_reflection: info = (path_flag & PATH_RAY_REFLECT)? 1.0f: 0.0f; break; - case NODE_LP_transmission: info = (path_flag & PATH_RAY_TRANSMIT)? 1.0f: 0.0f; break; - case NODE_LP_volume_scatter: info = (path_flag & PATH_RAY_VOLUME_SCATTER)? 1.0f: 0.0f; break; - case NODE_LP_backfacing: info = (sd->flag & SD_BACKFACING)? 1.0f: 0.0f; break; - case NODE_LP_ray_length: info = sd->ray_length; break; - case NODE_LP_ray_depth: info = (float)state->bounce; break; - case NODE_LP_ray_diffuse: info = (float)state->diffuse_bounce; break; - case NODE_LP_ray_glossy: info = (float)state->glossy_bounce; break; - case NODE_LP_ray_transparent: info = (float)state->transparent_bounce; break; - case NODE_LP_ray_transmission: info = (float)state->transmission_bounce; break; - } + switch (type) { + case NODE_LP_camera: + info = (path_flag & PATH_RAY_CAMERA) ? 1.0f : 0.0f; + break; + case NODE_LP_shadow: + info = (path_flag & PATH_RAY_SHADOW) ? 1.0f : 0.0f; + break; + case NODE_LP_diffuse: + info = (path_flag & PATH_RAY_DIFFUSE) ? 1.0f : 0.0f; + break; + case NODE_LP_glossy: + info = (path_flag & PATH_RAY_GLOSSY) ? 1.0f : 0.0f; + break; + case NODE_LP_singular: + info = (path_flag & PATH_RAY_SINGULAR) ? 1.0f : 0.0f; + break; + case NODE_LP_reflection: + info = (path_flag & PATH_RAY_REFLECT) ? 1.0f : 0.0f; + break; + case NODE_LP_transmission: + info = (path_flag & PATH_RAY_TRANSMIT) ? 
1.0f : 0.0f; + break; + case NODE_LP_volume_scatter: + info = (path_flag & PATH_RAY_VOLUME_SCATTER) ? 1.0f : 0.0f; + break; + case NODE_LP_backfacing: + info = (sd->flag & SD_BACKFACING) ? 1.0f : 0.0f; + break; + case NODE_LP_ray_length: + info = sd->ray_length; + break; + case NODE_LP_ray_depth: + info = (float)state->bounce; + break; + case NODE_LP_ray_diffuse: + info = (float)state->diffuse_bounce; + break; + case NODE_LP_ray_glossy: + info = (float)state->glossy_bounce; + break; + case NODE_LP_ray_transparent: + info = (float)state->transparent_bounce; + break; + case NODE_LP_ray_transmission: + info = (float)state->transmission_bounce; + break; + } - stack_store_float(stack, out_offset, info); + stack_store_float(stack, out_offset, info); } /* Light Falloff Node */ ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node) { - uint strength_offset, out_offset, smooth_offset; + uint strength_offset, out_offset, smooth_offset; - decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL); + decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL); - float strength = stack_load_float(stack, strength_offset); - uint type = node.y; + float strength = stack_load_float(stack, strength_offset); + uint type = node.y; - switch(type) { - case NODE_LIGHT_FALLOFF_QUADRATIC: break; - case NODE_LIGHT_FALLOFF_LINEAR: strength *= sd->ray_length; break; - case NODE_LIGHT_FALLOFF_CONSTANT: strength *= sd->ray_length*sd->ray_length; break; - } + switch (type) { + case NODE_LIGHT_FALLOFF_QUADRATIC: + break; + case NODE_LIGHT_FALLOFF_LINEAR: + strength *= sd->ray_length; + break; + case NODE_LIGHT_FALLOFF_CONSTANT: + strength *= sd->ray_length * sd->ray_length; + break; + } - float smooth = stack_load_float(stack, smooth_offset); + float smooth = stack_load_float(stack, smooth_offset); - if(smooth > 0.0f) { - float squared = sd->ray_length*sd->ray_length; - /* Distant lamps set the ray length to FLT_MAX, which causes 
squared to overflow. */ - if(isfinite(squared)) { - strength *= squared/(smooth + squared); - } - } + if (smooth > 0.0f) { + float squared = sd->ray_length * sd->ray_length; + /* Distant lamps set the ray length to FLT_MAX, which causes squared to overflow. */ + if (isfinite(squared)) { + strength *= squared / (smooth + squared); + } + } - stack_store_float(stack, out_offset, strength); + stack_store_float(stack, out_offset, strength); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h index 6afaff37acd..115d2e2fe4b 100644 --- a/intern/cycles/kernel/svm/svm_magic.h +++ b/intern/cycles/kernel/svm/svm_magic.h @@ -20,92 +20,93 @@ CCL_NAMESPACE_BEGIN ccl_device_noinline float3 svm_magic(float3 p, int n, float distortion) { - float x = sinf((p.x + p.y + p.z)*5.0f); - float y = cosf((-p.x + p.y - p.z)*5.0f); - float z = -cosf((-p.x - p.y + p.z)*5.0f); - - if(n > 0) { - x *= distortion; - y *= distortion; - z *= distortion; - y = -cosf(x-y+z); - y *= distortion; - - if(n > 1) { - x = cosf(x-y-z); - x *= distortion; - - if(n > 2) { - z = sinf(-x-y-z); - z *= distortion; - - if(n > 3) { - x = -cosf(-x+y-z); - x *= distortion; - - if(n > 4) { - y = -sinf(-x+y+z); - y *= distortion; - - if(n > 5) { - y = -cosf(-x+y+z); - y *= distortion; - - if(n > 6) { - x = cosf(x+y+z); - x *= distortion; - - if(n > 7) { - z = sinf(x+y-z); - z *= distortion; - - if(n > 8) { - x = -cosf(-x-y+z); - x *= distortion; - - if(n > 9) { - y = -sinf(x-y+z); - y *= distortion; - } - } - } - } - } - } - } - } - } - } - - if(distortion != 0.0f) { - distortion *= 2.0f; - x /= distortion; - y /= distortion; - z /= distortion; - } - - return make_float3(0.5f - x, 0.5f - y, 0.5f - z); + float x = sinf((p.x + p.y + p.z) * 5.0f); + float y = cosf((-p.x + p.y - p.z) * 5.0f); + float z = -cosf((-p.x - p.y + p.z) * 5.0f); + + if (n > 0) { + x *= distortion; + y *= distortion; + z *= distortion; + y = -cosf(x - y + z); + y *= distortion; + + if (n > 1) { 
+ x = cosf(x - y - z); + x *= distortion; + + if (n > 2) { + z = sinf(-x - y - z); + z *= distortion; + + if (n > 3) { + x = -cosf(-x + y - z); + x *= distortion; + + if (n > 4) { + y = -sinf(-x + y + z); + y *= distortion; + + if (n > 5) { + y = -cosf(-x + y + z); + y *= distortion; + + if (n > 6) { + x = cosf(x + y + z); + x *= distortion; + + if (n > 7) { + z = sinf(x + y - z); + z *= distortion; + + if (n > 8) { + x = -cosf(-x - y + z); + x *= distortion; + + if (n > 9) { + y = -sinf(x - y + z); + y *= distortion; + } + } + } + } + } + } + } + } + } + } + + if (distortion != 0.0f) { + distortion *= 2.0f; + x /= distortion; + y /= distortion; + z /= distortion; + } + + return make_float3(0.5f - x, 0.5f - y, 0.5f - z); } -ccl_device void svm_node_tex_magic(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_tex_magic( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint depth; - uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset; + uint depth; + uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset; - decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL); - decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL); + decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL); + decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL); - uint4 node2 = read_node(kg, offset); - float3 co = stack_load_float3(stack, co_offset); - float scale = stack_load_float_default(stack, scale_offset, node2.x); - float distortion = stack_load_float_default(stack, distortion_offset, node2.y); + uint4 node2 = read_node(kg, offset); + float3 co = stack_load_float3(stack, co_offset); + float scale = stack_load_float_default(stack, scale_offset, node2.x); + float distortion = stack_load_float_default(stack, distortion_offset, node2.y); - float3 color = svm_magic(co*scale, depth, distortion); + 
float3 color = svm_magic(co * scale, depth, distortion); - if(stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, average(color)); - if(stack_valid(color_offset)) - stack_store_float3(stack, color_offset, color); + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, average(color)); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, color); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_mapping.h b/intern/cycles/kernel/svm/svm_mapping.h index 86181283821..998a29912d4 100644 --- a/intern/cycles/kernel/svm/svm_mapping.h +++ b/intern/cycles/kernel/svm/svm_mapping.h @@ -18,28 +18,30 @@ CCL_NAMESPACE_BEGIN /* Mapping Node */ -ccl_device void svm_node_mapping(KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset) +ccl_device void svm_node_mapping( + KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset) { - float3 v = stack_load_float3(stack, vec_offset); + float3 v = stack_load_float3(stack, vec_offset); - Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); - float3 r = transform_point(&tfm, v); - stack_store_float3(stack, out_offset, r); + float3 r = transform_point(&tfm, v); + stack_store_float3(stack, out_offset, r); } -ccl_device void svm_node_min_max(KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset) +ccl_device void svm_node_min_max( + KernelGlobals *kg, ShaderData *sd, float *stack, uint vec_offset, uint out_offset, int *offset) { - float3 v = stack_load_float3(stack, vec_offset); + float3 v = stack_load_float3(stack, vec_offset); - float3 mn = float4_to_float3(read_node_float(kg, offset)); - float3 mx = float4_to_float3(read_node_float(kg, offset)); + float3 mn 
= float4_to_float3(read_node_float(kg, offset)); + float3 mx = float4_to_float3(read_node_float(kg, offset)); - float3 r = min(max(mn, v), mx); - stack_store_float3(stack, out_offset, r); + float3 r = min(max(mn, v), mx); + stack_store_float3(stack, out_offset, r); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index c9a838361cd..5920913825b 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -18,32 +18,46 @@ CCL_NAMESPACE_BEGIN /* Nodes */ -ccl_device void svm_node_math(KernelGlobals *kg, ShaderData *sd, float *stack, uint itype, uint f1_offset, uint f2_offset, int *offset) +ccl_device void svm_node_math(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint itype, + uint f1_offset, + uint f2_offset, + int *offset) { - NodeMath type = (NodeMath)itype; - float f1 = stack_load_float(stack, f1_offset); - float f2 = stack_load_float(stack, f2_offset); - float f = svm_math(type, f1, f2); + NodeMath type = (NodeMath)itype; + float f1 = stack_load_float(stack, f1_offset); + float f2 = stack_load_float(stack, f2_offset); + float f = svm_math(type, f1, f2); - uint4 node1 = read_node(kg, offset); + uint4 node1 = read_node(kg, offset); - stack_store_float(stack, node1.y, f); + stack_store_float(stack, node1.y, f); } -ccl_device void svm_node_vector_math(KernelGlobals *kg, ShaderData *sd, float *stack, uint itype, uint v1_offset, uint v2_offset, int *offset) +ccl_device void svm_node_vector_math(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint itype, + uint v1_offset, + uint v2_offset, + int *offset) { - NodeVectorMath type = (NodeVectorMath)itype; - float3 v1 = stack_load_float3(stack, v1_offset); - float3 v2 = stack_load_float3(stack, v2_offset); - float f; - float3 v; + NodeVectorMath type = (NodeVectorMath)itype; + float3 v1 = stack_load_float3(stack, v1_offset); + float3 v2 = stack_load_float3(stack, v2_offset); + float f; + float3 v; - 
svm_vector_math(&f, &v, type, v1, v2); + svm_vector_math(&f, &v, type, v1, v2); - uint4 node1 = read_node(kg, offset); + uint4 node1 = read_node(kg, offset); - if(stack_valid(node1.y)) stack_store_float(stack, node1.y, f); - if(stack_valid(node1.z)) stack_store_float3(stack, node1.z, v); + if (stack_valid(node1.y)) + stack_store_float(stack, node1.y, f); + if (stack_valid(node1.z)) + stack_store_float3(stack, node1.z, v); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h index 669b174e4a3..e3544515f1b 100644 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -18,96 +18,97 @@ CCL_NAMESPACE_BEGIN ccl_device float average_fac(float3 v) { - return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z))/3.0f; + return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z)) / 3.0f; } -ccl_device void svm_vector_math(float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2) +ccl_device void svm_vector_math( + float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2) { - if(type == NODE_VECTOR_MATH_ADD) { - *Vector = Vector1 + Vector2; - *Fac = average_fac(*Vector); - } - else if(type == NODE_VECTOR_MATH_SUBTRACT) { - *Vector = Vector1 - Vector2; - *Fac = average_fac(*Vector); - } - else if(type == NODE_VECTOR_MATH_AVERAGE) { - *Vector = safe_normalize_len(Vector1 + Vector2, Fac); - } - else if(type == NODE_VECTOR_MATH_DOT_PRODUCT) { - *Fac = dot(Vector1, Vector2); - *Vector = make_float3(0.0f, 0.0f, 0.0f); - } - else if(type == NODE_VECTOR_MATH_CROSS_PRODUCT) { - *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac); - } - else if(type == NODE_VECTOR_MATH_NORMALIZE) { - *Vector = safe_normalize_len(Vector1, Fac); - } - else { - *Fac = 0.0f; - *Vector = make_float3(0.0f, 0.0f, 0.0f); - } + if (type == NODE_VECTOR_MATH_ADD) { + *Vector = Vector1 + Vector2; + *Fac = average_fac(*Vector); + } + else if (type == NODE_VECTOR_MATH_SUBTRACT) { + 
*Vector = Vector1 - Vector2; + *Fac = average_fac(*Vector); + } + else if (type == NODE_VECTOR_MATH_AVERAGE) { + *Vector = safe_normalize_len(Vector1 + Vector2, Fac); + } + else if (type == NODE_VECTOR_MATH_DOT_PRODUCT) { + *Fac = dot(Vector1, Vector2); + *Vector = make_float3(0.0f, 0.0f, 0.0f); + } + else if (type == NODE_VECTOR_MATH_CROSS_PRODUCT) { + *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac); + } + else if (type == NODE_VECTOR_MATH_NORMALIZE) { + *Vector = safe_normalize_len(Vector1, Fac); + } + else { + *Fac = 0.0f; + *Vector = make_float3(0.0f, 0.0f, 0.0f); + } } ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) { - float Fac; - - if(type == NODE_MATH_ADD) - Fac = Fac1 + Fac2; - else if(type == NODE_MATH_SUBTRACT) - Fac = Fac1 - Fac2; - else if(type == NODE_MATH_MULTIPLY) - Fac = Fac1*Fac2; - else if(type == NODE_MATH_DIVIDE) - Fac = safe_divide(Fac1, Fac2); - else if(type == NODE_MATH_SINE) - Fac = sinf(Fac1); - else if(type == NODE_MATH_COSINE) - Fac = cosf(Fac1); - else if(type == NODE_MATH_TANGENT) - Fac = tanf(Fac1); - else if(type == NODE_MATH_ARCSINE) - Fac = safe_asinf(Fac1); - else if(type == NODE_MATH_ARCCOSINE) - Fac = safe_acosf(Fac1); - else if(type == NODE_MATH_ARCTANGENT) - Fac = atanf(Fac1); - else if(type == NODE_MATH_POWER) - Fac = safe_powf(Fac1, Fac2); - else if(type == NODE_MATH_LOGARITHM) - Fac = safe_logf(Fac1, Fac2); - else if(type == NODE_MATH_MINIMUM) - Fac = fminf(Fac1, Fac2); - else if(type == NODE_MATH_MAXIMUM) - Fac = fmaxf(Fac1, Fac2); - else if(type == NODE_MATH_ROUND) - Fac = floorf(Fac1 + 0.5f); - else if(type == NODE_MATH_LESS_THAN) - Fac = Fac1 < Fac2; - else if(type == NODE_MATH_GREATER_THAN) - Fac = Fac1 > Fac2; - else if(type == NODE_MATH_MODULO) - Fac = safe_modulo(Fac1, Fac2); - else if(type == NODE_MATH_ABSOLUTE) - Fac = fabsf(Fac1); - else if(type == NODE_MATH_ARCTAN2) - Fac = atan2f(Fac1, Fac2); - else if(type == NODE_MATH_FLOOR) - Fac = floorf(Fac1); - else if(type == NODE_MATH_CEIL) 
- Fac = ceilf(Fac1); - else if(type == NODE_MATH_FRACT) - Fac = Fac1 - floorf(Fac1); - else if(type == NODE_MATH_SQRT) - Fac = safe_sqrtf(Fac1); - else if(type == NODE_MATH_CLAMP) - Fac = saturate(Fac1); - else - Fac = 0.0f; - - return Fac; + float Fac; + + if (type == NODE_MATH_ADD) + Fac = Fac1 + Fac2; + else if (type == NODE_MATH_SUBTRACT) + Fac = Fac1 - Fac2; + else if (type == NODE_MATH_MULTIPLY) + Fac = Fac1 * Fac2; + else if (type == NODE_MATH_DIVIDE) + Fac = safe_divide(Fac1, Fac2); + else if (type == NODE_MATH_SINE) + Fac = sinf(Fac1); + else if (type == NODE_MATH_COSINE) + Fac = cosf(Fac1); + else if (type == NODE_MATH_TANGENT) + Fac = tanf(Fac1); + else if (type == NODE_MATH_ARCSINE) + Fac = safe_asinf(Fac1); + else if (type == NODE_MATH_ARCCOSINE) + Fac = safe_acosf(Fac1); + else if (type == NODE_MATH_ARCTANGENT) + Fac = atanf(Fac1); + else if (type == NODE_MATH_POWER) + Fac = safe_powf(Fac1, Fac2); + else if (type == NODE_MATH_LOGARITHM) + Fac = safe_logf(Fac1, Fac2); + else if (type == NODE_MATH_MINIMUM) + Fac = fminf(Fac1, Fac2); + else if (type == NODE_MATH_MAXIMUM) + Fac = fmaxf(Fac1, Fac2); + else if (type == NODE_MATH_ROUND) + Fac = floorf(Fac1 + 0.5f); + else if (type == NODE_MATH_LESS_THAN) + Fac = Fac1 < Fac2; + else if (type == NODE_MATH_GREATER_THAN) + Fac = Fac1 > Fac2; + else if (type == NODE_MATH_MODULO) + Fac = safe_modulo(Fac1, Fac2); + else if (type == NODE_MATH_ABSOLUTE) + Fac = fabsf(Fac1); + else if (type == NODE_MATH_ARCTAN2) + Fac = atan2f(Fac1, Fac2); + else if (type == NODE_MATH_FLOOR) + Fac = floorf(Fac1); + else if (type == NODE_MATH_CEIL) + Fac = ceilf(Fac1); + else if (type == NODE_MATH_FRACT) + Fac = Fac1 - floorf(Fac1); + else if (type == NODE_MATH_SQRT) + Fac = safe_sqrtf(Fac1); + else if (type == NODE_MATH_CLAMP) + Fac = saturate(Fac1); + else + Fac = 0.0f; + + return Fac; } /* Calculate color in range 800..12000 using an approximation @@ -117,74 +118,72 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) 
*/ ccl_static_constant float blackbody_table_r[6][3] = { - { 2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f }, - { 3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f }, - { 4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f }, - { 4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f }, - { 4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f }, - { 3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f }, + {2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f}, + {3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f}, + {4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f}, + {4.66849800e+03f, 2.85655028e-05f, 1.29075375e-01f}, + {4.60124770e+03f, 2.89727618e-05f, 1.48001316e-01f}, + {3.78765709e+03f, 9.36026367e-06f, 3.98995841e-01f}, }; ccl_static_constant float blackbody_table_g[6][3] = { - { -7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f }, - { -1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f }, - { -1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f }, - { -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f }, - { -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f }, - { -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f }, + {-7.50343014e+02f, 3.15679613e-04f, 4.73464526e-01f}, + {-1.00402363e+03f, 1.29189794e-04f, 9.08181524e-01f}, + {-1.22075471e+03f, 2.56245413e-05f, 1.20753416e+00f}, + {-1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f}, + {-1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f}, + {-5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f}, }; ccl_static_constant float blackbody_table_b[6][4] = { - { 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */ - { 0.0f, 0.0f, 0.0f, 0.0f }, - { 0.0f, 0.0f, 0.0f, 0.0f }, - { -2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f }, - { -2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f }, - { 6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f }, + {0.0f, 0.0f, 0.0f, 0.0f}, /* zeros should be optimized by compiler */ + {0.0f, 0.0f, 0.0f, 
0.0f}, + {0.0f, 0.0f, 0.0f, 0.0f}, + {-2.02524603e-11f, 1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f}, + {-2.22463426e-13f, -1.55078698e-08f, 3.81675160e-04f, -7.30646033e-01f}, + {6.72595954e-13f, -2.73059993e-08f, 4.24068546e-04f, -7.52204323e-01f}, }; - ccl_device float3 svm_math_blackbody_color(float t) { - /* TODO(lukas): Reimplement in XYZ. */ - - if(t >= 12000.0f) { - return make_float3(0.826270103f, 0.994478524f, 1.56626022f); - } - else if(t < 965.0f) { - /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */ - return make_float3(4.70366907f, 0.0f, 0.0f); - } - - int i = (t >= 6365.0f)? 5: - (t >= 3315.0f)? 4: - (t >= 1902.0f)? 3: - (t >= 1449.0f)? 2: - (t >= 1167.0f)? 1: 0; - - ccl_constant float *r = blackbody_table_r[i]; - ccl_constant float *g = blackbody_table_g[i]; - ccl_constant float *b = blackbody_table_b[i]; - - const float t_inv = 1.0f / t; - return make_float3(r[0] * t_inv + r[1] * t + r[2], - g[0] * t_inv + g[1] * t + g[2], - ((b[0] * t + b[1]) * t + b[2]) * t + b[3]); + /* TODO(lukas): Reimplement in XYZ. */ + + if (t >= 12000.0f) { + return make_float3(0.826270103f, 0.994478524f, 1.56626022f); + } + else if (t < 965.0f) { + /* For 800 <= t < 965 color does not change in OSL implementation, so keep color the same */ + return make_float3(4.70366907f, 0.0f, 0.0f); + } + + int i = (t >= 6365.0f) ? + 5 : + (t >= 3315.0f) ? 4 : + (t >= 1902.0f) ? 3 : (t >= 1449.0f) ? 2 : (t >= 1167.0f) ? 
1 : 0; + + ccl_constant float *r = blackbody_table_r[i]; + ccl_constant float *g = blackbody_table_g[i]; + ccl_constant float *b = blackbody_table_b[i]; + + const float t_inv = 1.0f / t; + return make_float3(r[0] * t_inv + r[1] * t + r[2], + g[0] * t_inv + g[1] * t + g[2], + ((b[0] * t + b[1]) * t + b[2]) * t + b[3]); } ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma) { - if(gamma == 0.0f) - return make_float3(1.0f, 1.0f, 1.0f); + if (gamma == 0.0f) + return make_float3(1.0f, 1.0f, 1.0f); - if(color.x > 0.0f) - color.x = powf(color.x, gamma); - if(color.y > 0.0f) - color.y = powf(color.y, gamma); - if(color.z > 0.0f) - color.z = powf(color.z, gamma); + if (color.x > 0.0f) + color.x = powf(color.x, gamma); + if (color.y > 0.0f) + color.y = powf(color.y, gamma); + if (color.z > 0.0f) + color.z = powf(color.z, gamma); - return color; + return color; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h index 903a4dacebf..15114bfd5e4 100644 --- a/intern/cycles/kernel/svm/svm_mix.h +++ b/intern/cycles/kernel/svm/svm_mix.h @@ -18,17 +18,23 @@ CCL_NAMESPACE_BEGIN /* Node */ -ccl_device void svm_node_mix(KernelGlobals *kg, ShaderData *sd, float *stack, uint fac_offset, uint c1_offset, uint c2_offset, int *offset) +ccl_device void svm_node_mix(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint fac_offset, + uint c1_offset, + uint c2_offset, + int *offset) { - /* read extra data */ - uint4 node1 = read_node(kg, offset); + /* read extra data */ + uint4 node1 = read_node(kg, offset); - float fac = stack_load_float(stack, fac_offset); - float3 c1 = stack_load_float3(stack, c1_offset); - float3 c2 = stack_load_float3(stack, c2_offset); - float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2); + float fac = stack_load_float(stack, fac_offset); + float3 c1 = stack_load_float3(stack, c1_offset); + float3 c2 = stack_load_float3(stack, c2_offset); + float3 result = svm_mix((NodeMix)node1.y, fac, c1, 
c2); - stack_store_float3(stack, node1.z, result); + stack_store_float3(stack, node1.z, result); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h index 5d9e12628ca..67fb5ca6241 100644 --- a/intern/cycles/kernel/svm/svm_musgrave.h +++ b/intern/cycles/kernel/svm/svm_musgrave.h @@ -27,23 +27,23 @@ CCL_NAMESPACE_BEGIN ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity, float octaves) { - float rmd; - float value = 0.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - int i; - - for(i = 0; i < float_to_int(octaves); i++) { - value += snoise(p) * pwr; - pwr *= pwHL; - p *= lacunarity; - } - - rmd = octaves - floorf(octaves); - if(rmd != 0.0f) - value += rmd * snoise(p) * pwr; - - return value; + float rmd; + float value = 0.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + int i; + + for (i = 0; i < float_to_int(octaves); i++) { + value += snoise(p) * pwr; + pwr *= pwHL; + p *= lacunarity; + } + + rmd = octaves - floorf(octaves); + if (rmd != 0.0f) + value += rmd * snoise(p) * pwr; + + return value; } /* Musgrave Multifractal @@ -53,25 +53,28 @@ ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity * octaves: number of frequencies in the fBm */ -ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, float H, float lacunarity, float octaves) +ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, + float H, + float lacunarity, + float octaves) { - float rmd; - float value = 1.0f; - float pwr = 1.0f; - float pwHL = powf(lacunarity, -H); - int i; - - for(i = 0; i < float_to_int(octaves); i++) { - value *= (pwr * snoise(p) + 1.0f); - pwr *= pwHL; - p *= lacunarity; - } - - rmd = octaves - floorf(octaves); - if(rmd != 0.0f) - value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? 
*/ - - return value; + float rmd; + float value = 1.0f; + float pwr = 1.0f; + float pwHL = powf(lacunarity, -H); + int i; + + for (i = 0; i < float_to_int(octaves); i++) { + value *= (pwr * snoise(p) + 1.0f); + pwr *= pwHL; + p *= lacunarity; + } + + rmd = octaves - floorf(octaves); + if (rmd != 0.0f) + value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? */ + + return value; } /* Musgrave Heterogeneous Terrain @@ -82,31 +85,32 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, float H, float * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_hetero_terrain(float3 p, float H, float lacunarity, float octaves, float offset) +ccl_device_noinline float noise_musgrave_hetero_terrain( + float3 p, float H, float lacunarity, float octaves, float offset) { - float value, increment, rmd; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - int i; - - /* first unscaled octave of function; later octaves are scaled */ - value = offset + snoise(p); - p *= lacunarity; - - for(i = 1; i < float_to_int(octaves); i++) { - increment = (snoise(p) + offset) * pwr * value; - value += increment; - pwr *= pwHL; - p *= lacunarity; - } - - rmd = octaves - floorf(octaves); - if(rmd != 0.0f) { - increment = (snoise(p) + offset) * pwr * value; - value += rmd * increment; - } - - return value; + float value, increment, rmd; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + int i; + + /* first unscaled octave of function; later octaves are scaled */ + value = offset + snoise(p); + p *= lacunarity; + + for (i = 1; i < float_to_int(octaves); i++) { + increment = (snoise(p) + offset) * pwr * value; + value += increment; + pwr *= pwHL; + p *= lacunarity; + } + + rmd = octaves - floorf(octaves); + if (rmd != 0.0f) { + increment = (snoise(p) + offset) * pwr * value; + value += rmd * increment; + } + + return value; } /* Hybrid Additive/Multiplicative Multifractal Terrain @@ -117,33 +121,34 @@ ccl_device_noinline float 
noise_musgrave_hetero_terrain(float3 p, float H, float * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain) +ccl_device_noinline float noise_musgrave_hybrid_multi_fractal( + float3 p, float H, float lacunarity, float octaves, float offset, float gain) { - float result, signal, weight, rmd; - float pwHL = powf(lacunarity, -H); - float pwr = pwHL; - int i; - - result = snoise(p) + offset; - weight = gain * result; - p *= lacunarity; - - for(i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { - if(weight > 1.0f) - weight = 1.0f; - - signal = (snoise(p) + offset) * pwr; - pwr *= pwHL; - result += weight * signal; - weight *= gain * signal; - p *= lacunarity; - } - - rmd = octaves - floorf(octaves); - if(rmd != 0.0f) - result += rmd * ((snoise(p) + offset) * pwr); - - return result; + float result, signal, weight, rmd; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + int i; + + result = snoise(p) + offset; + weight = gain * result; + p *= lacunarity; + + for (i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) { + if (weight > 1.0f) + weight = 1.0f; + + signal = (snoise(p) + offset) * pwr; + pwr *= pwHL; + result += weight * signal; + weight *= gain * signal; + p *= lacunarity; + } + + rmd = octaves - floorf(octaves); + if (rmd != 0.0f) + result += rmd * ((snoise(p) + offset) * pwr); + + return result; } /* Ridged Multifractal Terrain @@ -154,81 +159,93 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(float3 p, float H, * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_ridged_multi_fractal(float3 p, float H, float lacunarity, float octaves, float offset, float gain) +ccl_device_noinline float noise_musgrave_ridged_multi_fractal( + float3 p, float H, float lacunarity, float octaves, float offset, float gain) { - float result, signal, weight; - float 
pwHL = powf(lacunarity, -H); - float pwr = pwHL; - int i; - - signal = offset - fabsf(snoise(p)); - signal *= signal; - result = signal; - weight = 1.0f; - - for(i = 1; i < float_to_int(octaves); i++) { - p *= lacunarity; - weight = saturate(signal * gain); - signal = offset - fabsf(snoise(p)); - signal *= signal; - signal *= weight; - result += signal * pwr; - pwr *= pwHL; - } - - return result; + float result, signal, weight; + float pwHL = powf(lacunarity, -H); + float pwr = pwHL; + int i; + + signal = offset - fabsf(snoise(p)); + signal *= signal; + result = signal; + weight = 1.0f; + + for (i = 1; i < float_to_int(octaves); i++) { + p *= lacunarity; + weight = saturate(signal * gain); + signal = offset - fabsf(snoise(p)); + signal *= signal; + signal *= weight; + result += signal * pwr; + pwr *= pwHL; + } + + return result; } /* Shader */ -ccl_device float svm_musgrave(NodeMusgraveType type, float dimension, float lacunarity, float octaves, float offset, float intensity, float gain, float3 p) +ccl_device float svm_musgrave(NodeMusgraveType type, + float dimension, + float lacunarity, + float octaves, + float offset, + float intensity, + float gain, + float3 p) { - if(type == NODE_MUSGRAVE_MULTIFRACTAL) - return intensity*noise_musgrave_multi_fractal(p, dimension, lacunarity, octaves); - else if(type == NODE_MUSGRAVE_FBM) - return intensity*noise_musgrave_fBm(p, dimension, lacunarity, octaves); - else if(type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL) - return intensity*noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain); - else if(type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL) - return intensity*noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain); - else if(type == NODE_MUSGRAVE_HETERO_TERRAIN) - return intensity*noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset); - - return 0.0f; + if (type == NODE_MUSGRAVE_MULTIFRACTAL) + return intensity * noise_musgrave_multi_fractal(p, 
dimension, lacunarity, octaves); + else if (type == NODE_MUSGRAVE_FBM) + return intensity * noise_musgrave_fBm(p, dimension, lacunarity, octaves); + else if (type == NODE_MUSGRAVE_HYBRID_MULTIFRACTAL) + return intensity * + noise_musgrave_hybrid_multi_fractal(p, dimension, lacunarity, octaves, offset, gain); + else if (type == NODE_MUSGRAVE_RIDGED_MULTIFRACTAL) + return intensity * + noise_musgrave_ridged_multi_fractal(p, dimension, lacunarity, octaves, offset, gain); + else if (type == NODE_MUSGRAVE_HETERO_TERRAIN) + return intensity * noise_musgrave_hetero_terrain(p, dimension, lacunarity, octaves, offset); + + return 0.0f; } -ccl_device void svm_node_tex_musgrave(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_tex_musgrave( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint4 node2 = read_node(kg, offset); - uint4 node3 = read_node(kg, offset); - - uint type, co_offset, color_offset, fac_offset; - uint dimension_offset, lacunarity_offset, detail_offset, offset_offset; - uint gain_offset, scale_offset; - - decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset); - decode_node_uchar4(node.z, &dimension_offset, &lacunarity_offset, &detail_offset, &offset_offset); - decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL); - - float3 co = stack_load_float3(stack, co_offset); - float dimension = stack_load_float_default(stack, dimension_offset, node2.x); - float lacunarity = stack_load_float_default(stack, lacunarity_offset, node2.y); - float detail = stack_load_float_default(stack, detail_offset, node2.z); - float foffset = stack_load_float_default(stack, offset_offset, node2.w); - float gain = stack_load_float_default(stack, gain_offset, node3.x); - float scale = stack_load_float_default(stack, scale_offset, node3.y); - - dimension = fmaxf(dimension, 1e-5f); - detail = clamp(detail, 0.0f, 16.0f); - lacunarity = fmaxf(lacunarity, 1e-5f); - - float f = 
svm_musgrave((NodeMusgraveType)type, - dimension, lacunarity, detail, foffset, 1.0f, gain, co*scale); - - if(stack_valid(fac_offset)) - stack_store_float(stack, fac_offset, f); - if(stack_valid(color_offset)) - stack_store_float3(stack, color_offset, make_float3(f, f, f)); + uint4 node2 = read_node(kg, offset); + uint4 node3 = read_node(kg, offset); + + uint type, co_offset, color_offset, fac_offset; + uint dimension_offset, lacunarity_offset, detail_offset, offset_offset; + uint gain_offset, scale_offset; + + decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset); + decode_node_uchar4( + node.z, &dimension_offset, &lacunarity_offset, &detail_offset, &offset_offset); + decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL); + + float3 co = stack_load_float3(stack, co_offset); + float dimension = stack_load_float_default(stack, dimension_offset, node2.x); + float lacunarity = stack_load_float_default(stack, lacunarity_offset, node2.y); + float detail = stack_load_float_default(stack, detail_offset, node2.z); + float foffset = stack_load_float_default(stack, offset_offset, node2.w); + float gain = stack_load_float_default(stack, gain_offset, node3.x); + float scale = stack_load_float_default(stack, scale_offset, node3.y); + + dimension = fmaxf(dimension, 1e-5f); + detail = clamp(detail, 0.0f, 16.0f); + lacunarity = fmaxf(lacunarity, 1e-5f); + + float f = svm_musgrave( + (NodeMusgraveType)type, dimension, lacunarity, detail, foffset, 1.0f, gain, co * scale); + + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, make_float3(f, f, f)); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h index 8c425ecf326..322579ccfe3 100644 --- a/intern/cycles/kernel/svm/svm_noise.h +++ b/intern/cycles/kernel/svm/svm_noise.h @@ -33,280 +33,302 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_SSE2__ 
-ccl_device_inline ssei quick_floor_sse(const ssef& x) +ccl_device_inline ssei quick_floor_sse(const ssef &x) { - ssei b = truncatei(x); - ssei isneg = cast((x < ssef(0.0f)).m128); - return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1 + ssei b = truncatei(x); + ssei isneg = cast((x < ssef(0.0f)).m128); + return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1 } #endif ccl_device uint hash(uint kx, uint ky, uint kz) { - // define some handy macros -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) -#define final(a,b,c) \ -{ \ - c ^= b; c -= rot(b,14); \ - a ^= c; a -= rot(c,11); \ - b ^= a; b -= rot(a,25); \ - c ^= b; c -= rot(b,16); \ - a ^= c; a -= rot(c,4); \ - b ^= a; b -= rot(a,14); \ - c ^= b; c -= rot(b,24); \ -} - // now hash the data! - uint a, b, c, len = 3; - a = b = c = 0xdeadbeef + (len << 2) + 13; - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; - // macros not needed anymore + // define some handy macros +#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) +#define final(a, b, c) \ + { \ + c ^= b; \ + c -= rot(b, 14); \ + a ^= c; \ + a -= rot(c, 11); \ + b ^= a; \ + b -= rot(a, 25); \ + c ^= b; \ + c -= rot(b, 16); \ + a ^= c; \ + a -= rot(c, 4); \ + b ^= a; \ + b -= rot(a, 14); \ + c ^= b; \ + c -= rot(b, 24); \ + } + // now hash the data! 
+ uint a, b, c, len = 3; + a = b = c = 0xdeadbeef + (len << 2) + 13; + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; + // macros not needed anymore #undef rot #undef final } #ifdef __KERNEL_SSE2__ -ccl_device_inline ssei hash_sse(const ssei& kx, const ssei& ky, const ssei& kz) +ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz) { -# define rot(x,k) (((x)<<(k)) | (srl(x, 32-(k)))) -# define xor_rot(a, b, c) do {a = a^b; a = a - rot(b, c);} while(0) - - uint len = 3; - ssei magic = ssei(0xdeadbeef + (len << 2) + 13); - ssei a = magic + kx; - ssei b = magic + ky; - ssei c = magic + kz; - - xor_rot(c, b, 14); - xor_rot(a, c, 11); - xor_rot(b, a, 25); - xor_rot(c, b, 16); - xor_rot(a, c, 4); - xor_rot(b, a, 14); - xor_rot(c, b, 24); - - return c; +# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k)))) +# define xor_rot(a, b, c) \ + do { \ + a = a ^ b; \ + a = a - rot(b, c); \ + } while (0) + + uint len = 3; + ssei magic = ssei(0xdeadbeef + (len << 2) + 13); + ssei a = magic + kx; + ssei b = magic + ky; + ssei c = magic + kz; + + xor_rot(c, b, 14); + xor_rot(a, c, 11); + xor_rot(b, a, 25); + xor_rot(c, b, 16); + xor_rot(a, c, 4); + xor_rot(b, a, 14); + xor_rot(c, b, 24); + + return c; # undef rot # undef xor_rot } #endif -#if 0 // unused +#if 0 // unused ccl_device int imod(int a, int b) { - a %= b; - return a < 0 ? a + b : a; + a %= b; + return a < 0 ? 
a + b : a; } ccl_device uint phash(int kx, int ky, int kz, int3 p) { - return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z)); + return hash(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z)); } #endif #ifndef __KERNEL_SSE2__ -ccl_device float floorfrac(float x, int* i) +ccl_device float floorfrac(float x, int *i) { - *i = quick_floor_to_int(x); - return x - *i; + *i = quick_floor_to_int(x); + return x - *i; } #else -ccl_device_inline ssef floorfrac_sse(const ssef& x, ssei *i) +ccl_device_inline ssef floorfrac_sse(const ssef &x, ssei *i) { - *i = quick_floor_sse(x); - return x - ssef(*i); + *i = quick_floor_sse(x); + return x - ssef(*i); } #endif #ifndef __KERNEL_SSE2__ ccl_device float fade(float t) { - return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); + return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f); } #else ccl_device_inline ssef fade_sse(const ssef *t) { - ssef a = madd(*t, ssef(6.0f), ssef(-15.0f)); - ssef b = madd(*t, a, ssef(10.0f)); - return ((*t) * (*t)) * ((*t) * b); + ssef a = madd(*t, ssef(6.0f), ssef(-15.0f)); + ssef b = madd(*t, a, ssef(10.0f)); + return ((*t) * (*t)) * ((*t) * b); } #endif #ifndef __KERNEL_SSE2__ ccl_device float nerp(float t, float a, float b) { - return (1.0f - t) * a + t * b; + return (1.0f - t) * a + t * b; } #else -ccl_device_inline ssef nerp_sse(const ssef& t, const ssef& a, const ssef& b) +ccl_device_inline ssef nerp_sse(const ssef &t, const ssef &a, const ssef &b) { - ssef x1 = (ssef(1.0f) - t) * a; - return madd(t, b, x1); + ssef x1 = (ssef(1.0f) - t) * a; + return madd(t, b, x1); } #endif #ifndef __KERNEL_SSE2__ ccl_device float grad(int hash, float x, float y, float z) { - // use vectors pointing to the edges of the cube - int h = hash & 15; - float u = h<8 ? x : y; - float vt = ((h == 12) | (h == 14)) ? x : z; - float v = h < 4 ? y : vt; - return ((h&1) ? -u : u) + ((h&2) ? -v : v); + // use vectors pointing to the edges of the cube + int h = hash & 15; + float u = h < 8 ? x : y; + float vt = ((h == 12) | (h == 14)) ? 
x : z; + float v = h < 4 ? y : vt; + return ((h & 1) ? -u : u) + ((h & 2) ? -v : v); } #else -ccl_device_inline ssef grad_sse(const ssei& hash, const ssef& x, const ssef& y, const ssef& z) +ccl_device_inline ssef grad_sse(const ssei &hash, const ssef &x, const ssef &y, const ssef &z) { - ssei c1 = ssei(1); - ssei c2 = ssei(2); + ssei c1 = ssei(1); + ssei c2 = ssei(2); - ssei h = hash & ssei(15); // h = hash & 15 + ssei h = hash & ssei(15); // h = hash & 15 - sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0 + sseb case_ux = h < ssei(8); // 0xffffffff if h < 8 else 0 - ssef u = select(case_ux, x, y); // u = h<8 ? x : y + ssef u = select(case_ux, x, y); // u = h<8 ? x : y - sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0 + sseb case_vy = h < ssei(4); // 0xffffffff if h < 4 else 0 - sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0 - sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0 + sseb case_h12 = h == ssei(12); // 0xffffffff if h == 12 else 0 + sseb case_h14 = h == ssei(14); // 0xffffffff if h == 14 else 0 - sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0 + sseb case_vx = case_h12 | case_h14; // 0xffffffff if h == 12 or h == 14 else 0 - ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? x : z + ssef v = select(case_vy, y, select(case_vx, x, z)); // v = h<4 ? y : h == 12 || h == 14 ? 
x : z - ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0 - ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0 - ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign) + ssei case_uneg = (h & c1) << 31; // 1<<31 if h&1 else 0 + ssef case_uneg_mask = cast(case_uneg); // -0.0 if h&1 else +0.0 + ssef ru = u ^ case_uneg_mask; // -u if h&1 else u (copy float sign) - ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0 - ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0 - ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign) + ssei case_vneg = (h & c2) << 30; // 2<<30 if h&2 else 0 + ssef case_vneg_mask = cast(case_vneg); // -0.0 if h&2 else +0.0 + ssef rv = v ^ case_vneg_mask; // -v if h&2 else v (copy float sign) - ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v) - return r; + ssef r = ru + rv; // ((h&1) ? -u : u) + ((h&2) ? -v : v) + return r; } #endif #ifndef __KERNEL_SSE2__ ccl_device float scale3(float result) { - return 0.9820f * result; + return 0.9820f * result; } #else -ccl_device_inline ssef scale3_sse(const ssef& result) +ccl_device_inline ssef scale3_sse(const ssef &result) { - return ssef(0.9820f) * result; + return ssef(0.9820f) * result; } #endif #ifndef __KERNEL_SSE2__ ccl_device_noinline float perlin(float x, float y, float z) { - int X; float fx = floorfrac(x, &X); - int Y; float fy = floorfrac(y, &Y); - int Z; float fz = floorfrac(z, &Z); - - float u = fade(fx); - float v = fade(fy); - float w = fade(fz); - - float result; - - result = nerp (w, nerp (v, nerp (u, grad (hash (X , Y , Z ), fx , fy , fz ), - grad (hash (X+1, Y , Z ), fx-1.0f, fy , fz )), - nerp (u, grad (hash (X , Y+1, Z ), fx , fy-1.0f, fz ), - grad (hash (X+1, Y+1, Z ), fx-1.0f, fy-1.0f, fz ))), - nerp (v, nerp (u, grad (hash (X , Y , Z+1), fx , fy , fz-1.0f ), - grad (hash (X+1, Y , Z+1), fx-1.0f, fy , fz-1.0f )), - nerp (u, grad (hash (X , Y+1, Z+1), fx , fy-1.0f, fz-1.0f ), - grad (hash (X+1, Y+1, Z+1), fx-1.0f, 
fy-1.0f, fz-1.0f )))); - float r = scale3(result); - - /* can happen for big coordinates, things even out to 0.0 then anyway */ - return (isfinite(r))? r: 0.0f; + int X; + float fx = floorfrac(x, &X); + int Y; + float fy = floorfrac(y, &Y); + int Z; + float fz = floorfrac(z, &Z); + + float u = fade(fx); + float v = fade(fy); + float w = fade(fz); + + float result; + + result = nerp( + w, + nerp(v, + nerp(u, grad(hash(X, Y, Z), fx, fy, fz), grad(hash(X + 1, Y, Z), fx - 1.0f, fy, fz)), + nerp(u, + grad(hash(X, Y + 1, Z), fx, fy - 1.0f, fz), + grad(hash(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))), + nerp(v, + nerp(u, + grad(hash(X, Y, Z + 1), fx, fy, fz - 1.0f), + grad(hash(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)), + nerp(u, + grad(hash(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f), + grad(hash(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f)))); + float r = scale3(result); + + /* can happen for big coordinates, things even out to 0.0 then anyway */ + return (isfinite(r)) ? r : 0.0f; } #else ccl_device_noinline float perlin(float x, float y, float z) { - ssef xyz = ssef(x, y, z, 0.0f); - ssei XYZ; + ssef xyz = ssef(x, y, z, 0.0f); + ssei XYZ; - ssef fxyz = floorfrac_sse(xyz, &XYZ); + ssef fxyz = floorfrac_sse(xyz, &XYZ); - ssef uvw = fade_sse(&fxyz); - ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw); + ssef uvw = fade_sse(&fxyz); + ssef u = shuffle<0>(uvw), v = shuffle<1>(uvw), w = shuffle<2>(uvw); - ssei XYZ_ofc = XYZ + ssei(1); - ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1 - ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1 + ssei XYZ_ofc = XYZ + ssei(1); + ssei vdy = shuffle<1, 1, 1, 1>(XYZ, XYZ_ofc); // +0, +0, +1, +1 + ssei vdz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ_ofc)); // +0, +1, +0, +1 - ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011 - ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111 + 
ssei h1 = hash_sse(shuffle<0>(XYZ), vdy, vdz); // hash directions 000, 001, 010, 011 + ssei h2 = hash_sse(shuffle<0>(XYZ_ofc), vdy, vdz); // hash directions 100, 101, 110, 111 - ssef fxyz_ofc = fxyz - ssef(1.0f); - ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc); - ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc)); + ssef fxyz_ofc = fxyz - ssef(1.0f); + ssef vfy = shuffle<1, 1, 1, 1>(fxyz, fxyz_ofc); + ssef vfz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz_ofc)); - ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz); - ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz); - ssef n1 = nerp_sse(u, g1, g2); + ssef g1 = grad_sse(h1, shuffle<0>(fxyz), vfy, vfz); + ssef g2 = grad_sse(h2, shuffle<0>(fxyz_ofc), vfy, vfz); + ssef n1 = nerp_sse(u, g1, g2); - ssef n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector - ssef n2 = nerp_sse(v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _] + ssef n1_half = shuffle<2, 3, 2, 3>(n1); // extract 2 floats to a separate vector + ssef n2 = nerp_sse( + v, n1, n1_half); // process nerp([a, b, _, _], [c, d, _, _]) -> [a', b', _, _] - ssef n2_second = shuffle<1>(n2); // extract b to a separate vector - ssef result = nerp_sse(w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _] + ssef n2_second = shuffle<1>(n2); // extract b to a separate vector + ssef result = nerp_sse( + w, n2, n2_second); // process nerp([a', _, _, _], [b', _, _, _]) -> [a'', _, _, _] - ssef r = scale3_sse(result); + ssef r = scale3_sse(result); - ssef infmask = cast(ssei(0x7f800000)); - ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0 - ssef rfinite = andnot(rinfmask, r); // 0 if r is inf/-inf/nan else r - return extract<0>(rfinite); + ssef infmask = cast(ssei(0x7f800000)); + ssef rinfmask = ((r & infmask) == infmask).m128; // 0xffffffff if r is inf/-inf/nan else 0 + ssef rfinite = andnot(rinfmask, r); // 0 if r is 
inf/-inf/nan else r + return extract<0>(rfinite); } #endif /* perlin noise in range 0..1 */ ccl_device float noise(float3 p) { - float r = perlin(p.x, p.y, p.z); - return 0.5f*r + 0.5f; + float r = perlin(p.x, p.y, p.z); + return 0.5f * r + 0.5f; } /* perlin noise in range -1..1 */ ccl_device float snoise(float3 p) { - return perlin(p.x, p.y, p.z); + return perlin(p.x, p.y, p.z); } /* cell noise */ ccl_device float cellnoise(float3 p) { - int3 ip = quick_floor_to_int3(p); - return bits_to_01(hash(ip.x, ip.y, ip.z)); + int3 ip = quick_floor_to_int3(p); + return bits_to_01(hash(ip.x, ip.y, ip.z)); } ccl_device float3 cellnoise3(float3 p) { - int3 ip = quick_floor_to_int3(p); + int3 ip = quick_floor_to_int3(p); #ifndef __KERNEL_SSE__ - float r = bits_to_01(hash(ip.x, ip.y, ip.z)); - float g = bits_to_01(hash(ip.y, ip.x, ip.z)); - float b = bits_to_01(hash(ip.y, ip.z, ip.x)); - return make_float3(r, g, b); + float r = bits_to_01(hash(ip.x, ip.y, ip.z)); + float g = bits_to_01(hash(ip.y, ip.x, ip.z)); + float b = bits_to_01(hash(ip.y, ip.z, ip.x)); + return make_float3(r, g, b); #else - ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128)); - ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128)); - ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128)); - ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx); - return float3(uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF)); + ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128)); + ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128)); + ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128)); + ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx); + return float3(uint32_to_float(bits) * ssef(1.0f / (float)0xFFFFFFFF)); #endif } diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h index c02940f96d6..3324e86fcd8 100644 --- a/intern/cycles/kernel/svm/svm_noisetex.h +++ b/intern/cycles/kernel/svm/svm_noisetex.h @@ -18,42 +18,43 @@ CCL_NAMESPACE_BEGIN /* Noise */ -ccl_device void svm_node_tex_noise(KernelGlobals *kg, 
ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_tex_noise( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset; + uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset; - decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset); - decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL); + decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset); + decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL); - uint4 node2 = read_node(kg, offset); + uint4 node2 = read_node(kg, offset); - float scale = stack_load_float_default(stack, scale_offset, node2.x); - float detail = stack_load_float_default(stack, detail_offset, node2.y); - float distortion = stack_load_float_default(stack, distortion_offset, node2.z); - float3 p = stack_load_float3(stack, co_offset) * scale; - int hard = 0; + float scale = stack_load_float_default(stack, scale_offset, node2.x); + float detail = stack_load_float_default(stack, detail_offset, node2.y); + float distortion = stack_load_float_default(stack, distortion_offset, node2.z); + float3 p = stack_load_float3(stack, co_offset) * scale; + int hard = 0; - if(distortion != 0.0f) { - float3 r, offset = make_float3(13.5f, 13.5f, 13.5f); + if (distortion != 0.0f) { + float3 r, offset = make_float3(13.5f, 13.5f, 13.5f); - r.x = noise(p + offset) * distortion; - r.y = noise(p) * distortion; - r.z = noise(p - offset) * distortion; + r.x = noise(p + offset) * distortion; + r.y = noise(p) * distortion; + r.z = noise(p - offset) * distortion; - p += r; - } + p += r; + } - float f = noise_turbulence(p, detail, hard); + float f = noise_turbulence(p, detail, hard); - if(stack_valid(fac_offset)) { - stack_store_float(stack, fac_offset, f); - } - if(stack_valid(color_offset)) { - float3 color 
= make_float3(f, - noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard), - noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard)); - stack_store_float3(stack, color_offset, color); - } + if (stack_valid(fac_offset)) { + stack_store_float(stack, fac_offset, f); + } + if (stack_valid(color_offset)) { + float3 color = make_float3(f, + noise_turbulence(make_float3(p.y, p.x, p.z), detail, hard), + noise_turbulence(make_float3(p.y, p.z, p.x), detail, hard)); + stack_store_float3(stack, color_offset, color); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_normal.h b/intern/cycles/kernel/svm/svm_normal.h index fe46d79fe15..4cd3eab0ed2 100644 --- a/intern/cycles/kernel/svm/svm_normal.h +++ b/intern/cycles/kernel/svm/svm_normal.h @@ -16,23 +16,29 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_normal(KernelGlobals *kg, ShaderData *sd, float *stack, uint in_normal_offset, uint out_normal_offset, uint out_dot_offset, int *offset) +ccl_device void svm_node_normal(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint in_normal_offset, + uint out_normal_offset, + uint out_dot_offset, + int *offset) { - /* read extra data */ - uint4 node1 = read_node(kg, offset); - float3 normal = stack_load_float3(stack, in_normal_offset); + /* read extra data */ + uint4 node1 = read_node(kg, offset); + float3 normal = stack_load_float3(stack, in_normal_offset); - float3 direction; - direction.x = __int_as_float(node1.x); - direction.y = __int_as_float(node1.y); - direction.z = __int_as_float(node1.z); - direction = normalize(direction); + float3 direction; + direction.x = __int_as_float(node1.x); + direction.y = __int_as_float(node1.y); + direction.z = __int_as_float(node1.z); + direction = normalize(direction); - if(stack_valid(out_normal_offset)) - stack_store_float3(stack, out_normal_offset, direction); + if (stack_valid(out_normal_offset)) + stack_store_float3(stack, out_normal_offset, direction); - if(stack_valid(out_dot_offset)) - 
stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal))); + if (stack_valid(out_dot_offset)) + stack_store_float(stack, out_dot_offset, dot(direction, normalize(normal))); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index 6f39391057e..6084ee35a1f 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -21,91 +21,84 @@ CCL_NAMESPACE_BEGIN /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ -ccl_device_inline float4 rgb_ramp_lookup(KernelGlobals *kg, - int offset, - float f, - bool interpolate, - bool extrapolate, - int table_size) +ccl_device_inline float4 rgb_ramp_lookup( + KernelGlobals *kg, int offset, float f, bool interpolate, bool extrapolate, int table_size) { - if((f < 0.0f || f > 1.0f) && extrapolate) { - float4 t0, dy; - if(f < 0.0f) { - t0 = fetch_node_float(kg, offset); - dy = t0 - fetch_node_float(kg, offset + 1); - f = -f; - } - else { - t0 = fetch_node_float(kg, offset + table_size - 1); - dy = t0 - fetch_node_float(kg, offset + table_size - 2); - f = f - 1.0f; - } - return t0 + dy * f * (table_size-1); - } - - f = saturate(f)*(table_size-1); - - /* clamp int as well in case of NaN */ - int i = clamp(float_to_int(f), 0, table_size-1); - float t = f - (float)i; - - float4 a = fetch_node_float(kg, offset+i); - - if(interpolate && t > 0.0f) - a = (1.0f - t)*a + t*fetch_node_float(kg, offset+i+1); - - return a; + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float4 t0, dy; + if (f < 0.0f) { + t0 = fetch_node_float(kg, offset); + dy = t0 - fetch_node_float(kg, offset + 1); + f = -f; + } + else { + t0 = fetch_node_float(kg, offset + table_size - 1); + dy = t0 - fetch_node_float(kg, offset + table_size - 2); + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = saturate(f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size - 1); + 
float t = f - (float)i; + + float4 a = fetch_node_float(kg, offset + i); + + if (interpolate && t > 0.0f) + a = (1.0f - t) * a + t * fetch_node_float(kg, offset + i + 1); + + return a; } -ccl_device void svm_node_rgb_ramp(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_rgb_ramp( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint fac_offset, color_offset, alpha_offset; - uint interpolate = node.z; + uint fac_offset, color_offset, alpha_offset; + uint interpolate = node.z; - decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL); + decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL); - uint table_size = read_node(kg, offset).x; + uint table_size = read_node(kg, offset).x; - float fac = stack_load_float(stack, fac_offset); - float4 color = rgb_ramp_lookup(kg, *offset, fac, interpolate, false, table_size); + float fac = stack_load_float(stack, fac_offset); + float4 color = rgb_ramp_lookup(kg, *offset, fac, interpolate, false, table_size); - if(stack_valid(color_offset)) - stack_store_float3(stack, color_offset, float4_to_float3(color)); - if(stack_valid(alpha_offset)) - stack_store_float(stack, alpha_offset, color.w); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, float4_to_float3(color)); + if (stack_valid(alpha_offset)) + stack_store_float(stack, alpha_offset, color.w); - *offset += table_size; + *offset += table_size; } -ccl_device void svm_node_curves(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_curves( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint fac_offset, color_offset, out_offset; - decode_node_uchar4(node.y, - &fac_offset, - &color_offset, - &out_offset, - NULL); + uint fac_offset, color_offset, out_offset; + decode_node_uchar4(node.y, &fac_offset, &color_offset, &out_offset, NULL); - uint table_size = 
read_node(kg, offset).x; + uint table_size = read_node(kg, offset).x; - float fac = stack_load_float(stack, fac_offset); - float3 color = stack_load_float3(stack, color_offset); + float fac = stack_load_float(stack, fac_offset); + float3 color = stack_load_float3(stack, color_offset); - const float min_x = __int_as_float(node.z), - max_x = __int_as_float(node.w); - const float range_x = max_x - min_x; - const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x; + const float min_x = __int_as_float(node.z), max_x = __int_as_float(node.w); + const float range_x = max_x - min_x; + const float3 relpos = (color - make_float3(min_x, min_x, min_x)) / range_x; - float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x; - float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y; - float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z; + float r = rgb_ramp_lookup(kg, *offset, relpos.x, true, true, table_size).x; + float g = rgb_ramp_lookup(kg, *offset, relpos.y, true, true, table_size).y; + float b = rgb_ramp_lookup(kg, *offset, relpos.z, true, true, table_size).z; - color = (1.0f - fac)*color + fac*make_float3(r, g, b); - stack_store_float3(stack, out_offset, color); + color = (1.0f - fac) * color + fac * make_float3(r, g, b); + stack_store_float3(stack, out_offset, color); - *offset += table_size; + *offset += table_size; } CCL_NAMESPACE_END -#endif /* __SVM_RAMP_H__ */ +#endif /* __SVM_RAMP_H__ */ diff --git a/intern/cycles/kernel/svm/svm_ramp_util.h b/intern/cycles/kernel/svm/svm_ramp_util.h index 847108ff1c2..202596c1fe3 100644 --- a/intern/cycles/kernel/svm/svm_ramp_util.h +++ b/intern/cycles/kernel/svm/svm_ramp_util.h @@ -21,78 +21,70 @@ CCL_NAMESPACE_BEGIN /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */ -ccl_device_inline float3 rgb_ramp_lookup(const float3 *ramp, - float f, - bool interpolate, - bool extrapolate, - int table_size) +ccl_device_inline float3 
+rgb_ramp_lookup(const float3 *ramp, float f, bool interpolate, bool extrapolate, int table_size) { - if((f < 0.0f || f > 1.0f) && extrapolate) { - float3 t0, dy; - if(f < 0.0f) { - t0 = ramp[0]; - dy = t0 - ramp[1], - f = -f; - } - else { - t0 = ramp[table_size - 1]; - dy = t0 - ramp[table_size - 2]; - f = f - 1.0f; - } - return t0 + dy * f * (table_size - 1); - } - - f = clamp(f, 0.0f, 1.0f) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = clamp(float_to_int(f), 0, table_size-1); - float t = f - (float)i; - - float3 result = ramp[i]; - - if(interpolate && t > 0.0f) { - result = (1.0f - t) * result + t * ramp[i + 1]; - } - - return result; + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float3 t0, dy; + if (f < 0.0f) { + t0 = ramp[0]; + dy = t0 - ramp[1], f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = clamp(f, 0.0f, 1.0f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size - 1); + float t = f - (float)i; + + float3 result = ramp[i]; + + if (interpolate && t > 0.0f) { + result = (1.0f - t) * result + t * ramp[i + 1]; + } + + return result; } -ccl_device float float_ramp_lookup(const float *ramp, - float f, - bool interpolate, - bool extrapolate, - int table_size) +ccl_device float float_ramp_lookup( + const float *ramp, float f, bool interpolate, bool extrapolate, int table_size) { - if((f < 0.0f || f > 1.0f) && extrapolate) { - float t0, dy; - if(f < 0.0f) { - t0 = ramp[0]; - dy = t0 - ramp[1], - f = -f; - } - else { - t0 = ramp[table_size - 1]; - dy = t0 - ramp[table_size - 2]; - f = f - 1.0f; - } - return t0 + dy * f * (table_size - 1); - } - - f = clamp(f, 0.0f, 1.0f) * (table_size - 1); - - /* clamp int as well in case of NaN */ - int i = clamp(float_to_int(f), 0, table_size-1); - float t = f - (float)i; - - float result = ramp[i]; - - if(interpolate && t > 0.0f) { - 
result = (1.0f - t) * result + t * ramp[i + 1]; - } - - return result; + if ((f < 0.0f || f > 1.0f) && extrapolate) { + float t0, dy; + if (f < 0.0f) { + t0 = ramp[0]; + dy = t0 - ramp[1], f = -f; + } + else { + t0 = ramp[table_size - 1]; + dy = t0 - ramp[table_size - 2]; + f = f - 1.0f; + } + return t0 + dy * f * (table_size - 1); + } + + f = clamp(f, 0.0f, 1.0f) * (table_size - 1); + + /* clamp int as well in case of NaN */ + int i = clamp(float_to_int(f), 0, table_size - 1); + float t = f - (float)i; + + float result = ramp[i]; + + if (interpolate && t > 0.0f) { + result = (1.0f - t) * result + t * ramp[i + 1]; + } + + return result; } CCL_NAMESPACE_END -#endif /* __SVM_RAMP_UTIL_H__ */ +#endif /* __SVM_RAMP_UTIL_H__ */ diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h index 1096aed2d97..f501252062e 100644 --- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h +++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h @@ -16,38 +16,50 @@ CCL_NAMESPACE_BEGIN -ccl_device void svm_node_combine_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint hue_in, uint saturation_in, uint value_in, int *offset) +ccl_device void svm_node_combine_hsv(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint hue_in, + uint saturation_in, + uint value_in, + int *offset) { - uint4 node1 = read_node(kg, offset); - uint color_out = node1.y; + uint4 node1 = read_node(kg, offset); + uint color_out = node1.y; - float hue = stack_load_float(stack, hue_in); - float saturation = stack_load_float(stack, saturation_in); - float value = stack_load_float(stack, value_in); + float hue = stack_load_float(stack, hue_in); + float saturation = stack_load_float(stack, saturation_in); + float value = stack_load_float(stack, value_in); - /* Combine, and convert back to RGB */ - float3 color = hsv_to_rgb(make_float3(hue, saturation, value)); + /* Combine, and convert back to RGB */ + float3 color = hsv_to_rgb(make_float3(hue, saturation, value)); - 
if(stack_valid(color_out)) - stack_store_float3(stack, color_out, color); + if (stack_valid(color_out)) + stack_store_float3(stack, color_out, color); } -ccl_device void svm_node_separate_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, uint color_in, uint hue_out, uint saturation_out, int *offset) +ccl_device void svm_node_separate_hsv(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint color_in, + uint hue_out, + uint saturation_out, + int *offset) { - uint4 node1 = read_node(kg, offset); - uint value_out = node1.y; + uint4 node1 = read_node(kg, offset); + uint value_out = node1.y; - float3 color = stack_load_float3(stack, color_in); + float3 color = stack_load_float3(stack, color_in); - /* Convert to HSV */ - color = rgb_to_hsv(color); + /* Convert to HSV */ + color = rgb_to_hsv(color); - if(stack_valid(hue_out)) - stack_store_float(stack, hue_out, color.x); - if(stack_valid(saturation_out)) - stack_store_float(stack, saturation_out, color.y); - if(stack_valid(value_out)) - stack_store_float(stack, value_out, color.z); + if (stack_valid(hue_out)) + stack_store_float(stack, hue_out, color.x); + if (stack_valid(saturation_out)) + stack_store_float(stack, saturation_out, color.y); + if (stack_valid(value_out)) + stack_store_float(stack, value_out, color.z); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_sepcomb_vector.h b/intern/cycles/kernel/svm/svm_sepcomb_vector.h index 0d85c0d6f1d..cbf77f1e640 100644 --- a/intern/cycles/kernel/svm/svm_sepcomb_vector.h +++ b/intern/cycles/kernel/svm/svm_sepcomb_vector.h @@ -18,26 +18,28 @@ CCL_NAMESPACE_BEGIN /* Vector combine / separate, used for the RGB and XYZ nodes */ -ccl_device void svm_node_combine_vector(ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset) +ccl_device void svm_node_combine_vector( + ShaderData *sd, float *stack, uint in_offset, uint vector_index, uint out_offset) { - float vector = stack_load_float(stack, in_offset); + float vector = 
stack_load_float(stack, in_offset); - if(stack_valid(out_offset)) - stack_store_float(stack, out_offset+vector_index, vector); + if (stack_valid(out_offset)) + stack_store_float(stack, out_offset + vector_index, vector); } -ccl_device void svm_node_separate_vector(ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset) +ccl_device void svm_node_separate_vector( + ShaderData *sd, float *stack, uint ivector_offset, uint vector_index, uint out_offset) { - float3 vector = stack_load_float3(stack, ivector_offset); + float3 vector = stack_load_float3(stack, ivector_offset); - if(stack_valid(out_offset)) { - if(vector_index == 0) - stack_store_float(stack, out_offset, vector.x); - else if(vector_index == 1) - stack_store_float(stack, out_offset, vector.y); - else - stack_store_float(stack, out_offset, vector.z); - } + if (stack_valid(out_offset)) { + if (vector_index == 0) + stack_store_float(stack, out_offset, vector.x); + else if (vector_index == 1) + stack_store_float(stack, out_offset, vector.y); + else + stack_store_float(stack, out_offset, vector.z); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h index 092f6e045d6..50fe0c8232f 100644 --- a/intern/cycles/kernel/svm/svm_sky.h +++ b/intern/cycles/kernel/svm/svm_sky.h @@ -20,8 +20,8 @@ CCL_NAMESPACE_BEGIN ccl_device float sky_angle_between(float thetav, float phiv, float theta, float phi) { - float cospsi = sinf(thetav)*sinf(theta)*cosf(phi - phiv) + cosf(thetav)*cosf(theta); - return safe_acosf(cospsi); + float cospsi = sinf(thetav) * sinf(theta) * cosf(phi - phiv) + cosf(thetav) * cosf(theta); + return safe_acosf(cospsi); } /* @@ -30,36 +30,43 @@ ccl_device float sky_angle_between(float thetav, float phiv, float theta, float */ ccl_device float sky_perez_function(float *lam, float theta, float gamma) { - float ctheta = cosf(theta); - float cgamma = cosf(gamma); + float ctheta = cosf(theta); + float cgamma = cosf(gamma); - 
return (1.0f + lam[0]*expf(lam[1]/ctheta)) * (1.0f + lam[2]*expf(lam[3]*gamma) + lam[4]*cgamma*cgamma); + return (1.0f + lam[0] * expf(lam[1] / ctheta)) * + (1.0f + lam[2] * expf(lam[3] * gamma) + lam[4] * cgamma * cgamma); } -ccl_device float3 sky_radiance_old(KernelGlobals *kg, float3 dir, - float sunphi, float suntheta, - float radiance_x, float radiance_y, float radiance_z, - float *config_x, float *config_y, float *config_z) +ccl_device float3 sky_radiance_old(KernelGlobals *kg, + float3 dir, + float sunphi, + float suntheta, + float radiance_x, + float radiance_y, + float radiance_z, + float *config_x, + float *config_y, + float *config_z) { - /* convert vector to spherical coordinates */ - float2 spherical = direction_to_spherical(dir); - float theta = spherical.x; - float phi = spherical.y; + /* convert vector to spherical coordinates */ + float2 spherical = direction_to_spherical(dir); + float theta = spherical.x; + float phi = spherical.y; - /* angle between sun direction and dir */ - float gamma = sky_angle_between(theta, phi, suntheta, sunphi); + /* angle between sun direction and dir */ + float gamma = sky_angle_between(theta, phi, suntheta, sunphi); - /* clamp theta to horizon */ - theta = min(theta, M_PI_2_F - 0.001f); + /* clamp theta to horizon */ + theta = min(theta, M_PI_2_F - 0.001f); - /* compute xyY color space values */ - float x = radiance_y * sky_perez_function(config_y, theta, gamma); - float y = radiance_z * sky_perez_function(config_z, theta, gamma); - float Y = radiance_x * sky_perez_function(config_x, theta, gamma); + /* compute xyY color space values */ + float x = radiance_y * sky_perez_function(config_y, theta, gamma); + float y = radiance_z * sky_perez_function(config_z, theta, gamma); + float Y = radiance_x * sky_perez_function(config_x, theta, gamma); - /* convert to RGB */ - float3 xyz = xyY_to_xyz(x, y, Y); - return xyz_to_rgb(kg, xyz); + /* convert to RGB */ + float3 xyz = xyY_to_xyz(x, y, Y); + return xyz_to_rgb(kg, xyz); } 
/* @@ -68,118 +75,142 @@ ccl_device float3 sky_radiance_old(KernelGlobals *kg, float3 dir, */ ccl_device float sky_radiance_internal(float *configuration, float theta, float gamma) { - float ctheta = cosf(theta); - float cgamma = cosf(gamma); - - float expM = expf(configuration[4] * gamma); - float rayM = cgamma * cgamma; - float mieM = (1.0f + rayM) / powf((1.0f + configuration[8]*configuration[8] - 2.0f*configuration[8]*cgamma), 1.5f); - float zenith = sqrtf(ctheta); - - return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) * - (configuration[2] + configuration[3] * expM + configuration[5] * rayM + configuration[6] * mieM + configuration[7] * zenith); + float ctheta = cosf(theta); + float cgamma = cosf(gamma); + + float expM = expf(configuration[4] * gamma); + float rayM = cgamma * cgamma; + float mieM = (1.0f + rayM) / powf((1.0f + configuration[8] * configuration[8] - + 2.0f * configuration[8] * cgamma), + 1.5f); + float zenith = sqrtf(ctheta); + + return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) * + (configuration[2] + configuration[3] * expM + configuration[5] * rayM + + configuration[6] * mieM + configuration[7] * zenith); } -ccl_device float3 sky_radiance_new(KernelGlobals *kg, float3 dir, - float sunphi, float suntheta, - float radiance_x, float radiance_y, float radiance_z, - float *config_x, float *config_y, float *config_z) +ccl_device float3 sky_radiance_new(KernelGlobals *kg, + float3 dir, + float sunphi, + float suntheta, + float radiance_x, + float radiance_y, + float radiance_z, + float *config_x, + float *config_y, + float *config_z) { - /* convert vector to spherical coordinates */ - float2 spherical = direction_to_spherical(dir); - float theta = spherical.x; - float phi = spherical.y; + /* convert vector to spherical coordinates */ + float2 spherical = direction_to_spherical(dir); + float theta = spherical.x; + float phi = spherical.y; - /* angle between sun direction and dir */ - float gamma = 
sky_angle_between(theta, phi, suntheta, sunphi); + /* angle between sun direction and dir */ + float gamma = sky_angle_between(theta, phi, suntheta, sunphi); - /* clamp theta to horizon */ - theta = min(theta, M_PI_2_F - 0.001f); + /* clamp theta to horizon */ + theta = min(theta, M_PI_2_F - 0.001f); - /* compute xyz color space values */ - float x = sky_radiance_internal(config_x, theta, gamma) * radiance_x; - float y = sky_radiance_internal(config_y, theta, gamma) * radiance_y; - float z = sky_radiance_internal(config_z, theta, gamma) * radiance_z; + /* compute xyz color space values */ + float x = sky_radiance_internal(config_x, theta, gamma) * radiance_x; + float y = sky_radiance_internal(config_y, theta, gamma) * radiance_y; + float z = sky_radiance_internal(config_z, theta, gamma) * radiance_z; - /* convert to RGB and adjust strength */ - return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F/683); + /* convert to RGB and adjust strength */ + return xyz_to_rgb(kg, make_float3(x, y, z)) * (M_2PI_F / 683); } -ccl_device void svm_node_tex_sky(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_tex_sky( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - /* Define variables */ - float sunphi, suntheta, radiance_x, radiance_y, radiance_z; - float config_x[9], config_y[9], config_z[9]; - - /* Load data */ - uint dir_offset = node.y; - uint out_offset = node.z; - int sky_model = node.w; - - float4 data = read_node_float(kg, offset); - sunphi = data.x; - suntheta = data.y; - radiance_x = data.z; - radiance_y = data.w; - - data = read_node_float(kg, offset); - radiance_z = data.x; - config_x[0] = data.y; - config_x[1] = data.z; - config_x[2] = data.w; - - data = read_node_float(kg, offset); - config_x[3] = data.x; - config_x[4] = data.y; - config_x[5] = data.z; - config_x[6] = data.w; - - data = read_node_float(kg, offset); - config_x[7] = data.x; - config_x[8] = data.y; - config_y[0] = 
data.z; - config_y[1] = data.w; - - data = read_node_float(kg, offset); - config_y[2] = data.x; - config_y[3] = data.y; - config_y[4] = data.z; - config_y[5] = data.w; - - data = read_node_float(kg, offset); - config_y[6] = data.x; - config_y[7] = data.y; - config_y[8] = data.z; - config_z[0] = data.w; - - data = read_node_float(kg, offset); - config_z[1] = data.x; - config_z[2] = data.y; - config_z[3] = data.z; - config_z[4] = data.w; - - data = read_node_float(kg, offset); - config_z[5] = data.x; - config_z[6] = data.y; - config_z[7] = data.z; - config_z[8] = data.w; - - float3 dir = stack_load_float3(stack, dir_offset); - float3 f; - - /* Compute Sky */ - if(sky_model == 0) { - f = sky_radiance_old(kg, dir, sunphi, suntheta, - radiance_x, radiance_y, radiance_z, - config_x, config_y, config_z); - } - else { - f = sky_radiance_new(kg, dir, sunphi, suntheta, - radiance_x, radiance_y, radiance_z, - config_x, config_y, config_z); - } - - stack_store_float3(stack, out_offset, f); + /* Define variables */ + float sunphi, suntheta, radiance_x, radiance_y, radiance_z; + float config_x[9], config_y[9], config_z[9]; + + /* Load data */ + uint dir_offset = node.y; + uint out_offset = node.z; + int sky_model = node.w; + + float4 data = read_node_float(kg, offset); + sunphi = data.x; + suntheta = data.y; + radiance_x = data.z; + radiance_y = data.w; + + data = read_node_float(kg, offset); + radiance_z = data.x; + config_x[0] = data.y; + config_x[1] = data.z; + config_x[2] = data.w; + + data = read_node_float(kg, offset); + config_x[3] = data.x; + config_x[4] = data.y; + config_x[5] = data.z; + config_x[6] = data.w; + + data = read_node_float(kg, offset); + config_x[7] = data.x; + config_x[8] = data.y; + config_y[0] = data.z; + config_y[1] = data.w; + + data = read_node_float(kg, offset); + config_y[2] = data.x; + config_y[3] = data.y; + config_y[4] = data.z; + config_y[5] = data.w; + + data = read_node_float(kg, offset); + config_y[6] = data.x; + config_y[7] = data.y; + 
config_y[8] = data.z; + config_z[0] = data.w; + + data = read_node_float(kg, offset); + config_z[1] = data.x; + config_z[2] = data.y; + config_z[3] = data.z; + config_z[4] = data.w; + + data = read_node_float(kg, offset); + config_z[5] = data.x; + config_z[6] = data.y; + config_z[7] = data.z; + config_z[8] = data.w; + + float3 dir = stack_load_float3(stack, dir_offset); + float3 f; + + /* Compute Sky */ + if (sky_model == 0) { + f = sky_radiance_old(kg, + dir, + sunphi, + suntheta, + radiance_x, + radiance_y, + radiance_z, + config_x, + config_y, + config_z); + } + else { + f = sky_radiance_new(kg, + dir, + sunphi, + suntheta, + radiance_x, + radiance_y, + radiance_z, + config_x, + config_y, + config_z); + } + + stack_store_float3(stack, out_offset, f); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index fe61292d0b0..1fb3e20f9e0 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -18,390 +18,381 @@ CCL_NAMESPACE_BEGIN /* Texture Coordinate Node */ -ccl_device void svm_node_tex_coord(KernelGlobals *kg, - ShaderData *sd, - int path_flag, - float *stack, - uint4 node, - int *offset) +ccl_device void svm_node_tex_coord( + KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset) { - float3 data; - uint type = node.y; - uint out_offset = node.z; - - switch(type) { - case NODE_TEXCO_OBJECT: { - data = sd->P; - if(node.w == 0) { - if(sd->object != OBJECT_NONE) { - object_inverse_position_transform(kg, sd, &data); - } - } - else { - Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); - data = transform_point(&tfm, data); - } - break; - } - case NODE_TEXCO_NORMAL: { - data = sd->N; - object_inverse_normal_transform(kg, sd, &data); - break; - } - case NODE_TEXCO_CAMERA: { - Transform tfm = kernel_data.cam.worldtocamera; - - if(sd->object != 
OBJECT_NONE) - data = transform_point(&tfm, sd->P); - else - data = transform_point(&tfm, sd->P + camera_position(kg)); - break; - } - case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P); - else - data = camera_world_to_ndc(kg, sd, sd->P); - data.z = 0.0f; - break; - } - case NODE_TEXCO_REFLECTION: { - if(sd->object != OBJECT_NONE) - data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; - else - data = sd->I; - break; - } - case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, sd->object); - break; - } - case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); - break; - } - case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P; + float3 data; + uint type = node.y; + uint out_offset = node.z; + + switch (type) { + case NODE_TEXCO_OBJECT: { + data = sd->P; + if (node.w == 0) { + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); + data = transform_point(&tfm, data); + } + break; + } + case NODE_TEXCO_NORMAL: { + data = sd->N; + object_inverse_normal_transform(kg, sd, &data); + break; + } + case NODE_TEXCO_CAMERA: { + Transform tfm = kernel_data.cam.worldtocamera; + + if (sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P); + else + data = transform_point(&tfm, sd->P + camera_position(kg)); + break; + } + case NODE_TEXCO_WINDOW: { + if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && + kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P); + else + data = camera_world_to_ndc(kg, sd, sd->P); + data.z = 0.0f; + break; + } + case NODE_TEXCO_REFLECTION: { + if (sd->object != OBJECT_NONE) + data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; + else + data = sd->I; + break; + } + 
case NODE_TEXCO_DUPLI_GENERATED: { + data = object_dupli_generated(kg, sd->object); + break; + } + case NODE_TEXCO_DUPLI_UV: { + data = object_dupli_uv(kg, sd->object); + break; + } + case NODE_TEXCO_VOLUME_GENERATED: { + data = sd->P; #ifdef __VOLUME__ - if(sd->object != OBJECT_NONE) - data = volume_normalized_position(kg, sd, data); + if (sd->object != OBJECT_NONE) + data = volume_normalized_position(kg, sd, data); #endif - break; - } - } + break; + } + } - stack_store_float3(stack, out_offset, data); + stack_store_float3(stack, out_offset, data); } -ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, - ShaderData *sd, - int path_flag, - float *stack, - uint4 node, - int *offset) +ccl_device void svm_node_tex_coord_bump_dx( + KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset) { #ifdef __RAY_DIFFERENTIALS__ - float3 data; - uint type = node.y; - uint out_offset = node.z; - - switch(type) { - case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dx; - if(node.w == 0) { - if(sd->object != OBJECT_NONE) { - object_inverse_position_transform(kg, sd, &data); - } - } - else { - Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); - data = transform_point(&tfm, data); - } - break; - } - case NODE_TEXCO_NORMAL: { - data = sd->N; - object_inverse_normal_transform(kg, sd, &data); - break; - } - case NODE_TEXCO_CAMERA: { - Transform tfm = kernel_data.cam.worldtocamera; - - if(sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dx); - else - data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); - break; - } - case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx); - else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); - data.z = 0.0f; - break; - } - case 
NODE_TEXCO_REFLECTION: { - if(sd->object != OBJECT_NONE) - data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; - else - data = sd->I; - break; - } - case NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, sd->object); - break; - } - case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); - break; - } - case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dx; - -#ifdef __VOLUME__ - if(sd->object != OBJECT_NONE) - data = volume_normalized_position(kg, sd, data); -#endif - break; - } - } - - stack_store_float3(stack, out_offset, data); + float3 data; + uint type = node.y; + uint out_offset = node.z; + + switch (type) { + case NODE_TEXCO_OBJECT: { + data = sd->P + sd->dP.dx; + if (node.w == 0) { + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); + data = transform_point(&tfm, data); + } + break; + } + case NODE_TEXCO_NORMAL: { + data = sd->N; + object_inverse_normal_transform(kg, sd, &data); + break; + } + case NODE_TEXCO_CAMERA: { + Transform tfm = kernel_data.cam.worldtocamera; + + if (sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P + sd->dP.dx); + else + data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); + break; + } + case NODE_TEXCO_WINDOW: { + if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && + kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx); + else + data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); + data.z = 0.0f; + break; + } + case NODE_TEXCO_REFLECTION: { + if (sd->object != OBJECT_NONE) + data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; + else + data = sd->I; + break; + } + case NODE_TEXCO_DUPLI_GENERATED: { + data = object_dupli_generated(kg, sd->object); + break; + } + case NODE_TEXCO_DUPLI_UV: { + data = object_dupli_uv(kg, 
sd->object); + break; + } + case NODE_TEXCO_VOLUME_GENERATED: { + data = sd->P + sd->dP.dx; + +# ifdef __VOLUME__ + if (sd->object != OBJECT_NONE) + data = volume_normalized_position(kg, sd, data); +# endif + break; + } + } + + stack_store_float3(stack, out_offset, data); #else - svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); + svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); #endif } -ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, - ShaderData *sd, - int path_flag, - float *stack, - uint4 node, - int *offset) +ccl_device void svm_node_tex_coord_bump_dy( + KernelGlobals *kg, ShaderData *sd, int path_flag, float *stack, uint4 node, int *offset) { #ifdef __RAY_DIFFERENTIALS__ - float3 data; - uint type = node.y; - uint out_offset = node.z; - - switch(type) { - case NODE_TEXCO_OBJECT: { - data = sd->P + sd->dP.dy; - if(node.w == 0) { - if(sd->object != OBJECT_NONE) { - object_inverse_position_transform(kg, sd, &data); - } - } - else { - Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); - data = transform_point(&tfm, data); - } - break; - } - case NODE_TEXCO_NORMAL: { - data = sd->N; - object_inverse_normal_transform(kg, sd, &data); - break; - } - case NODE_TEXCO_CAMERA: { - Transform tfm = kernel_data.cam.worldtocamera; - - if(sd->object != OBJECT_NONE) - data = transform_point(&tfm, sd->P + sd->dP.dy); - else - data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); - break; - } - case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) - data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy); - else - data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); - data.z = 0.0f; - break; - } - case NODE_TEXCO_REFLECTION: { - if(sd->object != OBJECT_NONE) - data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; - else - data = sd->I; - break; - } - case 
NODE_TEXCO_DUPLI_GENERATED: { - data = object_dupli_generated(kg, sd->object); - break; - } - case NODE_TEXCO_DUPLI_UV: { - data = object_dupli_uv(kg, sd->object); - break; - } - case NODE_TEXCO_VOLUME_GENERATED: { - data = sd->P + sd->dP.dy; - -#ifdef __VOLUME__ - if(sd->object != OBJECT_NONE) - data = volume_normalized_position(kg, sd, data); -#endif - break; - } - } - - stack_store_float3(stack, out_offset, data); + float3 data; + uint type = node.y; + uint out_offset = node.z; + + switch (type) { + case NODE_TEXCO_OBJECT: { + data = sd->P + sd->dP.dy; + if (node.w == 0) { + if (sd->object != OBJECT_NONE) { + object_inverse_position_transform(kg, sd, &data); + } + } + else { + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); + data = transform_point(&tfm, data); + } + break; + } + case NODE_TEXCO_NORMAL: { + data = sd->N; + object_inverse_normal_transform(kg, sd, &data); + break; + } + case NODE_TEXCO_CAMERA: { + Transform tfm = kernel_data.cam.worldtocamera; + + if (sd->object != OBJECT_NONE) + data = transform_point(&tfm, sd->P + sd->dP.dy); + else + data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); + break; + } + case NODE_TEXCO_WINDOW: { + if ((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && + kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy); + else + data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); + data.z = 0.0f; + break; + } + case NODE_TEXCO_REFLECTION: { + if (sd->object != OBJECT_NONE) + data = 2.0f * dot(sd->N, sd->I) * sd->N - sd->I; + else + data = sd->I; + break; + } + case NODE_TEXCO_DUPLI_GENERATED: { + data = object_dupli_generated(kg, sd->object); + break; + } + case NODE_TEXCO_DUPLI_UV: { + data = object_dupli_uv(kg, sd->object); + break; + } + case NODE_TEXCO_VOLUME_GENERATED: { + data = sd->P + sd->dP.dy; + +# ifdef __VOLUME__ + if (sd->object != OBJECT_NONE) + 
data = volume_normalized_position(kg, sd, data); +# endif + break; + } + } + + stack_store_float3(stack, out_offset, data); #else - svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); + svm_node_tex_coord(kg, sd, path_flag, stack, node, offset); #endif } ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - uint color_offset, strength_offset, normal_offset, space; - decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space); - - float3 color = stack_load_float3(stack, color_offset); - color = 2.0f*make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f); - - bool is_backfacing = (sd->flag & SD_BACKFACING) != 0; - float3 N; - - if(space == NODE_NORMAL_MAP_TANGENT) { - /* tangent space */ - if(sd->object == OBJECT_NONE) { - stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f)); - return; - } - - /* first try to get tangent attribute */ - const AttributeDescriptor attr = find_attribute(kg, sd, node.z); - const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); - const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL); - - if(attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND || attr_normal.offset == ATTR_STD_NOT_FOUND) { - stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f)); - return; - } - - /* get _unnormalized_ interpolated normal and tangent */ - float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); - float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); - float3 normal; - - if(sd->shader & SHADER_SMOOTH_NORMAL) { - normal = primitive_surface_attribute_float3(kg, sd, attr_normal, NULL, NULL); - } - else { - normal = sd->Ng; - - /* the normal is already inverted, which is too soon for the math here */ - if(is_backfacing) { - normal = -normal; - } - - object_inverse_normal_transform(kg, sd, &normal); - } - - /* apply normal map */ 
- float3 B = sign * cross(normal, tangent); - N = safe_normalize(color.x * tangent + color.y * B + color.z * normal); - - /* transform to world space */ - object_normal_transform(kg, sd, &N); - } - else { - /* strange blender convention */ - if(space == NODE_NORMAL_MAP_BLENDER_OBJECT || space == NODE_NORMAL_MAP_BLENDER_WORLD) { - color.y = -color.y; - color.z = -color.z; - } - - /* object, world space */ - N = color; - - if(space == NODE_NORMAL_MAP_OBJECT || space == NODE_NORMAL_MAP_BLENDER_OBJECT) - object_normal_transform(kg, sd, &N); - else - N = safe_normalize(N); - } - - /* invert normal for backfacing polygons */ - if(is_backfacing) { - N = -N; - } - - float strength = stack_load_float(stack, strength_offset); - - if(strength != 1.0f) { - strength = max(strength, 0.0f); - N = safe_normalize(sd->N + (N - sd->N)*strength); - } - - N = ensure_valid_reflection(sd->Ng, sd->I, N); - - if(is_zero(N)) { - N = sd->N; - } - - stack_store_float3(stack, normal_offset, N); + uint color_offset, strength_offset, normal_offset, space; + decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space); + + float3 color = stack_load_float3(stack, color_offset); + color = 2.0f * make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f); + + bool is_backfacing = (sd->flag & SD_BACKFACING) != 0; + float3 N; + + if (space == NODE_NORMAL_MAP_TANGENT) { + /* tangent space */ + if (sd->object == OBJECT_NONE) { + stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f)); + return; + } + + /* first try to get tangent attribute */ + const AttributeDescriptor attr = find_attribute(kg, sd, node.z); + const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); + const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL); + + if (attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND || + attr_normal.offset == ATTR_STD_NOT_FOUND) { + stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 
0.0f)); + return; + } + + /* get _unnormalized_ interpolated normal and tangent */ + float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); + float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); + float3 normal; + + if (sd->shader & SHADER_SMOOTH_NORMAL) { + normal = primitive_surface_attribute_float3(kg, sd, attr_normal, NULL, NULL); + } + else { + normal = sd->Ng; + + /* the normal is already inverted, which is too soon for the math here */ + if (is_backfacing) { + normal = -normal; + } + + object_inverse_normal_transform(kg, sd, &normal); + } + + /* apply normal map */ + float3 B = sign * cross(normal, tangent); + N = safe_normalize(color.x * tangent + color.y * B + color.z * normal); + + /* transform to world space */ + object_normal_transform(kg, sd, &N); + } + else { + /* strange blender convention */ + if (space == NODE_NORMAL_MAP_BLENDER_OBJECT || space == NODE_NORMAL_MAP_BLENDER_WORLD) { + color.y = -color.y; + color.z = -color.z; + } + + /* object, world space */ + N = color; + + if (space == NODE_NORMAL_MAP_OBJECT || space == NODE_NORMAL_MAP_BLENDER_OBJECT) + object_normal_transform(kg, sd, &N); + else + N = safe_normalize(N); + } + + /* invert normal for backfacing polygons */ + if (is_backfacing) { + N = -N; + } + + float strength = stack_load_float(stack, strength_offset); + + if (strength != 1.0f) { + strength = max(strength, 0.0f); + N = safe_normalize(sd->N + (N - sd->N) * strength); + } + + N = ensure_valid_reflection(sd->Ng, sd->I, N); + + if (is_zero(N)) { + N = sd->N; + } + + stack_store_float3(stack, normal_offset, N); } ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - uint tangent_offset, direction_type, axis; - decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL); - - float3 tangent; - float3 attribute_value; - const AttributeDescriptor desc = find_attribute(kg, sd, node.z); - if (desc.offset != ATTR_STD_NOT_FOUND) { - 
if(desc.type == NODE_ATTR_FLOAT2) { - float2 value = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); - attribute_value.x = value.x; - attribute_value.y = value.y; - attribute_value.z = 0.0f; - } - else { - attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); - } - } - - - if(direction_type == NODE_TANGENT_UVMAP) { - /* UV map */ - if(desc.offset == ATTR_STD_NOT_FOUND) - tangent = make_float3(0.0f, 0.0f, 0.0f); - else - tangent = attribute_value; - } - else { - /* radial */ - float3 generated; - - if(desc.offset == ATTR_STD_NOT_FOUND) - generated = sd->P; - else - generated = attribute_value; - - if(axis == NODE_TANGENT_AXIS_X) - tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f)); - else if(axis == NODE_TANGENT_AXIS_Y) - tangent = make_float3(-(generated.z - 0.5f), 0.0f, (generated.x - 0.5f)); - else - tangent = make_float3(-(generated.y - 0.5f), (generated.x - 0.5f), 0.0f); - } - - object_normal_transform(kg, sd, &tangent); - tangent = cross(sd->N, normalize(cross(tangent, sd->N))); - stack_store_float3(stack, tangent_offset, tangent); + uint tangent_offset, direction_type, axis; + decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL); + + float3 tangent; + float3 attribute_value; + const AttributeDescriptor desc = find_attribute(kg, sd, node.z); + if (desc.offset != ATTR_STD_NOT_FOUND) { + if (desc.type == NODE_ATTR_FLOAT2) { + float2 value = primitive_surface_attribute_float2(kg, sd, desc, NULL, NULL); + attribute_value.x = value.x; + attribute_value.y = value.y; + attribute_value.z = 0.0f; + } + else { + attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + } + } + + if (direction_type == NODE_TANGENT_UVMAP) { + /* UV map */ + if (desc.offset == ATTR_STD_NOT_FOUND) + tangent = make_float3(0.0f, 0.0f, 0.0f); + else + tangent = attribute_value; + } + else { + /* radial */ + float3 generated; + + if (desc.offset == ATTR_STD_NOT_FOUND) + generated = sd->P; 
+ else + generated = attribute_value; + + if (axis == NODE_TANGENT_AXIS_X) + tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f)); + else if (axis == NODE_TANGENT_AXIS_Y) + tangent = make_float3(-(generated.z - 0.5f), 0.0f, (generated.x - 0.5f)); + else + tangent = make_float3(-(generated.y - 0.5f), (generated.x - 0.5f), 0.0f); + } + + object_normal_transform(kg, sd, &tangent); + tangent = cross(sd->N, normalize(cross(tangent, sd->N))); + stack_store_float3(stack, tangent_offset, tangent); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h index 57729817bdc..290aa85c831 100644 --- a/intern/cycles/kernel/svm/svm_texture.h +++ b/intern/cycles/kernel/svm/svm_texture.h @@ -20,44 +20,44 @@ CCL_NAMESPACE_BEGIN ccl_device_noinline float noise_turbulence(float3 p, float octaves, int hard) { - float fscale = 1.0f; - float amp = 1.0f; - float sum = 0.0f; - int i, n; + float fscale = 1.0f; + float amp = 1.0f; + float sum = 0.0f; + int i, n; - octaves = clamp(octaves, 0.0f, 16.0f); - n = float_to_int(octaves); + octaves = clamp(octaves, 0.0f, 16.0f); + n = float_to_int(octaves); - for(i = 0; i <= n; i++) { - float t = noise(fscale*p); + for (i = 0; i <= n; i++) { + float t = noise(fscale * p); - if(hard) - t = fabsf(2.0f*t - 1.0f); + if (hard) + t = fabsf(2.0f * t - 1.0f); - sum += t*amp; - amp *= 0.5f; - fscale *= 2.0f; - } + sum += t * amp; + amp *= 0.5f; + fscale *= 2.0f; + } - float rmd = octaves - floorf(octaves); + float rmd = octaves - floorf(octaves); - if(rmd != 0.0f) { - float t = noise(fscale*p); + if (rmd != 0.0f) { + float t = noise(fscale * p); - if(hard) - t = fabsf(2.0f*t - 1.0f); + if (hard) + t = fabsf(2.0f * t - 1.0f); - float sum2 = sum + t*amp; + float sum2 = sum + t * amp; - sum *= ((float)(1 << n)/(float)((1 << (n+1)) - 1)); - sum2 *= ((float)(1 << (n+1))/(float)((1 << (n+2)) - 1)); + sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1)); + sum2 *= ((float)(1 << (n + 1)) / 
(float)((1 << (n + 2)) - 1)); - return (1.0f - rmd)*sum + rmd*sum2; - } - else { - sum *= ((float)(1 << n)/(float)((1 << (n+1)) - 1)); - return sum; - } + return (1.0f - rmd) * sum + rmd * sum2; + } + else { + sum *= ((float)(1 << n) / (float)((1 << (n + 1)) - 1)); + return sum; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 8b15d7bf9f4..d31e4f93696 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -38,498 +38,505 @@ CCL_NAMESPACE_BEGIN * * Lower the number of group more often the node is used. */ -#define NODE_GROUP_LEVEL_0 0 -#define NODE_GROUP_LEVEL_1 1 -#define NODE_GROUP_LEVEL_2 2 -#define NODE_GROUP_LEVEL_3 3 -#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3 - -#define NODE_FEATURE_VOLUME (1 << 0) -#define NODE_FEATURE_HAIR (1 << 1) -#define NODE_FEATURE_BUMP (1 << 2) +#define NODE_GROUP_LEVEL_0 0 +#define NODE_GROUP_LEVEL_1 1 +#define NODE_GROUP_LEVEL_2 2 +#define NODE_GROUP_LEVEL_3 3 +#define NODE_GROUP_LEVEL_MAX NODE_GROUP_LEVEL_3 + +#define NODE_FEATURE_VOLUME (1 << 0) +#define NODE_FEATURE_HAIR (1 << 1) +#define NODE_FEATURE_BUMP (1 << 2) #define NODE_FEATURE_BUMP_STATE (1 << 3) /* TODO(sergey): Consider using something like ((uint)(-1)). * Need to check carefully operand types around usage of this * define first. 
*/ -#define NODE_FEATURE_ALL (NODE_FEATURE_VOLUME|NODE_FEATURE_HAIR|NODE_FEATURE_BUMP|NODE_FEATURE_BUMP_STATE) +#define NODE_FEATURE_ALL \ + (NODE_FEATURE_VOLUME | NODE_FEATURE_HAIR | NODE_FEATURE_BUMP | NODE_FEATURE_BUMP_STATE) typedef enum ShaderNodeType { - NODE_END = 0, - NODE_CLOSURE_BSDF, - NODE_CLOSURE_EMISSION, - NODE_CLOSURE_BACKGROUND, - NODE_CLOSURE_SET_WEIGHT, - NODE_CLOSURE_WEIGHT, - NODE_MIX_CLOSURE, - NODE_JUMP_IF_ZERO, - NODE_JUMP_IF_ONE, - NODE_TEX_IMAGE, - NODE_TEX_IMAGE_BOX, - NODE_TEX_SKY, - NODE_GEOMETRY, - NODE_GEOMETRY_DUPLI, - NODE_LIGHT_PATH, - NODE_VALUE_F, - NODE_VALUE_V, - NODE_MIX, - NODE_ATTR, - NODE_CONVERT, - NODE_FRESNEL, - NODE_WIREFRAME, - NODE_WAVELENGTH, - NODE_BLACKBODY, - NODE_EMISSION_WEIGHT, - NODE_TEX_GRADIENT, - NODE_TEX_VORONOI, - NODE_TEX_MUSGRAVE, - NODE_TEX_WAVE, - NODE_TEX_MAGIC, - NODE_TEX_NOISE, - NODE_SHADER_JUMP, - NODE_SET_DISPLACEMENT, - NODE_GEOMETRY_BUMP_DX, - NODE_GEOMETRY_BUMP_DY, - NODE_SET_BUMP, - NODE_MATH, - NODE_VECTOR_MATH, - NODE_VECTOR_TRANSFORM, - NODE_MAPPING, - NODE_TEX_COORD, - NODE_TEX_COORD_BUMP_DX, - NODE_TEX_COORD_BUMP_DY, - NODE_ATTR_BUMP_DX, - NODE_ATTR_BUMP_DY, - NODE_TEX_ENVIRONMENT, - NODE_CLOSURE_HOLDOUT, - NODE_LAYER_WEIGHT, - NODE_CLOSURE_VOLUME, - NODE_SEPARATE_VECTOR, - NODE_COMBINE_VECTOR, - NODE_SEPARATE_HSV, - NODE_COMBINE_HSV, - NODE_HSV, - NODE_CAMERA, - NODE_INVERT, - NODE_NORMAL, - NODE_GAMMA, - NODE_TEX_CHECKER, - NODE_BRIGHTCONTRAST, - NODE_RGB_RAMP, - NODE_RGB_CURVES, - NODE_VECTOR_CURVES, - NODE_MIN_MAX, - NODE_LIGHT_FALLOFF, - NODE_OBJECT_INFO, - NODE_PARTICLE_INFO, - NODE_TEX_BRICK, - NODE_CLOSURE_SET_NORMAL, - NODE_AMBIENT_OCCLUSION, - NODE_TANGENT, - NODE_NORMAL_MAP, - NODE_HAIR_INFO, - NODE_UVMAP, - NODE_TEX_VOXEL, - NODE_ENTER_BUMP_EVAL, - NODE_LEAVE_BUMP_EVAL, - NODE_BEVEL, - NODE_DISPLACEMENT, - NODE_VECTOR_DISPLACEMENT, - NODE_PRINCIPLED_VOLUME, - NODE_IES, + NODE_END = 0, + NODE_CLOSURE_BSDF, + NODE_CLOSURE_EMISSION, + NODE_CLOSURE_BACKGROUND, + 
NODE_CLOSURE_SET_WEIGHT, + NODE_CLOSURE_WEIGHT, + NODE_MIX_CLOSURE, + NODE_JUMP_IF_ZERO, + NODE_JUMP_IF_ONE, + NODE_TEX_IMAGE, + NODE_TEX_IMAGE_BOX, + NODE_TEX_SKY, + NODE_GEOMETRY, + NODE_GEOMETRY_DUPLI, + NODE_LIGHT_PATH, + NODE_VALUE_F, + NODE_VALUE_V, + NODE_MIX, + NODE_ATTR, + NODE_CONVERT, + NODE_FRESNEL, + NODE_WIREFRAME, + NODE_WAVELENGTH, + NODE_BLACKBODY, + NODE_EMISSION_WEIGHT, + NODE_TEX_GRADIENT, + NODE_TEX_VORONOI, + NODE_TEX_MUSGRAVE, + NODE_TEX_WAVE, + NODE_TEX_MAGIC, + NODE_TEX_NOISE, + NODE_SHADER_JUMP, + NODE_SET_DISPLACEMENT, + NODE_GEOMETRY_BUMP_DX, + NODE_GEOMETRY_BUMP_DY, + NODE_SET_BUMP, + NODE_MATH, + NODE_VECTOR_MATH, + NODE_VECTOR_TRANSFORM, + NODE_MAPPING, + NODE_TEX_COORD, + NODE_TEX_COORD_BUMP_DX, + NODE_TEX_COORD_BUMP_DY, + NODE_ATTR_BUMP_DX, + NODE_ATTR_BUMP_DY, + NODE_TEX_ENVIRONMENT, + NODE_CLOSURE_HOLDOUT, + NODE_LAYER_WEIGHT, + NODE_CLOSURE_VOLUME, + NODE_SEPARATE_VECTOR, + NODE_COMBINE_VECTOR, + NODE_SEPARATE_HSV, + NODE_COMBINE_HSV, + NODE_HSV, + NODE_CAMERA, + NODE_INVERT, + NODE_NORMAL, + NODE_GAMMA, + NODE_TEX_CHECKER, + NODE_BRIGHTCONTRAST, + NODE_RGB_RAMP, + NODE_RGB_CURVES, + NODE_VECTOR_CURVES, + NODE_MIN_MAX, + NODE_LIGHT_FALLOFF, + NODE_OBJECT_INFO, + NODE_PARTICLE_INFO, + NODE_TEX_BRICK, + NODE_CLOSURE_SET_NORMAL, + NODE_AMBIENT_OCCLUSION, + NODE_TANGENT, + NODE_NORMAL_MAP, + NODE_HAIR_INFO, + NODE_UVMAP, + NODE_TEX_VOXEL, + NODE_ENTER_BUMP_EVAL, + NODE_LEAVE_BUMP_EVAL, + NODE_BEVEL, + NODE_DISPLACEMENT, + NODE_VECTOR_DISPLACEMENT, + NODE_PRINCIPLED_VOLUME, + NODE_IES, } ShaderNodeType; typedef enum NodeAttributeType { - NODE_ATTR_FLOAT = 0, - NODE_ATTR_FLOAT2, - NODE_ATTR_FLOAT3, - NODE_ATTR_MATRIX + NODE_ATTR_FLOAT = 0, + NODE_ATTR_FLOAT2, + NODE_ATTR_FLOAT3, + NODE_ATTR_MATRIX } NodeAttributeType; typedef enum NodeGeometry { - NODE_GEOM_P = 0, - NODE_GEOM_N, - NODE_GEOM_T, - NODE_GEOM_I, - NODE_GEOM_Ng, - NODE_GEOM_uv + NODE_GEOM_P = 0, + NODE_GEOM_N, + NODE_GEOM_T, + NODE_GEOM_I, + NODE_GEOM_Ng, + NODE_GEOM_uv } 
NodeGeometry; typedef enum NodeObjectInfo { - NODE_INFO_OB_LOCATION, - NODE_INFO_OB_INDEX, - NODE_INFO_MAT_INDEX, - NODE_INFO_OB_RANDOM + NODE_INFO_OB_LOCATION, + NODE_INFO_OB_INDEX, + NODE_INFO_MAT_INDEX, + NODE_INFO_OB_RANDOM } NodeObjectInfo; typedef enum NodeParticleInfo { - NODE_INFO_PAR_INDEX, - NODE_INFO_PAR_RANDOM, - NODE_INFO_PAR_AGE, - NODE_INFO_PAR_LIFETIME, - NODE_INFO_PAR_LOCATION, - NODE_INFO_PAR_ROTATION, - NODE_INFO_PAR_SIZE, - NODE_INFO_PAR_VELOCITY, - NODE_INFO_PAR_ANGULAR_VELOCITY + NODE_INFO_PAR_INDEX, + NODE_INFO_PAR_RANDOM, + NODE_INFO_PAR_AGE, + NODE_INFO_PAR_LIFETIME, + NODE_INFO_PAR_LOCATION, + NODE_INFO_PAR_ROTATION, + NODE_INFO_PAR_SIZE, + NODE_INFO_PAR_VELOCITY, + NODE_INFO_PAR_ANGULAR_VELOCITY } NodeParticleInfo; typedef enum NodeHairInfo { - NODE_INFO_CURVE_IS_STRAND, - NODE_INFO_CURVE_INTERCEPT, - NODE_INFO_CURVE_THICKNESS, - /*fade for minimum hair width transpency*/ - /*NODE_INFO_CURVE_FADE,*/ - NODE_INFO_CURVE_TANGENT_NORMAL, - NODE_INFO_CURVE_RANDOM, + NODE_INFO_CURVE_IS_STRAND, + NODE_INFO_CURVE_INTERCEPT, + NODE_INFO_CURVE_THICKNESS, + /*fade for minimum hair width transpency*/ + /*NODE_INFO_CURVE_FADE,*/ + NODE_INFO_CURVE_TANGENT_NORMAL, + NODE_INFO_CURVE_RANDOM, } NodeHairInfo; typedef enum NodeLightPath { - NODE_LP_camera = 0, - NODE_LP_shadow, - NODE_LP_diffuse, - NODE_LP_glossy, - NODE_LP_singular, - NODE_LP_reflection, - NODE_LP_transmission, - NODE_LP_volume_scatter, - NODE_LP_backfacing, - NODE_LP_ray_length, - NODE_LP_ray_depth, - NODE_LP_ray_diffuse, - NODE_LP_ray_glossy, - NODE_LP_ray_transparent, - NODE_LP_ray_transmission, + NODE_LP_camera = 0, + NODE_LP_shadow, + NODE_LP_diffuse, + NODE_LP_glossy, + NODE_LP_singular, + NODE_LP_reflection, + NODE_LP_transmission, + NODE_LP_volume_scatter, + NODE_LP_backfacing, + NODE_LP_ray_length, + NODE_LP_ray_depth, + NODE_LP_ray_diffuse, + NODE_LP_ray_glossy, + NODE_LP_ray_transparent, + NODE_LP_ray_transmission, } NodeLightPath; typedef enum NodeLightFalloff { - 
NODE_LIGHT_FALLOFF_QUADRATIC, - NODE_LIGHT_FALLOFF_LINEAR, - NODE_LIGHT_FALLOFF_CONSTANT + NODE_LIGHT_FALLOFF_QUADRATIC, + NODE_LIGHT_FALLOFF_LINEAR, + NODE_LIGHT_FALLOFF_CONSTANT } NodeLightFalloff; typedef enum NodeTexCoord { - NODE_TEXCO_NORMAL, - NODE_TEXCO_OBJECT, - NODE_TEXCO_CAMERA, - NODE_TEXCO_WINDOW, - NODE_TEXCO_REFLECTION, - NODE_TEXCO_DUPLI_GENERATED, - NODE_TEXCO_DUPLI_UV, - NODE_TEXCO_VOLUME_GENERATED + NODE_TEXCO_NORMAL, + NODE_TEXCO_OBJECT, + NODE_TEXCO_CAMERA, + NODE_TEXCO_WINDOW, + NODE_TEXCO_REFLECTION, + NODE_TEXCO_DUPLI_GENERATED, + NODE_TEXCO_DUPLI_UV, + NODE_TEXCO_VOLUME_GENERATED } NodeTexCoord; typedef enum NodeMix { - NODE_MIX_BLEND = 0, - NODE_MIX_ADD, - NODE_MIX_MUL, - NODE_MIX_SUB, - NODE_MIX_SCREEN, - NODE_MIX_DIV, - NODE_MIX_DIFF, - NODE_MIX_DARK, - NODE_MIX_LIGHT, - NODE_MIX_OVERLAY, - NODE_MIX_DODGE, - NODE_MIX_BURN, - NODE_MIX_HUE, - NODE_MIX_SAT, - NODE_MIX_VAL, - NODE_MIX_COLOR, - NODE_MIX_SOFT, - NODE_MIX_LINEAR, - NODE_MIX_CLAMP /* used for the clamp UI option */ + NODE_MIX_BLEND = 0, + NODE_MIX_ADD, + NODE_MIX_MUL, + NODE_MIX_SUB, + NODE_MIX_SCREEN, + NODE_MIX_DIV, + NODE_MIX_DIFF, + NODE_MIX_DARK, + NODE_MIX_LIGHT, + NODE_MIX_OVERLAY, + NODE_MIX_DODGE, + NODE_MIX_BURN, + NODE_MIX_HUE, + NODE_MIX_SAT, + NODE_MIX_VAL, + NODE_MIX_COLOR, + NODE_MIX_SOFT, + NODE_MIX_LINEAR, + NODE_MIX_CLAMP /* used for the clamp UI option */ } NodeMix; typedef enum NodeMath { - NODE_MATH_ADD, - NODE_MATH_SUBTRACT, - NODE_MATH_MULTIPLY, - NODE_MATH_DIVIDE, - NODE_MATH_SINE, - NODE_MATH_COSINE, - NODE_MATH_TANGENT, - NODE_MATH_ARCSINE, - NODE_MATH_ARCCOSINE, - NODE_MATH_ARCTANGENT, - NODE_MATH_POWER, - NODE_MATH_LOGARITHM, - NODE_MATH_MINIMUM, - NODE_MATH_MAXIMUM, - NODE_MATH_ROUND, - NODE_MATH_LESS_THAN, - NODE_MATH_GREATER_THAN, - NODE_MATH_MODULO, - NODE_MATH_ABSOLUTE, - NODE_MATH_ARCTAN2, - NODE_MATH_FLOOR, - NODE_MATH_CEIL, - NODE_MATH_FRACT, - NODE_MATH_SQRT, - NODE_MATH_CLAMP /* used for the clamp UI option */ + NODE_MATH_ADD, + 
NODE_MATH_SUBTRACT, + NODE_MATH_MULTIPLY, + NODE_MATH_DIVIDE, + NODE_MATH_SINE, + NODE_MATH_COSINE, + NODE_MATH_TANGENT, + NODE_MATH_ARCSINE, + NODE_MATH_ARCCOSINE, + NODE_MATH_ARCTANGENT, + NODE_MATH_POWER, + NODE_MATH_LOGARITHM, + NODE_MATH_MINIMUM, + NODE_MATH_MAXIMUM, + NODE_MATH_ROUND, + NODE_MATH_LESS_THAN, + NODE_MATH_GREATER_THAN, + NODE_MATH_MODULO, + NODE_MATH_ABSOLUTE, + NODE_MATH_ARCTAN2, + NODE_MATH_FLOOR, + NODE_MATH_CEIL, + NODE_MATH_FRACT, + NODE_MATH_SQRT, + NODE_MATH_CLAMP /* used for the clamp UI option */ } NodeMath; typedef enum NodeVectorMath { - NODE_VECTOR_MATH_ADD, - NODE_VECTOR_MATH_SUBTRACT, - NODE_VECTOR_MATH_AVERAGE, - NODE_VECTOR_MATH_DOT_PRODUCT, - NODE_VECTOR_MATH_CROSS_PRODUCT, - NODE_VECTOR_MATH_NORMALIZE + NODE_VECTOR_MATH_ADD, + NODE_VECTOR_MATH_SUBTRACT, + NODE_VECTOR_MATH_AVERAGE, + NODE_VECTOR_MATH_DOT_PRODUCT, + NODE_VECTOR_MATH_CROSS_PRODUCT, + NODE_VECTOR_MATH_NORMALIZE } NodeVectorMath; typedef enum NodeVectorTransformType { - NODE_VECTOR_TRANSFORM_TYPE_VECTOR, - NODE_VECTOR_TRANSFORM_TYPE_POINT, - NODE_VECTOR_TRANSFORM_TYPE_NORMAL + NODE_VECTOR_TRANSFORM_TYPE_VECTOR, + NODE_VECTOR_TRANSFORM_TYPE_POINT, + NODE_VECTOR_TRANSFORM_TYPE_NORMAL } NodeVectorTransformType; typedef enum NodeVectorTransformConvertSpace { - NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD, - NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT, - NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA + NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD, + NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT, + NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA } NodeVectorTransformConvertSpace; typedef enum NodeConvert { - NODE_CONVERT_FV, - NODE_CONVERT_FI, - NODE_CONVERT_CF, - NODE_CONVERT_CI, - NODE_CONVERT_VF, - NODE_CONVERT_VI, - NODE_CONVERT_IF, - NODE_CONVERT_IV + NODE_CONVERT_FV, + NODE_CONVERT_FI, + NODE_CONVERT_CF, + NODE_CONVERT_CI, + NODE_CONVERT_VF, + NODE_CONVERT_VI, + NODE_CONVERT_IF, + NODE_CONVERT_IV } NodeConvert; typedef enum NodeMusgraveType { - NODE_MUSGRAVE_MULTIFRACTAL, - 
NODE_MUSGRAVE_FBM, - NODE_MUSGRAVE_HYBRID_MULTIFRACTAL, - NODE_MUSGRAVE_RIDGED_MULTIFRACTAL, - NODE_MUSGRAVE_HETERO_TERRAIN + NODE_MUSGRAVE_MULTIFRACTAL, + NODE_MUSGRAVE_FBM, + NODE_MUSGRAVE_HYBRID_MULTIFRACTAL, + NODE_MUSGRAVE_RIDGED_MULTIFRACTAL, + NODE_MUSGRAVE_HETERO_TERRAIN } NodeMusgraveType; -typedef enum NodeWaveType { - NODE_WAVE_BANDS, - NODE_WAVE_RINGS -} NodeWaveType; +typedef enum NodeWaveType { NODE_WAVE_BANDS, NODE_WAVE_RINGS } NodeWaveType; typedef enum NodeWaveProfiles { - NODE_WAVE_PROFILE_SIN, - NODE_WAVE_PROFILE_SAW, + NODE_WAVE_PROFILE_SIN, + NODE_WAVE_PROFILE_SAW, } NodeWaveProfile; -typedef enum NodeSkyType { - NODE_SKY_OLD, - NODE_SKY_NEW -} NodeSkyType; +typedef enum NodeSkyType { NODE_SKY_OLD, NODE_SKY_NEW } NodeSkyType; typedef enum NodeGradientType { - NODE_BLEND_LINEAR, - NODE_BLEND_QUADRATIC, - NODE_BLEND_EASING, - NODE_BLEND_DIAGONAL, - NODE_BLEND_RADIAL, - NODE_BLEND_QUADRATIC_SPHERE, - NODE_BLEND_SPHERICAL + NODE_BLEND_LINEAR, + NODE_BLEND_QUADRATIC, + NODE_BLEND_EASING, + NODE_BLEND_DIAGONAL, + NODE_BLEND_RADIAL, + NODE_BLEND_QUADRATIC_SPHERE, + NODE_BLEND_SPHERICAL } NodeGradientType; typedef enum NodeVoronoiColoring { - NODE_VORONOI_INTENSITY, - NODE_VORONOI_CELLS + NODE_VORONOI_INTENSITY, + NODE_VORONOI_CELLS } NodeVoronoiColoring; typedef enum NodeVoronoiDistanceMetric { - NODE_VORONOI_DISTANCE, - NODE_VORONOI_MANHATTAN, - NODE_VORONOI_CHEBYCHEV, - NODE_VORONOI_MINKOWSKI + NODE_VORONOI_DISTANCE, + NODE_VORONOI_MANHATTAN, + NODE_VORONOI_CHEBYCHEV, + NODE_VORONOI_MINKOWSKI } NodeVoronoiDistanceMetric; typedef enum NodeVoronoiFeature { - NODE_VORONOI_F1, - NODE_VORONOI_F2, - NODE_VORONOI_F3, - NODE_VORONOI_F4, - NODE_VORONOI_F2F1 + NODE_VORONOI_F1, + NODE_VORONOI_F2, + NODE_VORONOI_F3, + NODE_VORONOI_F4, + NODE_VORONOI_F2F1 } NodeVoronoiFeature; typedef enum NodeBlendWeightType { - NODE_LAYER_WEIGHT_FRESNEL, - NODE_LAYER_WEIGHT_FACING + NODE_LAYER_WEIGHT_FRESNEL, + NODE_LAYER_WEIGHT_FACING } NodeBlendWeightType; typedef enum 
NodeTangentDirectionType { - NODE_TANGENT_RADIAL, - NODE_TANGENT_UVMAP + NODE_TANGENT_RADIAL, + NODE_TANGENT_UVMAP } NodeTangentDirectionType; typedef enum NodeTangentAxis { - NODE_TANGENT_AXIS_X, - NODE_TANGENT_AXIS_Y, - NODE_TANGENT_AXIS_Z + NODE_TANGENT_AXIS_X, + NODE_TANGENT_AXIS_Y, + NODE_TANGENT_AXIS_Z } NodeTangentAxis; typedef enum NodeNormalMapSpace { - NODE_NORMAL_MAP_TANGENT, - NODE_NORMAL_MAP_OBJECT, - NODE_NORMAL_MAP_WORLD, - NODE_NORMAL_MAP_BLENDER_OBJECT, - NODE_NORMAL_MAP_BLENDER_WORLD, + NODE_NORMAL_MAP_TANGENT, + NODE_NORMAL_MAP_OBJECT, + NODE_NORMAL_MAP_WORLD, + NODE_NORMAL_MAP_BLENDER_OBJECT, + NODE_NORMAL_MAP_BLENDER_WORLD, } NodeNormalMapSpace; typedef enum NodeImageColorSpace { - NODE_COLOR_SPACE_NONE = 0, - NODE_COLOR_SPACE_COLOR = 1, + NODE_COLOR_SPACE_NONE = 0, + NODE_COLOR_SPACE_COLOR = 1, } NodeImageColorSpace; typedef enum NodeImageProjection { - NODE_IMAGE_PROJ_FLAT = 0, - NODE_IMAGE_PROJ_BOX = 1, - NODE_IMAGE_PROJ_SPHERE = 2, - NODE_IMAGE_PROJ_TUBE = 3, + NODE_IMAGE_PROJ_FLAT = 0, + NODE_IMAGE_PROJ_BOX = 1, + NODE_IMAGE_PROJ_SPHERE = 2, + NODE_IMAGE_PROJ_TUBE = 3, } NodeImageProjection; typedef enum NodeEnvironmentProjection { - NODE_ENVIRONMENT_EQUIRECTANGULAR = 0, - NODE_ENVIRONMENT_MIRROR_BALL = 1, + NODE_ENVIRONMENT_EQUIRECTANGULAR = 0, + NODE_ENVIRONMENT_MIRROR_BALL = 1, } NodeEnvironmentProjection; typedef enum NodeBumpOffset { - NODE_BUMP_OFFSET_CENTER, - NODE_BUMP_OFFSET_DX, - NODE_BUMP_OFFSET_DY, + NODE_BUMP_OFFSET_CENTER, + NODE_BUMP_OFFSET_DX, + NODE_BUMP_OFFSET_DY, } NodeBumpOffset; typedef enum NodeTexVoxelSpace { - NODE_TEX_VOXEL_SPACE_OBJECT = 0, - NODE_TEX_VOXEL_SPACE_WORLD = 1, + NODE_TEX_VOXEL_SPACE_OBJECT = 0, + NODE_TEX_VOXEL_SPACE_WORLD = 1, } NodeTexVoxelSpace; typedef enum NodeAO { - NODE_AO_ONLY_LOCAL = (1 << 0), - NODE_AO_INSIDE = (1 << 1), - NODE_AO_GLOBAL_RADIUS = (1 << 2), + NODE_AO_ONLY_LOCAL = (1 << 0), + NODE_AO_INSIDE = (1 << 1), + NODE_AO_GLOBAL_RADIUS = (1 << 2), } NodeAO; typedef enum ShaderType { - 
SHADER_TYPE_SURFACE, - SHADER_TYPE_VOLUME, - SHADER_TYPE_DISPLACEMENT, - SHADER_TYPE_BUMP, + SHADER_TYPE_SURFACE, + SHADER_TYPE_VOLUME, + SHADER_TYPE_DISPLACEMENT, + SHADER_TYPE_BUMP, } ShaderType; typedef enum NodePrincipledHairParametrization { - NODE_PRINCIPLED_HAIR_REFLECTANCE = 0, - NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION = 1, - NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION = 2, - NODE_PRINCIPLED_HAIR_NUM, + NODE_PRINCIPLED_HAIR_REFLECTANCE = 0, + NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION = 1, + NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION = 2, + NODE_PRINCIPLED_HAIR_NUM, } NodePrincipledHairParametrization; /* Closure */ typedef enum ClosureType { - /* Special type, flags generic node as a non-BSDF. */ - CLOSURE_NONE_ID, - - CLOSURE_BSDF_ID, - - /* Diffuse */ - CLOSURE_BSDF_DIFFUSE_ID, - CLOSURE_BSDF_OREN_NAYAR_ID, - CLOSURE_BSDF_DIFFUSE_RAMP_ID, - CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID, - CLOSURE_BSDF_PRINCIPLED_SHEEN_ID, - CLOSURE_BSDF_DIFFUSE_TOON_ID, - - /* Glossy */ - CLOSURE_BSDF_REFLECTION_ID, - CLOSURE_BSDF_MICROFACET_GGX_ID, - CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID, - CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID, - CLOSURE_BSDF_MICROFACET_BECKMANN_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID, - CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID, - CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID, - CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_FRESNEL_ID, - CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID, - CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID, - CLOSURE_BSDF_ASHIKHMIN_VELVET_ID, - CLOSURE_BSDF_PHONG_RAMP_ID, - CLOSURE_BSDF_GLOSSY_TOON_ID, - CLOSURE_BSDF_HAIR_REFLECTION_ID, - - /* Transmission */ - CLOSURE_BSDF_TRANSLUCENT_ID, - CLOSURE_BSDF_REFRACTION_ID, - CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID, - CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID, - CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID, - 
CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID, - CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID, - CLOSURE_BSDF_SHARP_GLASS_ID, - CLOSURE_BSDF_HAIR_PRINCIPLED_ID, - CLOSURE_BSDF_HAIR_TRANSMISSION_ID, - - /* Special cases */ - CLOSURE_BSDF_BSSRDF_ID, - CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID, - CLOSURE_BSDF_TRANSPARENT_ID, - - /* BSSRDF */ - CLOSURE_BSSRDF_CUBIC_ID, - CLOSURE_BSSRDF_GAUSSIAN_ID, - CLOSURE_BSSRDF_PRINCIPLED_ID, - CLOSURE_BSSRDF_BURLEY_ID, - CLOSURE_BSSRDF_RANDOM_WALK_ID, - CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID, - - /* Other */ - CLOSURE_HOLDOUT_ID, - - /* Volume */ - CLOSURE_VOLUME_ID, - CLOSURE_VOLUME_ABSORPTION_ID, - CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, - - CLOSURE_BSDF_PRINCIPLED_ID, - - NBUILTIN_CLOSURES + /* Special type, flags generic node as a non-BSDF. */ + CLOSURE_NONE_ID, + + CLOSURE_BSDF_ID, + + /* Diffuse */ + CLOSURE_BSDF_DIFFUSE_ID, + CLOSURE_BSDF_OREN_NAYAR_ID, + CLOSURE_BSDF_DIFFUSE_RAMP_ID, + CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID, + CLOSURE_BSDF_PRINCIPLED_SHEEN_ID, + CLOSURE_BSDF_DIFFUSE_TOON_ID, + + /* Glossy */ + CLOSURE_BSDF_REFLECTION_ID, + CLOSURE_BSDF_MICROFACET_GGX_ID, + CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID, + CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID, + CLOSURE_BSDF_MICROFACET_BECKMANN_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_FRESNEL_ID, + CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID, + CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID, + CLOSURE_BSDF_MICROFACET_GGX_ANISO_FRESNEL_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_FRESNEL_ID, + CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID, + CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID, + CLOSURE_BSDF_ASHIKHMIN_VELVET_ID, + CLOSURE_BSDF_PHONG_RAMP_ID, + CLOSURE_BSDF_GLOSSY_TOON_ID, + CLOSURE_BSDF_HAIR_REFLECTION_ID, + + /* Transmission */ + CLOSURE_BSDF_TRANSLUCENT_ID, + CLOSURE_BSDF_REFRACTION_ID, + CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID, + CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID, + 
CLOSURE_BSDF_MICROFACET_BECKMANN_GLASS_ID, + CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID, + CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID, + CLOSURE_BSDF_SHARP_GLASS_ID, + CLOSURE_BSDF_HAIR_PRINCIPLED_ID, + CLOSURE_BSDF_HAIR_TRANSMISSION_ID, + + /* Special cases */ + CLOSURE_BSDF_BSSRDF_ID, + CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID, + CLOSURE_BSDF_TRANSPARENT_ID, + + /* BSSRDF */ + CLOSURE_BSSRDF_CUBIC_ID, + CLOSURE_BSSRDF_GAUSSIAN_ID, + CLOSURE_BSSRDF_PRINCIPLED_ID, + CLOSURE_BSSRDF_BURLEY_ID, + CLOSURE_BSSRDF_RANDOM_WALK_ID, + CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID, + + /* Other */ + CLOSURE_HOLDOUT_ID, + + /* Volume */ + CLOSURE_VOLUME_ID, + CLOSURE_VOLUME_ABSORPTION_ID, + CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, + + CLOSURE_BSDF_PRINCIPLED_ID, + + NBUILTIN_CLOSURES } ClosureType; /* watch this, being lazy with memory usage */ #define CLOSURE_IS_BSDF(type) (type <= CLOSURE_BSDF_TRANSPARENT_ID) -#define CLOSURE_IS_BSDF_DIFFUSE(type) (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID) -#define CLOSURE_IS_BSDF_GLOSSY(type) ((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID )|| (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID)) -#define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID) -#define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID) -#define CLOSURE_IS_BSDF_SINGULAR(type) (type == CLOSURE_BSDF_REFLECTION_ID || \ - type == CLOSURE_BSDF_REFRACTION_ID || \ - type == CLOSURE_BSDF_TRANSPARENT_ID) +#define CLOSURE_IS_BSDF_DIFFUSE(type) \ + (type >= CLOSURE_BSDF_DIFFUSE_ID && type <= CLOSURE_BSDF_DIFFUSE_TOON_ID) +#define CLOSURE_IS_BSDF_GLOSSY(type) \ + ((type >= CLOSURE_BSDF_REFLECTION_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) || \ + (type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID)) +#define CLOSURE_IS_BSDF_TRANSMISSION(type) \ + (type >= CLOSURE_BSDF_TRANSLUCENT_ID && type <= 
CLOSURE_BSDF_HAIR_TRANSMISSION_ID) +#define CLOSURE_IS_BSDF_BSSRDF(type) \ + (type == CLOSURE_BSDF_BSSRDF_ID || type == CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID) +#define CLOSURE_IS_BSDF_SINGULAR(type) \ + (type == CLOSURE_BSDF_REFLECTION_ID || type == CLOSURE_BSDF_REFRACTION_ID || \ + type == CLOSURE_BSDF_TRANSPARENT_ID) #define CLOSURE_IS_BSDF_TRANSPARENT(type) (type == CLOSURE_BSDF_TRANSPARENT_ID) -#define CLOSURE_IS_BSDF_MULTISCATTER(type) (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID ||\ - type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID || \ - type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) -#define CLOSURE_IS_BSDF_MICROFACET(type) ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID) ||\ - (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) ||\ - (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID)) +#define CLOSURE_IS_BSDF_MULTISCATTER(type) \ + (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID || \ + type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ANISO_ID || \ + type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) +#define CLOSURE_IS_BSDF_MICROFACET(type) \ + ((type >= CLOSURE_BSDF_MICROFACET_GGX_ID && type <= CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID) || \ + (type >= CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID && \ + type <= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID) || \ + (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID)) #define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) -#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) -#define CLOSURE_IS_DISK_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID) -#define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) +#define CLOSURE_IS_BSSRDF(type) \ + (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= 
CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID) +#define CLOSURE_IS_DISK_BSSRDF(type) \ + (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_BURLEY_ID) +#define CLOSURE_IS_VOLUME(type) \ + (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) #define CLOSURE_IS_VOLUME_SCATTER(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) #define CLOSURE_IS_VOLUME_ABSORPTION(type) (type == CLOSURE_VOLUME_ABSORPTION_ID) #define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID) #define CLOSURE_IS_PHASE(type) (type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) -#define CLOSURE_IS_GLASS(type) (type >= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID) +#define CLOSURE_IS_GLASS(type) \ + (type >= CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID && type <= CLOSURE_BSDF_SHARP_GLASS_ID) #define CLOSURE_IS_PRINCIPLED(type) (type == CLOSURE_BSDF_PRINCIPLED_ID) #define CLOSURE_WEIGHT_CUTOFF 1e-5f CCL_NAMESPACE_END -#endif /* __SVM_TYPES_H__ */ +#endif /* __SVM_TYPES_H__ */ diff --git a/intern/cycles/kernel/svm/svm_value.h b/intern/cycles/kernel/svm/svm_value.h index 062aee2956e..5b76f2c8832 100644 --- a/intern/cycles/kernel/svm/svm_value.h +++ b/intern/cycles/kernel/svm/svm_value.h @@ -18,18 +18,21 @@ CCL_NAMESPACE_BEGIN /* Value Nodes */ -ccl_device void svm_node_value_f(KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset) +ccl_device void svm_node_value_f( + KernelGlobals *kg, ShaderData *sd, float *stack, uint ivalue, uint out_offset) { - stack_store_float(stack, out_offset, __uint_as_float(ivalue)); + stack_store_float(stack, out_offset, __uint_as_float(ivalue)); } -ccl_device void svm_node_value_v(KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset) +ccl_device void svm_node_value_v( + KernelGlobals *kg, ShaderData *sd, float *stack, uint out_offset, int *offset) { - /* read extra data */ - uint4 node1 = read_node(kg, offset); - float3 p = make_float3(__uint_as_float(node1.y), 
__uint_as_float(node1.z), __uint_as_float(node1.w)); + /* read extra data */ + uint4 node1 = read_node(kg, offset); + float3 p = make_float3( + __uint_as_float(node1.y), __uint_as_float(node1.z), __uint_as_float(node1.w)); - stack_store_float3(stack, out_offset, p); + stack_store_float3(stack, out_offset, p); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h index f6ec36ba41f..7ec0f07f2e4 100644 --- a/intern/cycles/kernel/svm/svm_vector_transform.h +++ b/intern/cycles/kernel/svm/svm_vector_transform.h @@ -18,83 +18,90 @@ CCL_NAMESPACE_BEGIN /* Vector Transform */ -ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device void svm_node_vector_transform(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint4 node) { - uint itype, ifrom, ito; - uint vector_in, vector_out; + uint itype, ifrom, ito; + uint vector_in, vector_out; - decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL); - decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL); + decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL); + decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL); - float3 in = stack_load_float3(stack, vector_in); + float3 in = stack_load_float3(stack, vector_in); - NodeVectorTransformType type = (NodeVectorTransformType)itype; - NodeVectorTransformConvertSpace from = (NodeVectorTransformConvertSpace)ifrom; - NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito; + NodeVectorTransformType type = (NodeVectorTransformType)itype; + NodeVectorTransformConvertSpace from = (NodeVectorTransformConvertSpace)ifrom; + NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito; - Transform tfm; - bool is_object = (sd->object != OBJECT_NONE); - bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL); + Transform tfm; + bool is_object = 
(sd->object != OBJECT_NONE); + bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || + type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL); - /* From world */ - if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD) { - if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { - tfm = kernel_data.cam.worldtocamera; - if(is_direction) - in = transform_direction(&tfm, in); - else - in = transform_point(&tfm, in); - } - else if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { - if(is_direction) - object_inverse_dir_transform(kg, sd, &in); - else - object_inverse_position_transform(kg, sd, &in); - } - } + /* From world */ + if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD) { + if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { + tfm = kernel_data.cam.worldtocamera; + if (is_direction) + in = transform_direction(&tfm, in); + else + in = transform_point(&tfm, in); + } + else if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { + if (is_direction) + object_inverse_dir_transform(kg, sd, &in); + else + object_inverse_position_transform(kg, sd, &in); + } + } - /* From camera */ - else if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { - if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { - tfm = kernel_data.cam.cameratoworld; - if(is_direction) - in = transform_direction(&tfm, in); - else - in = transform_point(&tfm, in); - } - if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { - if(is_direction) - object_inverse_dir_transform(kg, sd, &in); - else - object_inverse_position_transform(kg, sd, &in); - } - } + /* From camera */ + else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { + if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || + to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { + tfm = kernel_data.cam.cameratoworld; + if (is_direction) + in = transform_direction(&tfm, in); + else + in = transform_point(&tfm, in); + } + if (to == 
NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT && is_object) { + if (is_direction) + object_inverse_dir_transform(kg, sd, &in); + else + object_inverse_position_transform(kg, sd, &in); + } + } - /* From object */ - else if(from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { - if((to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) && is_object) { - if(is_direction) - object_dir_transform(kg, sd, &in); - else - object_position_transform(kg, sd, &in); - } - if(to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { - tfm = kernel_data.cam.worldtocamera; - if(is_direction) - in = transform_direction(&tfm, in); - else - in = transform_point(&tfm, in); - } - } + /* From object */ + else if (from == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_OBJECT) { + if ((to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_WORLD || + to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) && + is_object) { + if (is_direction) + object_dir_transform(kg, sd, &in); + else + object_position_transform(kg, sd, &in); + } + if (to == NODE_VECTOR_TRANSFORM_CONVERT_SPACE_CAMERA) { + tfm = kernel_data.cam.worldtocamera; + if (is_direction) + in = transform_direction(&tfm, in); + else + in = transform_point(&tfm, in); + } + } - /* Normalize Normal */ - if(type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL) - in = normalize(in); + /* Normalize Normal */ + if (type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL) + in = normalize(in); - /* Output */ - if(stack_valid(vector_out)) { - stack_store_float3(stack, vector_out, in); - } + /* Output */ + if (stack_valid(vector_out)) { + stack_store_float3(stack, vector_out, in); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index d661df54ead..c311aefaf38 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -18,143 +18,167 @@ CCL_NAMESPACE_BEGIN /* Voronoi */ -ccl_device void voronoi_neighbors(float3 p, NodeVoronoiDistanceMetric distance, 
float e, float da[4], float3 pa[4]) +ccl_device void voronoi_neighbors( + float3 p, NodeVoronoiDistanceMetric distance, float e, float da[4], float3 pa[4]) { - /* Compute the distance to and the position of the closest neighbors to p. - * - * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern). - * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will - * contain the distance to the closest point and its coordinates respectively. - */ - - da[0] = 1e10f; - da[1] = 1e10f; - da[2] = 1e10f; - da[3] = 1e10f; - - pa[0] = make_float3(0.0f, 0.0f, 0.0f); - pa[1] = make_float3(0.0f, 0.0f, 0.0f); - pa[2] = make_float3(0.0f, 0.0f, 0.0f); - pa[3] = make_float3(0.0f, 0.0f, 0.0f); - - int3 xyzi = quick_floor_to_int3(p); - - for(int xx = -1; xx <= 1; xx++) { - for(int yy = -1; yy <= 1; yy++) { - for(int zz = -1; zz <= 1; zz++) { - int3 ip = xyzi + make_int3(xx, yy, zz); - float3 fp = make_float3(ip.x, ip.y, ip.z); - float3 vp = fp + cellnoise3(fp); - - float d; - switch(distance) { - case NODE_VORONOI_DISTANCE: - d = len_squared(p - vp); - break; - case NODE_VORONOI_MANHATTAN: - d = reduce_add(fabs(vp - p)); - break; - case NODE_VORONOI_CHEBYCHEV: - d = max3(fabs(vp - p)); - break; - case NODE_VORONOI_MINKOWSKI: { - float3 n = fabs(vp - p); - if(e == 0.5f) { - d = sqr(reduce_add(sqrt(n))); - } - else { - d = powf(reduce_add(pow3(n, e)), 1.0f/e); - } - break; - } - } - - /* To keep the shortest four distances and associated points we have to keep them in sorted order. 
*/ - if(d < da[0]) { - da[3] = da[2]; - da[2] = da[1]; - da[1] = da[0]; - da[0] = d; - - pa[3] = pa[2]; - pa[2] = pa[1]; - pa[1] = pa[0]; - pa[0] = vp; - } - else if(d < da[1]) { - da[3] = da[2]; - da[2] = da[1]; - da[1] = d; - - pa[3] = pa[2]; - pa[2] = pa[1]; - pa[1] = vp; - } - else if(d < da[2]) { - da[3] = da[2]; - da[2] = d; - - pa[3] = pa[2]; - pa[2] = vp; - } - else if(d < da[3]) { - da[3] = d; - pa[3] = vp; - } - } - } - } + /* Compute the distance to and the position of the closest neighbors to p. + * + * The neighbors are randomly placed, 1 each in a 3x3x3 grid (Worley pattern). + * The distances and points are returned in ascending order, i.e. da[0] and pa[0] will + * contain the distance to the closest point and its coordinates respectively. + */ + + da[0] = 1e10f; + da[1] = 1e10f; + da[2] = 1e10f; + da[3] = 1e10f; + + pa[0] = make_float3(0.0f, 0.0f, 0.0f); + pa[1] = make_float3(0.0f, 0.0f, 0.0f); + pa[2] = make_float3(0.0f, 0.0f, 0.0f); + pa[3] = make_float3(0.0f, 0.0f, 0.0f); + + int3 xyzi = quick_floor_to_int3(p); + + for (int xx = -1; xx <= 1; xx++) { + for (int yy = -1; yy <= 1; yy++) { + for (int zz = -1; zz <= 1; zz++) { + int3 ip = xyzi + make_int3(xx, yy, zz); + float3 fp = make_float3(ip.x, ip.y, ip.z); + float3 vp = fp + cellnoise3(fp); + + float d; + switch (distance) { + case NODE_VORONOI_DISTANCE: + d = len_squared(p - vp); + break; + case NODE_VORONOI_MANHATTAN: + d = reduce_add(fabs(vp - p)); + break; + case NODE_VORONOI_CHEBYCHEV: + d = max3(fabs(vp - p)); + break; + case NODE_VORONOI_MINKOWSKI: { + float3 n = fabs(vp - p); + if (e == 0.5f) { + d = sqr(reduce_add(sqrt(n))); + } + else { + d = powf(reduce_add(pow3(n, e)), 1.0f / e); + } + break; + } + } + + /* To keep the shortest four distances and associated points we have to keep them in sorted order. 
*/ + if (d < da[0]) { + da[3] = da[2]; + da[2] = da[1]; + da[1] = da[0]; + da[0] = d; + + pa[3] = pa[2]; + pa[2] = pa[1]; + pa[1] = pa[0]; + pa[0] = vp; + } + else if (d < da[1]) { + da[3] = da[2]; + da[2] = da[1]; + da[1] = d; + + pa[3] = pa[2]; + pa[2] = pa[1]; + pa[1] = vp; + } + else if (d < da[2]) { + da[3] = da[2]; + da[2] = d; + + pa[3] = pa[2]; + pa[2] = vp; + } + else if (d < da[3]) { + da[3] = d; + pa[3] = vp; + } + } + } + } } -ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_tex_voronoi( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint4 node2 = read_node(kg, offset); - - uint co_offset, coloring, distance, feature; - uint scale_offset, e_offset, fac_offset, color_offset; - - decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature); - decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset); - - float3 co = stack_load_float3(stack, co_offset); - float scale = stack_load_float_default(stack, scale_offset, node2.x); - float exponent = stack_load_float_default(stack, e_offset, node2.y); - - float dist[4]; - float3 neighbor[4]; - voronoi_neighbors(co*scale, (NodeVoronoiDistanceMetric)distance, exponent, dist, neighbor); - - float3 color; - float fac; - if(coloring == NODE_VORONOI_INTENSITY) { - switch(feature) { - case NODE_VORONOI_F1: fac = dist[0]; break; - case NODE_VORONOI_F2: fac = dist[1]; break; - case NODE_VORONOI_F3: fac = dist[2]; break; - case NODE_VORONOI_F4: fac = dist[3]; break; - case NODE_VORONOI_F2F1: fac = dist[1] - dist[0]; break; - } - - color = make_float3(fac, fac, fac); - } - else { - /* NODE_VORONOI_CELLS */ - switch(feature) { - case NODE_VORONOI_F1: color = neighbor[0]; break; - case NODE_VORONOI_F2: color = neighbor[1]; break; - case NODE_VORONOI_F3: color = neighbor[2]; break; - case NODE_VORONOI_F4: color = neighbor[3]; break; - /* Usefulness of this vector is 
questionable. Note F2 >= F1 but the - * individual vector components might not be. */ - case NODE_VORONOI_F2F1: color = fabs(neighbor[1] - neighbor[0]); break; - } - - color = cellnoise3(color); - fac = average(color); - } - - if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, fac); - if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, color); + uint4 node2 = read_node(kg, offset); + + uint co_offset, coloring, distance, feature; + uint scale_offset, e_offset, fac_offset, color_offset; + + decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature); + decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset); + + float3 co = stack_load_float3(stack, co_offset); + float scale = stack_load_float_default(stack, scale_offset, node2.x); + float exponent = stack_load_float_default(stack, e_offset, node2.y); + + float dist[4]; + float3 neighbor[4]; + voronoi_neighbors(co * scale, (NodeVoronoiDistanceMetric)distance, exponent, dist, neighbor); + + float3 color; + float fac; + if (coloring == NODE_VORONOI_INTENSITY) { + switch (feature) { + case NODE_VORONOI_F1: + fac = dist[0]; + break; + case NODE_VORONOI_F2: + fac = dist[1]; + break; + case NODE_VORONOI_F3: + fac = dist[2]; + break; + case NODE_VORONOI_F4: + fac = dist[3]; + break; + case NODE_VORONOI_F2F1: + fac = dist[1] - dist[0]; + break; + } + + color = make_float3(fac, fac, fac); + } + else { + /* NODE_VORONOI_CELLS */ + switch (feature) { + case NODE_VORONOI_F1: + color = neighbor[0]; + break; + case NODE_VORONOI_F2: + color = neighbor[1]; + break; + case NODE_VORONOI_F3: + color = neighbor[2]; + break; + case NODE_VORONOI_F4: + color = neighbor[3]; + break; + /* Usefulness of this vector is questionable. Note F2 >= F1 but the + * individual vector components might not be. 
*/ + case NODE_VORONOI_F2F1: + color = fabs(neighbor[1] - neighbor[0]); + break; + } + + color = cellnoise3(color); + fac = average(color); + } + + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, fac); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, color); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index 43b433683e0..26d8cc71d3b 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -19,37 +19,34 @@ CCL_NAMESPACE_BEGIN /* TODO(sergey): Think of making it more generic volume-type attribute * sampler. */ -ccl_device void svm_node_tex_voxel(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node, - int *offset) +ccl_device void svm_node_tex_voxel( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint co_offset, density_out_offset, color_out_offset, space; - decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); + uint co_offset, density_out_offset, color_out_offset, space; + decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); #ifdef __VOLUME__ - int id = node.y; - float3 co = stack_load_float3(stack, co_offset); - if(space == NODE_TEX_VOXEL_SPACE_OBJECT) { - co = volume_normalized_position(kg, sd, co); - } - else { - kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD); - Transform tfm; - tfm.x = read_node_float(kg, offset); - tfm.y = read_node_float(kg, offset); - tfm.z = read_node_float(kg, offset); - co = transform_point(&tfm, co); - } + int id = node.y; + float3 co = stack_load_float3(stack, co_offset); + if (space == NODE_TEX_VOXEL_SPACE_OBJECT) { + co = volume_normalized_position(kg, sd, co); + } + else { + kernel_assert(space == NODE_TEX_VOXEL_SPACE_WORLD); + Transform tfm; + tfm.x = read_node_float(kg, offset); + tfm.y = read_node_float(kg, offset); + tfm.z = read_node_float(kg, offset); + co = 
transform_point(&tfm, co); + } - float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE); + float4 r = kernel_tex_image_interp_3d(kg, id, co.x, co.y, co.z, INTERPOLATION_NONE); #else - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); #endif - if(stack_valid(density_out_offset)) - stack_store_float(stack, density_out_offset, r.w); - if(stack_valid(color_out_offset)) - stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z)); + if (stack_valid(density_out_offset)) + stack_store_float(stack, density_out_offset, r.w); + if (stack_valid(color_out_offset)) + stack_store_float3(stack, color_out_offset, make_float3(r.x, r.y, r.z)); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h index 80b63dc80cd..003ad7dc63a 100644 --- a/intern/cycles/kernel/svm/svm_wave.h +++ b/intern/cycles/kernel/svm/svm_wave.h @@ -18,48 +18,58 @@ CCL_NAMESPACE_BEGIN /* Wave */ -ccl_device_noinline float svm_wave(NodeWaveType type, NodeWaveProfile profile, float3 p, float detail, float distortion, float dscale) +ccl_device_noinline float svm_wave(NodeWaveType type, + NodeWaveProfile profile, + float3 p, + float detail, + float distortion, + float dscale) { - float n; + float n; - if(type == NODE_WAVE_BANDS) - n = (p.x + p.y + p.z) * 10.0f; - else /* NODE_WAVE_RINGS */ - n = len(p) * 20.0f; + if (type == NODE_WAVE_BANDS) + n = (p.x + p.y + p.z) * 10.0f; + else /* NODE_WAVE_RINGS */ + n = len(p) * 20.0f; - if(distortion != 0.0f) - n += distortion * noise_turbulence(p*dscale, detail, 0); + if (distortion != 0.0f) + n += distortion * noise_turbulence(p * dscale, detail, 0); - if(profile == NODE_WAVE_PROFILE_SIN) { - return 0.5f + 0.5f * sinf(n); - } - else { /* NODE_WAVE_PROFILE_SAW */ - n /= M_2PI_F; - n -= (int) n; - return (n < 0.0f)? 
n + 1.0f: n; - } + if (profile == NODE_WAVE_PROFILE_SIN) { + return 0.5f + 0.5f * sinf(n); + } + else { /* NODE_WAVE_PROFILE_SAW */ + n /= M_2PI_F; + n -= (int)n; + return (n < 0.0f) ? n + 1.0f : n; + } } -ccl_device void svm_node_tex_wave(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) +ccl_device void svm_node_tex_wave( + KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint4 node2 = read_node(kg, offset); + uint4 node2 = read_node(kg, offset); - uint type; - uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset, fac_offset; + uint type; + uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset, + fac_offset; - decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset); - decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset); + decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset); + decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset); - float3 co = stack_load_float3(stack, co_offset); - float scale = stack_load_float_default(stack, scale_offset, node2.x); - float detail = stack_load_float_default(stack, detail_offset, node2.y); - float distortion = stack_load_float_default(stack, distortion_offset, node2.z); - float dscale = stack_load_float_default(stack, dscale_offset, node2.w); + float3 co = stack_load_float3(stack, co_offset); + float scale = stack_load_float_default(stack, scale_offset, node2.x); + float detail = stack_load_float_default(stack, detail_offset, node2.y); + float distortion = stack_load_float_default(stack, distortion_offset, node2.z); + float dscale = stack_load_float_default(stack, dscale_offset, node2.w); - float f = svm_wave((NodeWaveType)type, (NodeWaveProfile)node.w, co*scale, detail, distortion, dscale); + float f = svm_wave( + (NodeWaveType)type, (NodeWaveProfile)node.w, co * scale, detail, 
distortion, dscale); - if(stack_valid(fac_offset)) stack_store_float(stack, fac_offset, f); - if(stack_valid(color_offset)) stack_store_float3(stack, color_offset, make_float3(f, f, f)); + if (stack_valid(fac_offset)) + stack_store_float(stack, fac_offset, f); + if (stack_valid(color_offset)) + stack_store_float3(stack, color_offset, make_float3(f, f, f)); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h index e935fd20690..d6144802559 100644 --- a/intern/cycles/kernel/svm/svm_wavelength.h +++ b/intern/cycles/kernel/svm/svm_wavelength.h @@ -10,13 +10,13 @@ * modification, are permitted provided that the following conditions are * met: * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * * Neither the name of Sony Pictures Imageworks nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -35,64 +35,64 @@ CCL_NAMESPACE_BEGIN /* Wavelength to RGB */ // CIE colour matching functions xBar, yBar, and zBar for -// wavelengths from 380 through 780 nanometers, every 5 -// nanometers. For a wavelength lambda in this range: -// cie_colour_match[(lambda - 380) / 5][0] = xBar -// cie_colour_match[(lambda - 380) / 5][1] = yBar -// cie_colour_match[(lambda - 380) / 5][2] = zBar +// wavelengths from 380 through 780 nanometers, every 5 +// nanometers. For a wavelength lambda in this range: +// cie_colour_match[(lambda - 380) / 5][0] = xBar +// cie_colour_match[(lambda - 380) / 5][1] = yBar +// cie_colour_match[(lambda - 380) / 5][2] = zBar ccl_static_constant float cie_colour_match[81][3] = { - {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f}, - {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f}, - {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f}, - {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f}, - {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f}, - {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f}, - {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f}, - {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f}, - {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f}, - {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f}, - {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f}, - {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f}, - {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f}, - 
{0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f}, - {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f}, - {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f}, - {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f}, - {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f}, - {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f}, - {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f}, - {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f}, - {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f}, - {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f}, - {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f}, - {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f}, - {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f}, - {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f} -}; + {0.0014f, 0.0000f, 0.0065f}, {0.0022f, 0.0001f, 0.0105f}, {0.0042f, 0.0001f, 0.0201f}, + {0.0076f, 0.0002f, 0.0362f}, {0.0143f, 0.0004f, 0.0679f}, {0.0232f, 0.0006f, 0.1102f}, + {0.0435f, 0.0012f, 0.2074f}, {0.0776f, 0.0022f, 0.3713f}, {0.1344f, 0.0040f, 0.6456f}, + {0.2148f, 0.0073f, 1.0391f}, {0.2839f, 0.0116f, 1.3856f}, {0.3285f, 0.0168f, 1.6230f}, + {0.3483f, 0.0230f, 1.7471f}, {0.3481f, 0.0298f, 1.7826f}, {0.3362f, 0.0380f, 1.7721f}, + {0.3187f, 0.0480f, 1.7441f}, {0.2908f, 0.0600f, 1.6692f}, {0.2511f, 0.0739f, 1.5281f}, + {0.1954f, 0.0910f, 1.2876f}, {0.1421f, 0.1126f, 1.0419f}, {0.0956f, 0.1390f, 0.8130f}, + {0.0580f, 0.1693f, 0.6162f}, {0.0320f, 0.2080f, 0.4652f}, {0.0147f, 0.2586f, 0.3533f}, + {0.0049f, 0.3230f, 0.2720f}, {0.0024f, 0.4073f, 0.2123f}, {0.0093f, 0.5030f, 0.1582f}, + {0.0291f, 0.6082f, 0.1117f}, 
{0.0633f, 0.7100f, 0.0782f}, {0.1096f, 0.7932f, 0.0573f}, + {0.1655f, 0.8620f, 0.0422f}, {0.2257f, 0.9149f, 0.0298f}, {0.2904f, 0.9540f, 0.0203f}, + {0.3597f, 0.9803f, 0.0134f}, {0.4334f, 0.9950f, 0.0087f}, {0.5121f, 1.0000f, 0.0057f}, + {0.5945f, 0.9950f, 0.0039f}, {0.6784f, 0.9786f, 0.0027f}, {0.7621f, 0.9520f, 0.0021f}, + {0.8425f, 0.9154f, 0.0018f}, {0.9163f, 0.8700f, 0.0017f}, {0.9786f, 0.8163f, 0.0014f}, + {1.0263f, 0.7570f, 0.0011f}, {1.0567f, 0.6949f, 0.0010f}, {1.0622f, 0.6310f, 0.0008f}, + {1.0456f, 0.5668f, 0.0006f}, {1.0026f, 0.5030f, 0.0003f}, {0.9384f, 0.4412f, 0.0002f}, + {0.8544f, 0.3810f, 0.0002f}, {0.7514f, 0.3210f, 0.0001f}, {0.6424f, 0.2650f, 0.0000f}, + {0.5419f, 0.2170f, 0.0000f}, {0.4479f, 0.1750f, 0.0000f}, {0.3608f, 0.1382f, 0.0000f}, + {0.2835f, 0.1070f, 0.0000f}, {0.2187f, 0.0816f, 0.0000f}, {0.1649f, 0.0610f, 0.0000f}, + {0.1212f, 0.0446f, 0.0000f}, {0.0874f, 0.0320f, 0.0000f}, {0.0636f, 0.0232f, 0.0000f}, + {0.0468f, 0.0170f, 0.0000f}, {0.0329f, 0.0119f, 0.0000f}, {0.0227f, 0.0082f, 0.0000f}, + {0.0158f, 0.0057f, 0.0000f}, {0.0114f, 0.0041f, 0.0000f}, {0.0081f, 0.0029f, 0.0000f}, + {0.0058f, 0.0021f, 0.0000f}, {0.0041f, 0.0015f, 0.0000f}, {0.0029f, 0.0010f, 0.0000f}, + {0.0020f, 0.0007f, 0.0000f}, {0.0014f, 0.0005f, 0.0000f}, {0.0010f, 0.0004f, 0.0000f}, + {0.0007f, 0.0002f, 0.0000f}, {0.0005f, 0.0002f, 0.0000f}, {0.0003f, 0.0001f, 0.0000f}, + {0.0002f, 0.0001f, 0.0000f}, {0.0002f, 0.0001f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, + {0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}}; -ccl_device void svm_node_wavelength(KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out) +ccl_device void svm_node_wavelength( + KernelGlobals *kg, ShaderData *sd, float *stack, uint wavelength, uint color_out) { - float lambda_nm = stack_load_float(stack, wavelength); - float ii = (lambda_nm-380.0f) * (1.0f/5.0f); // scaled 0..80 - int i = float_to_int(ii); - float3 color; + float lambda_nm = 
stack_load_float(stack, wavelength); + float ii = (lambda_nm - 380.0f) * (1.0f / 5.0f); // scaled 0..80 + int i = float_to_int(ii); + float3 color; - if(i < 0 || i >= 80) { - color = make_float3(0.0f, 0.0f, 0.0f); - } - else { - ii -= i; - ccl_constant float *c = cie_colour_match[i]; - color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii); - } + if (i < 0 || i >= 80) { + color = make_float3(0.0f, 0.0f, 0.0f); + } + else { + ii -= i; + ccl_constant float *c = cie_colour_match[i]; + color = interp(make_float3(c[0], c[1], c[2]), make_float3(c[3], c[4], c[5]), ii); + } - color = xyz_to_rgb(kg, color); - color *= 1.0f/2.52f; // Empirical scale from lg to make all comps <= 1 + color = xyz_to_rgb(kg, color); + color *= 1.0f / 2.52f; // Empirical scale from lg to make all comps <= 1 - /* Clamp to zero if values are smaller */ - color = max(color, make_float3(0.0f, 0.0f, 0.0f)); + /* Clamp to zero if values are smaller */ + color = max(color, make_float3(0.0f, 0.0f, 0.0f)); - stack_store_float3(stack, color_out, color); + stack_store_float3(stack, color_out, color); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 35df9e8a0e7..55e61d0e8c7 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -34,103 +34,97 @@ CCL_NAMESPACE_BEGIN /* Wireframe Node */ -ccl_device_inline float wireframe(KernelGlobals *kg, - ShaderData *sd, - float size, - int pixel_size, - float3 *P) +ccl_device_inline float wireframe( + KernelGlobals *kg, ShaderData *sd, float size, int pixel_size, float3 *P) { #ifdef __HAIR__ - if(sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE) + if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE) #else - if(sd->prim != PRIM_NONE) + if (sd->prim != PRIM_NONE) #endif - { - float3 Co[3]; - float pixelwidth = 1.0f; + { + float3 Co[3]; + float pixelwidth = 1.0f; - /* Triangles */ - int np = 3; + /* 
Triangles */ + int np = 3; - if(sd->type & PRIMITIVE_TRIANGLE) - triangle_vertices(kg, sd->prim, Co); - else - motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co); + if (sd->type & PRIMITIVE_TRIANGLE) + triangle_vertices(kg, sd->prim, Co); + else + motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co); - if(!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { - object_position_transform(kg, sd, &Co[0]); - object_position_transform(kg, sd, &Co[1]); - object_position_transform(kg, sd, &Co[2]); - } + if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { + object_position_transform(kg, sd, &Co[0]); + object_position_transform(kg, sd, &Co[1]); + object_position_transform(kg, sd, &Co[2]); + } - if(pixel_size) { - // Project the derivatives of P to the viewing plane defined - // by I so we have a measure of how big is a pixel at this point - float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I); - float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I); - // Take the average of both axis' length - pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f; - } + if (pixel_size) { + // Project the derivatives of P to the viewing plane defined + // by I so we have a measure of how big is a pixel at this point + float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I); + float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I); + // Take the average of both axis' length + pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f; + } - // Use half the width as the neighbor face will render the - // other half. And take the square for fast comparison - pixelwidth *= 0.5f * size; - pixelwidth *= pixelwidth; - for(int i = 0; i < np; i++) { - int i2 = i ? i - 1 : np - 1; - float3 dir = *P - Co[i]; - float3 edge = Co[i] - Co[i2]; - float3 crs = cross(edge, dir); - // At this point dot(crs, crs) / dot(edge, edge) is - // the square of area / length(edge) == square of the - // distance to the edge. 
- if(dot(crs, crs) < (dot(edge, edge) * pixelwidth)) - return 1.0f; - } - } - return 0.0f; + // Use half the width as the neighbor face will render the + // other half. And take the square for fast comparison + pixelwidth *= 0.5f * size; + pixelwidth *= pixelwidth; + for (int i = 0; i < np; i++) { + int i2 = i ? i - 1 : np - 1; + float3 dir = *P - Co[i]; + float3 edge = Co[i] - Co[i2]; + float3 crs = cross(edge, dir); + // At this point dot(crs, crs) / dot(edge, edge) is + // the square of area / length(edge) == square of the + // distance to the edge. + if (dot(crs, crs) < (dot(edge, edge) * pixelwidth)) + return 1.0f; + } + } + return 0.0f; } -ccl_device void svm_node_wireframe(KernelGlobals *kg, - ShaderData *sd, - float *stack, - uint4 node) +ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - uint in_size = node.y; - uint out_fac = node.z; - uint use_pixel_size, bump_offset; - decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL); + uint in_size = node.y; + uint out_fac = node.z; + uint use_pixel_size, bump_offset; + decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL); - /* Input Data */ - float size = stack_load_float(stack, in_size); - int pixel_size = (int)use_pixel_size; + /* Input Data */ + float size = stack_load_float(stack, in_size); + int pixel_size = (int)use_pixel_size; - /* Calculate wireframe */ + /* Calculate wireframe */ #ifdef __SPLIT_KERNEL__ - /* TODO(sergey): This is because sd is actually a global space, - * which makes it difficult to re-use same wireframe() function. - * - * With OpenCL 2.0 it's possible to avoid this change, but for until - * then we'll be living with such an exception. - */ - float3 P = sd->P; - float f = wireframe(kg, sd, size, pixel_size, &P); + /* TODO(sergey): This is because sd is actually a global space, + * which makes it difficult to re-use same wireframe() function. 
+ * + * With OpenCL 2.0 it's possible to avoid this change, but for until + * then we'll be living with such an exception. + */ + float3 P = sd->P; + float f = wireframe(kg, sd, size, pixel_size, &P); #else - float f = wireframe(kg, sd, size, pixel_size, &sd->P); + float f = wireframe(kg, sd, size, pixel_size, &sd->P); #endif - /* TODO(sergey): Think of faster way to calculate derivatives. */ - if(bump_offset == NODE_BUMP_OFFSET_DX) { - float3 Px = sd->P - sd->dP.dx; - f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx); - } - else if(bump_offset == NODE_BUMP_OFFSET_DY) { - float3 Py = sd->P - sd->dP.dy; - f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy); - } + /* TODO(sergey): Think of faster way to calculate derivatives. */ + if (bump_offset == NODE_BUMP_OFFSET_DX) { + float3 Px = sd->P - sd->dP.dx; + f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx); + } + else if (bump_offset == NODE_BUMP_OFFSET_DY) { + float3 Py = sd->P - sd->dP.dy; + f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy); + } - if(stack_valid(out_fac)) - stack_store_float(stack, out_fac, f); + if (stack_valid(out_fac)) + stack_store_float(stack, out_fac, f); } CCL_NAMESPACE_END |