Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt220
-rw-r--r--intern/cycles/kernel/bake/bake.h23
-rw-r--r--intern/cycles/kernel/bvh/bvh.h817
-rw-r--r--intern/cycles/kernel/bvh/embree.h176
-rw-r--r--intern/cycles/kernel/bvh/local.h8
-rw-r--r--intern/cycles/kernel/bvh/metal.h37
-rw-r--r--intern/cycles/kernel/bvh/shadow_all.h85
-rw-r--r--intern/cycles/kernel/bvh/traversal.h21
-rw-r--r--intern/cycles/kernel/bvh/util.h49
-rw-r--r--intern/cycles/kernel/bvh/volume.h22
-rw-r--r--intern/cycles/kernel/bvh/volume_all.h62
-rw-r--r--intern/cycles/kernel/camera/camera.h31
-rw-r--r--intern/cycles/kernel/camera/projection.h26
-rw-r--r--intern/cycles/kernel/closure/alloc.h37
-rw-r--r--intern/cycles/kernel/closure/bsdf.h805
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h258
-rw-r--r--intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h130
-rw-r--r--intern/cycles/kernel/closure/bsdf_diffuse.h68
-rw-r--r--intern/cycles/kernel/closure/bsdf_diffuse_ramp.h41
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair.h88
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair_principled.h299
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h204
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_beckmann.h240
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_glass.h152
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet_util.h12
-rw-r--r--intern/cycles/kernel/closure/bsdf_oren_nayar.h43
-rw-r--r--intern/cycles/kernel/closure/bsdf_phong_ramp.h43
-rw-r--r--intern/cycles/kernel/closure/bsdf_principled_diffuse.h46
-rw-r--r--intern/cycles/kernel/closure/bsdf_principled_sheen.h55
-rw-r--r--intern/cycles/kernel/closure/bsdf_reflection.h37
-rw-r--r--intern/cycles/kernel/closure/bsdf_refraction.h56
-rw-r--r--intern/cycles/kernel/closure/bsdf_toon.h100
-rw-r--r--intern/cycles/kernel/closure/bsdf_transparent.h33
-rw-r--r--intern/cycles/kernel/closure/bsdf_util.h56
-rw-r--r--intern/cycles/kernel/closure/bssrdf.h123
-rw-r--r--intern/cycles/kernel/closure/emissive.h8
-rw-r--r--intern/cycles/kernel/closure/volume.h82
-rw-r--r--intern/cycles/kernel/data_arrays.h2
-rw-r--r--intern/cycles/kernel/data_template.h17
-rw-r--r--intern/cycles/kernel/device/cpu/bvh.h582
-rw-r--r--intern/cycles/kernel/device/cpu/compat.h36
-rw-r--r--intern/cycles/kernel/device/cpu/globals.h19
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch_impl.h10
-rw-r--r--intern/cycles/kernel/device/gpu/kernel.h8
-rw-r--r--intern/cycles/kernel/device/gpu/parallel_active_index.h33
-rw-r--r--intern/cycles/kernel/device/metal/bvh.h360
-rw-r--r--intern/cycles/kernel/device/metal/compat.h90
-rw-r--r--intern/cycles/kernel/device/metal/context_begin.h37
-rw-r--r--intern/cycles/kernel/device/metal/kernel.metal506
-rw-r--r--intern/cycles/kernel/device/oneapi/compat.h82
-rw-r--r--intern/cycles/kernel/device/oneapi/dll_interface_template.h53
-rw-r--r--intern/cycles/kernel/device/oneapi/globals.h9
-rw-r--r--intern/cycles/kernel/device/oneapi/image.h26
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel.cpp566
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel.h21
-rw-r--r--intern/cycles/kernel/device/oneapi/kernel_templates.h2
-rw-r--r--intern/cycles/kernel/device/optix/bvh.h659
-rw-r--r--intern/cycles/kernel/device/optix/compat.h1
-rw-r--r--intern/cycles/kernel/device/optix/kernel.cu436
-rw-r--r--intern/cycles/kernel/film/adaptive_sampling.h52
-rw-r--r--intern/cycles/kernel/film/aov_passes.h33
-rw-r--r--intern/cycles/kernel/film/cryptomatte_passes.h (renamed from intern/cycles/kernel/film/id_passes.h)28
-rw-r--r--intern/cycles/kernel/film/data_passes.h160
-rw-r--r--intern/cycles/kernel/film/denoising_passes.h146
-rw-r--r--intern/cycles/kernel/film/light_passes.h (renamed from intern/cycles/kernel/film/accumulate.h)340
-rw-r--r--intern/cycles/kernel/film/read.h19
-rw-r--r--intern/cycles/kernel/film/write.h (renamed from intern/cycles/kernel/film/write_passes.h)27
-rw-r--r--intern/cycles/kernel/geom/attribute.h32
-rw-r--r--intern/cycles/kernel/geom/curve_intersect.h77
-rw-r--r--intern/cycles/kernel/geom/motion_triangle_intersect.h4
-rw-r--r--intern/cycles/kernel/geom/motion_triangle_shader.h6
-rw-r--r--intern/cycles/kernel/geom/object.h113
-rw-r--r--intern/cycles/kernel/geom/point_intersect.h30
-rw-r--r--intern/cycles/kernel/geom/primitive.h10
-rw-r--r--intern/cycles/kernel/geom/shader_data.h26
-rw-r--r--intern/cycles/kernel/geom/subd_triangle.h80
-rw-r--r--intern/cycles/kernel/geom/triangle.h36
-rw-r--r--intern/cycles/kernel/geom/triangle_intersect.h4
-rw-r--r--intern/cycles/kernel/geom/volume.h2
-rw-r--r--intern/cycles/kernel/integrator/displacement_shader.h40
-rw-r--r--intern/cycles/kernel/integrator/guiding.h547
-rw-r--r--intern/cycles/kernel/integrator/init_from_bake.h80
-rw-r--r--intern/cycles/kernel/integrator/init_from_camera.h34
-rw-r--r--intern/cycles/kernel/integrator/intersect_closest.h17
-rw-r--r--intern/cycles/kernel/integrator/intersect_shadow.h2
-rw-r--r--intern/cycles/kernel/integrator/intersect_volume_stack.h7
-rw-r--r--intern/cycles/kernel/integrator/mnee.h52
-rw-r--r--intern/cycles/kernel/integrator/path_state.h79
-rw-r--r--intern/cycles/kernel/integrator/shade_background.h127
-rw-r--r--intern/cycles/kernel/integrator/shade_light.h16
-rw-r--r--intern/cycles/kernel/integrator/shade_shadow.h24
-rw-r--r--intern/cycles/kernel/integrator/shade_surface.h262
-rw-r--r--intern/cycles/kernel/integrator/shade_volume.h303
-rw-r--r--intern/cycles/kernel/integrator/shader_eval.h952
-rw-r--r--intern/cycles/kernel/integrator/shadow_catcher.h23
-rw-r--r--intern/cycles/kernel/integrator/shadow_state_template.h18
-rw-r--r--intern/cycles/kernel/integrator/state.h10
-rw-r--r--intern/cycles/kernel/integrator/state_flow.h6
-rw-r--r--intern/cycles/kernel/integrator/state_template.h45
-rw-r--r--intern/cycles/kernel/integrator/state_util.h2
-rw-r--r--intern/cycles/kernel/integrator/subsurface.h22
-rw-r--r--intern/cycles/kernel/integrator/subsurface_disk.h54
-rw-r--r--intern/cycles/kernel/integrator/subsurface_random_walk.h142
-rw-r--r--intern/cycles/kernel/integrator/surface_shader.h860
-rw-r--r--intern/cycles/kernel/integrator/volume_shader.h519
-rw-r--r--intern/cycles/kernel/integrator/volume_stack.h4
-rw-r--r--intern/cycles/kernel/light/background.h4
-rw-r--r--intern/cycles/kernel/light/light.h10
-rw-r--r--intern/cycles/kernel/light/sample.h34
-rw-r--r--intern/cycles/kernel/osl/CMakeLists.txt13
-rw-r--r--intern/cycles/kernel/osl/background.cpp73
-rw-r--r--intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp63
-rw-r--r--intern/cycles/kernel/osl/bsdf_phong_ramp.cpp64
-rw-r--r--intern/cycles/kernel/osl/bssrdf.cpp100
-rw-r--r--intern/cycles/kernel/osl/closures.cpp1156
-rw-r--r--intern/cycles/kernel/osl/closures.h141
-rw-r--r--intern/cycles/kernel/osl/closures_setup.h1111
-rw-r--r--intern/cycles/kernel/osl/closures_template.h258
-rw-r--r--intern/cycles/kernel/osl/emissive.cpp50
-rw-r--r--intern/cycles/kernel/osl/globals.cpp59
-rw-r--r--intern/cycles/kernel/osl/globals.h12
-rw-r--r--intern/cycles/kernel/osl/osl.h (renamed from intern/cycles/kernel/osl/shader.h)35
-rw-r--r--intern/cycles/kernel/osl/services.cpp214
-rw-r--r--intern/cycles/kernel/osl/services.h3
-rw-r--r--intern/cycles/kernel/osl/shader.cpp417
-rw-r--r--intern/cycles/kernel/osl/shaders/CMakeLists.txt5
-rw-r--r--intern/cycles/kernel/osl/shaders/node_color_blend.h264
-rw-r--r--intern/cycles/kernel/osl/shaders/node_geometry.osl2
-rw-r--r--intern/cycles/kernel/osl/shaders/node_mix_color.osl57
-rw-r--r--intern/cycles/kernel/osl/shaders/node_mix_float.osl11
-rw-r--r--intern/cycles/kernel/osl/shaders/node_mix_vector.osl14
-rw-r--r--intern/cycles/kernel/osl/shaders/node_mix_vector_non_uniform.osl14
-rw-r--r--intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl68
-rw-r--r--intern/cycles/kernel/osl/types.h20
-rw-r--r--intern/cycles/kernel/sample/jitter.h231
-rw-r--r--intern/cycles/kernel/sample/pattern.h122
-rw-r--r--intern/cycles/kernel/sample/sobol_burley.h133
-rw-r--r--intern/cycles/kernel/sample/util.h35
-rw-r--r--intern/cycles/kernel/svm/ao.h6
-rw-r--r--intern/cycles/kernel/svm/aov.h16
-rw-r--r--intern/cycles/kernel/svm/attribute.h14
-rw-r--r--intern/cycles/kernel/svm/bevel.h16
-rw-r--r--intern/cycles/kernel/svm/bump.h17
-rw-r--r--intern/cycles/kernel/svm/closure.h106
-rw-r--r--intern/cycles/kernel/svm/closure_principled.h99
-rw-r--r--intern/cycles/kernel/svm/color_util.h12
-rw-r--r--intern/cycles/kernel/svm/displace.h13
-rw-r--r--intern/cycles/kernel/svm/geometry.h10
-rw-r--r--intern/cycles/kernel/svm/mix.h86
-rw-r--r--intern/cycles/kernel/svm/musgrave.h68
-rw-r--r--intern/cycles/kernel/svm/node_types_template.h4
-rw-r--r--intern/cycles/kernel/svm/svm.h12
-rw-r--r--intern/cycles/kernel/svm/tex_coord.h20
-rw-r--r--intern/cycles/kernel/svm/types.h4
-rw-r--r--intern/cycles/kernel/svm/wireframe.h16
-rw-r--r--intern/cycles/kernel/tables.h53
-rw-r--r--intern/cycles/kernel/types.h245
-rw-r--r--intern/cycles/kernel/util/color.h15
-rw-r--r--intern/cycles/kernel/util/differential.h62
-rw-r--r--intern/cycles/kernel/util/profiling.h6
160 files changed, 10354 insertions, 8959 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index fab2497e34f..e7a94f00ffb 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -42,6 +42,7 @@ set(SRC_KERNEL_DEVICE_ONEAPI
)
set(SRC_KERNEL_DEVICE_CPU_HEADERS
+ device/cpu/bvh.h
device/cpu/compat.h
device/cpu/image.h
device/cpu/globals.h
@@ -71,11 +72,13 @@ set(SRC_KERNEL_DEVICE_HIP_HEADERS
)
set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
+ device/optix/bvh.h
device/optix/compat.h
device/optix/globals.h
)
set(SRC_KERNEL_DEVICE_METAL_HEADERS
+ device/metal/bvh.h
device/metal/compat.h
device/metal/context_begin.h
device/metal/context_end.h
@@ -216,8 +219,6 @@ set(SRC_KERNEL_BVH_HEADERS
bvh/util.h
bvh/volume.h
bvh/volume_all.h
- bvh/embree.h
- bvh/metal.h
)
set(SRC_KERNEL_CAMERA_HEADERS
@@ -226,27 +227,30 @@ set(SRC_KERNEL_CAMERA_HEADERS
)
set(SRC_KERNEL_FILM_HEADERS
- film/accumulate.h
film/adaptive_sampling.h
- film/id_passes.h
- film/passes.h
+ film/aov_passes.h
+ film/data_passes.h
+ film/denoising_passes.h
+ film/cryptomatte_passes.h
+ film/light_passes.h
film/read.h
- film/write_passes.h
+ film/write.h
)
set(SRC_KERNEL_INTEGRATOR_HEADERS
+ integrator/displacement_shader.h
integrator/init_from_bake.h
integrator/init_from_camera.h
integrator/intersect_closest.h
integrator/intersect_shadow.h
integrator/intersect_subsurface.h
integrator/intersect_volume_stack.h
+ integrator/guiding.h
integrator/megakernel.h
integrator/mnee.h
integrator/path_state.h
integrator/shade_background.h
integrator/shade_light.h
- integrator/shader_eval.h
integrator/shade_shadow.h
integrator/shade_surface.h
integrator/shade_volume.h
@@ -259,6 +263,8 @@ set(SRC_KERNEL_INTEGRATOR_HEADERS
integrator/subsurface_disk.h
integrator/subsurface.h
integrator/subsurface_random_walk.h
+ integrator/surface_shader.h
+ integrator/volume_shader.h
integrator/volume_stack.h
)
@@ -275,6 +281,8 @@ set(SRC_KERNEL_SAMPLE_HEADERS
sample/mapping.h
sample/mis.h
sample/pattern.h
+ sample/sobol_burley.h
+ sample/util.h
)
set(SRC_KERNEL_UTIL_HEADERS
@@ -318,6 +326,7 @@ set(SRC_UTIL_HEADERS
../util/math_float2.h
../util/math_float3.h
../util/math_float4.h
+ ../util/math_float8.h
../util/math_int2.h
../util/math_int3.h
../util/math_int4.h
@@ -326,6 +335,7 @@ set(SRC_UTIL_HEADERS
../util/rect.h
../util/static_assert.h
../util/transform.h
+ ../util/transform_inverse.h
../util/texture.h
../util/types.h
../util/types_float2.h
@@ -342,6 +352,7 @@ set(SRC_UTIL_HEADERS
../util/types_int3_impl.h
../util/types_int4.h
../util/types_int4_impl.h
+ ../util/types_spectrum.h
../util/types_uchar2.h
../util/types_uchar2_impl.h
../util/types_uchar3.h
@@ -355,8 +366,6 @@ set(SRC_UTIL_HEADERS
../util/types_uint4.h
../util/types_uint4_impl.h
../util/types_ushort4.h
- ../util/types_vector3.h
- ../util/types_vector3_impl.h
)
set(LIB
@@ -523,7 +532,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
endif()
if(DEFINED cuda_nvcc_executable AND DEFINED cuda_toolkit_root_dir)
# Compile regular kernel
- CYCLES_CUDA_KERNEL_ADD(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)
+ cycles_cuda_kernel_add(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)
if(WITH_CYCLES_CUDA_BUILD_SERIAL)
set(prev_arch ${arch})
@@ -538,8 +547,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
cycles_set_solution_folder(cycles_kernel_cuda)
endif()
-####################################################### START
-
# HIP module
# TODO: Re-enable HIP and figure out compiler crash
@@ -608,14 +615,13 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP AND FALSE)
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
# Compile regular kernel
- CYCLES_HIP_KERNEL_ADD(${arch} kernel "" "${hip_sources}" FALSE)
+ cycles_hip_kernel_add(${arch} kernel "" "${hip_sources}" FALSE)
endforeach()
add_custom_target(cycles_kernel_hip ALL DEPENDS ${hip_fatbins})
cycles_set_solution_folder(cycles_kernel_hip)
endif()
-####################################################### END
# OptiX PTX modules
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
@@ -694,11 +700,11 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)
endmacro()
- CYCLES_OPTIX_KERNEL_ADD(
+ cycles_optix_kernel_add(
kernel_optix
"device/optix/kernel.cu"
"")
- CYCLES_OPTIX_KERNEL_ADD(
+ cycles_optix_kernel_add(
kernel_optix_shader_raytrace
"device/optix/kernel_shader_raytrace.cu"
"--keep-device-functions")
@@ -707,11 +713,20 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
cycles_set_solution_folder(cycles_kernel_optix)
endif()
+# oneAPI module
+
if(WITH_CYCLES_DEVICE_ONEAPI)
+ if(WITH_CYCLES_ONEAPI_BINARIES)
+ set(cycles_kernel_oneapi_lib_suffix "_aot")
+ else()
+ set(cycles_kernel_oneapi_lib_suffix "_jit")
+ endif()
+
if(WIN32)
- set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.dll)
+ set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.dll)
+ set(cycles_kernel_oneapi_linker_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.lib)
else()
- set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi.so)
+ set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/libcycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.so)
endif()
set(cycles_oneapi_kernel_sources
@@ -722,14 +737,20 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
${SRC_UTIL_HEADERS}
)
+ set (SYCL_OFFLINE_COMPILER_PARALLEL_JOBS 1 CACHE STRING "Number of parallel compiler instances to use for device binaries compilation (expect ~8GB peak memory usage per instance).")
+ if (WITH_CYCLES_ONEAPI_BINARIES)
+ message(STATUS "${SYCL_OFFLINE_COMPILER_PARALLEL_JOBS} instance(s) of oneAPI offline compiler will be used.")
+ endif()
# SYCL_CPP_FLAGS is a variable that the user can set to pass extra compiler options
set(sycl_compiler_flags
${CMAKE_CURRENT_SOURCE_DIR}/${SRC_KERNEL_DEVICE_ONEAPI}
-fsycl
-fsycl-unnamed-lambda
-fdelayed-template-parsing
- -mllvm -inlinedefault-threshold=300
- -mllvm -inlinehint-threshold=400
+ -mllvm -inlinedefault-threshold=250
+ -mllvm -inlinehint-threshold=350
+ -fsycl-device-code-split=per_kernel
+ -fsycl-max-parallel-link-jobs=${SYCL_OFFLINE_COMPILER_PARALLEL_JOBS}
-shared
-DWITH_ONEAPI
-ffast-math
@@ -740,11 +761,6 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
${SYCL_CPP_FLAGS}
)
-
- if (WITH_CYCLES_ONEAPI_SYCL_HOST_ENABLED)
- list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_ENABLED)
- endif()
-
# Set defaults for spir64 and spir64_gen options
if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
@@ -752,13 +768,13 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
if (NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
SET (CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}" CACHE STRING "Extra build options for spir64_gen target")
endif()
- # enabling zebin (graphics binary format with improved compatibility) on Windows only while support on Linux isn't available yet
- if(WIN32)
- string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ")
- endif()
+ # Enable zebin, a graphics binary format with improved compatibility.
+ string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ")
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ")
if (WITH_CYCLES_ONEAPI_BINARIES)
+ # AoT binaries aren't currently reused when calling sycl::build.
+ list (APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
# Iterate over all targest and their options
list (JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
list (APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
@@ -787,18 +803,10 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
get_filename_component(sycl_compiler_root ${SYCL_COMPILER} DIRECTORY)
get_filename_component(sycl_compiler_compiler_name ${SYCL_COMPILER} NAME_WE)
- if(NOT OCLOC_INSTALL_DIR)
- get_filename_component(OCLOC_INSTALL_DIR "${sycl_compiler_root}/../lib/ocloc" ABSOLUTE)
- endif()
- if(WITH_CYCLES_ONEAPI_BINARIES AND NOT EXISTS ${OCLOC_INSTALL_DIR})
- message(FATAL_ERROR "WITH_CYCLES_ONEAPI_BINARIES requires ocloc but ${OCLOC_INSTALL_DIR} directory doesn't exist."
- " A different ocloc directory can be set using OCLOC_INSTALL_DIR cmake variable.")
- endif()
-
if(UNIX AND NOT APPLE)
if(NOT WITH_CXX11_ABI)
check_library_exists(sycl
- _ZN2cl4sycl7handler22verifyUsedKernelBundleERKSs ${sycl_compiler_root}/../lib SYCL_NO_CXX11_ABI)
+ _ZN4sycl3_V17handler22verifyUsedKernelBundleERKSs ${sycl_compiler_root}/../lib SYCL_NO_CXX11_ABI)
if(SYCL_NO_CXX11_ABI)
list(APPEND sycl_compiler_flags -D_GLIBCXX_USE_CXX11_ABI=0)
endif()
@@ -807,6 +815,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
if(WIN32)
list(APPEND sycl_compiler_flags
+ -fuse-ld=link
-fms-extensions
-fms-compatibility
-D_WINDLL
@@ -817,36 +826,46 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
-D_CRT_SECURE_NO_DEPRECATE
-DONEAPI_EXPORT)
- if(sycl_compiler_compiler_name MATCHES "dpcpp")
- # The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables.
- add_custom_command(
- OUTPUT ${cycles_kernel_oneapi_lib}
- COMMAND "${sycl_compiler_root}/../../env/vars.bat"
- COMMAND ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
- DEPENDS ${cycles_oneapi_kernel_sources})
- else()
- # The open source SYCL compiler just goes by clang++ and does not have such a script.
- # Set the variables manually.
- string(REPLACE /Redist/ /Tools/ MSVC_TOOLS_DIR ${MSVC_REDIST_DIR})
- if(NOT CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION) # case for Ninja on Windows
- get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY)
- string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir})
- get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE)
- else()
- set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
- endif()
- list(APPEND sycl_compiler_flags
- -L "${MSVC_TOOLS_DIR}/lib/x64"
- -L "${WINDOWS_KIT_DIR}/um/x64"
- -L "${WINDOWS_KIT_DIR}/ucrt/x64")
- add_custom_command(
- OUTPUT ${cycles_kernel_oneapi_lib}
- COMMAND ${CMAKE_COMMAND} -E env
- "LIB=${sycl_compiler_root}/../lib" # for compiler to find sycl.lib
- "PATH=${OCLOC_INSTALL_DIR};${sycl_compiler_root}"
- ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
- DEPENDS ${cycles_oneapi_kernel_sources})
+ string(REPLACE /Redist/ /Tools/ MSVC_TOOLS_DIR ${MSVC_REDIST_DIR})
+ # Version Folder between Redist and Tools can mismatch sometimes
+ if(NOT EXISTS ${MSVC_TOOLS_DIR})
+ get_filename_component(cmake_ar_dir ${CMAKE_AR} DIRECTORY)
+ get_filename_component(MSVC_TOOLS_DIR "${cmake_ar_dir}/../../../" ABSOLUTE)
endif()
+ if(CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION)
+ set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
+ else() # case for Ninja on Windows
+ get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY)
+ string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir})
+ get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE)
+ endif()
+ list(APPEND sycl_compiler_flags
+ -L "${MSVC_TOOLS_DIR}/lib/x64"
+ -L "${WINDOWS_KIT_DIR}/um/x64"
+ -L "${WINDOWS_KIT_DIR}/ucrt/x64")
+
+ set(sycl_compiler_flags_Release ${sycl_compiler_flags})
+ set(sycl_compiler_flags_Debug ${sycl_compiler_flags})
+ set(sycl_compiler_flags_RelWithDebInfo ${sycl_compiler_flags})
+ set(sycl_compiler_flags_MinSizeRel ${sycl_compiler_flags})
+ list(APPEND sycl_compiler_flags_RelWithDebInfo -g)
+ list(APPEND sycl_compiler_flags_Debug
+ -g
+ -D_DEBUG
+ -nostdlib -Xclang --dependent-lib=msvcrtd)
+
+ add_custom_command(
+ OUTPUT ${cycles_kernel_oneapi_lib} ${cycles_kernel_oneapi_linker_lib}
+ COMMAND ${CMAKE_COMMAND} -E env
+ "LIB=${sycl_compiler_root}/../lib" # for compiler to find sycl.lib
+ "PATH=${OCLOC_INSTALL_DIR}\;${sycl_compiler_root}"
+ ${SYCL_COMPILER}
+ "$<$<CONFIG:Release>:${sycl_compiler_flags_Release}>"
+ "$<$<CONFIG:RelWithDebInfo>:${sycl_compiler_flags_RelWithDebInfo}>"
+ "$<$<CONFIG:Debug>:${sycl_compiler_flags_Debug}>"
+ "$<$<CONFIG:MinSizeRel>:${sycl_compiler_flags_Release}>"
+ COMMAND_EXPAND_LISTS
+ DEPENDS ${cycles_oneapi_kernel_sources})
else()
list(APPEND sycl_compiler_flags -fPIC)
@@ -858,55 +877,36 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
# libpi_level_zero.so can be placed next to it and get found.
list(APPEND sycl_compiler_flags -Wl,-rpath,'$$ORIGIN')
- # The oneAPI distribution calls the compiler "dpcpp" and comes with a script that sets environment variables.
- if(sycl_compiler_compiler_name MATCHES "dpcpp")
- add_custom_command(
- OUTPUT ${cycles_kernel_oneapi_lib}
- COMMAND bash -c \"source ${sycl_compiler_root}/../../env/vars.sh&&${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}\"
- DEPENDS ${cycles_oneapi_kernel_sources})
- else()
- # The open source SYCL compiler just goes by clang++ and does not have such a script.
- # Set the variables manually.
- if(NOT IGC_INSTALL_DIR)
- get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE)
- endif()
- add_custom_command(
- OUTPUT ${cycles_kernel_oneapi_lib}
- COMMAND ${CMAKE_COMMAND} -E env
- "LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib"
- "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" # env PATH is for compiler to find ld
- ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
- DEPENDS ${cycles_oneapi_kernel_sources})
+ if(NOT IGC_INSTALL_DIR)
+ get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE)
endif()
+ add_custom_command(
+ OUTPUT ${cycles_kernel_oneapi_lib}
+ COMMAND ${CMAKE_COMMAND} -E env
+ "LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib"
+ "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}" # env PATH is for compiler to find ld
+ ${SYCL_COMPILER} $<$<CONFIG:Debug>:-g>$<$<CONFIG:RelWithDebInfo>:-g> ${sycl_compiler_flags}
+ DEPENDS ${cycles_oneapi_kernel_sources})
+ endif()
+
+ if(NOT WITH_BLENDER)
+ # For the Cycles standalone put libraries next to the Cycles application.
+ set(cycles_oneapi_target_path ${CYCLES_INSTALL_PATH})
+ else()
+ # For Blender put the libraries next to the Blender executable.
+ #
+ # Note that the installation path in the delayed_install is relative to the versioned folder,
+ # which means we need to go one level up.
+ set(cycles_oneapi_target_path "../")
endif()
# install dynamic libraries required at runtime
if(WIN32)
- set(SYCL_RUNTIME_DEPENDENCIES
- sycl.dll
- pi_level_zero.dll
- )
- if(NOT WITH_BLENDER)
- # For the Cycles standalone put libraries next to the Cycles application.
- delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH})
- else()
- # For Blender put the libraries next to the Blender executable.
- #
- # Note that the installation path in the delayed_install is relative to the versioned folder,
- # which means we need to go one level up.
- delayed_install("${sycl_compiler_root}" "${SYCL_RUNTIME_DEPENDENCIES}" "../")
- endif()
+ delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path})
elseif(UNIX AND NOT APPLE)
- file(GLOB SYCL_RUNTIME_DEPENDENCIES
- ${sycl_compiler_root}/../lib/libsycl.so
- ${sycl_compiler_root}/../lib/libsycl.so.[0-9]
- ${sycl_compiler_root}/../lib/libsycl.so.[0-9].[0-9].[0-9]-[0-9]
- )
- list(APPEND SYCL_RUNTIME_DEPENDENCIES ${sycl_compiler_root}/../lib/libpi_level_zero.so)
- delayed_install("" "${SYCL_RUNTIME_DEPENDENCIES}" ${CYCLES_INSTALL_PATH}/lib)
+ delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path}/lib)
endif()
- delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cycles_kernel_oneapi_lib}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})
endif()
@@ -956,8 +956,8 @@ endif()
# Warnings to avoid using doubles in the kernel.
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_C_COMPILER_ID MATCHES "Clang")
- ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion")
- ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_double_promotion "-Werror=double-promotion")
+ add_check_cxx_compiler_flag(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion")
+ add_check_cxx_compiler_flag(CMAKE_CXX_FLAGS _has_cxxflag_double_promotion "-Werror=double-promotion")
unset(_has_cxxflag_float_conversion)
unset(_has_cxxflag_double_promotion)
endif()
diff --git a/intern/cycles/kernel/bake/bake.h b/intern/cycles/kernel/bake/bake.h
index ec87990b699..384ca9168f0 100644
--- a/intern/cycles/kernel/bake/bake.h
+++ b/intern/cycles/kernel/bake/bake.h
@@ -4,10 +4,13 @@
#pragma once
#include "kernel/camera/projection.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/integrator/displacement_shader.h"
+#include "kernel/integrator/surface_shader.h"
#include "kernel/geom/geom.h"
+#include "kernel/util/color.h"
+
CCL_NAMESPACE_BEGIN
ccl_device void kernel_displace_evaluate(KernelGlobals kg,
@@ -23,7 +26,7 @@ ccl_device void kernel_displace_evaluate(KernelGlobals kg,
/* Evaluate displacement shader. */
const float3 P = sd.P;
- shader_eval_displacement(kg, INTEGRATOR_STATE_NULL, &sd);
+ displacement_shader_eval(kg, INTEGRATOR_STATE_NULL, &sd);
float3 D = sd.P - P;
object_inverse_dir_transform(kg, &sd, &D);
@@ -62,10 +65,10 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg,
/* Evaluate shader.
* This is being evaluated for all BSDFs, so path flag does not contain a specific type. */
const uint32_t path_flag = PATH_RAY_EMISSION;
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT &
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT &
~(KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_NODE_LIGHT_PATH)>(
kg, INTEGRATOR_STATE_NULL, &sd, NULL, path_flag);
- float3 color = shader_background_eval(&sd);
+ Spectrum color = surface_shader_background(&sd);
#ifdef __KERNEL_DEBUG_NAN__
if (!isfinite_safe(color)) {
@@ -76,10 +79,12 @@ ccl_device void kernel_background_evaluate(KernelGlobals kg,
/* Ensure finite color, avoiding possible numerical instabilities in the path tracing kernels. */
color = ensure_finite(color);
+ float3 color_rgb = spectrum_to_rgb(color);
+
/* Write output. */
- output[offset * 3 + 0] += color.x;
- output[offset * 3 + 1] += color.y;
- output[offset * 3 + 2] += color.z;
+ output[offset * 3 + 0] += color_rgb.x;
+ output[offset * 3 + 1] += color_rgb.y;
+ output[offset * 3 + 2] += color_rgb.z;
}
ccl_device void kernel_curve_shadow_transparency_evaluate(
@@ -95,12 +100,12 @@ ccl_device void kernel_curve_shadow_transparency_evaluate(
shader_setup_from_curve(kg, &sd, in.object, in.prim, __float_as_int(in.v), in.u);
/* Evaluate transparency. */
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW &
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW &
~(KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_NODE_LIGHT_PATH)>(
kg, INTEGRATOR_STATE_NULL, &sd, NULL, PATH_RAY_SHADOW);
/* Write output. */
- output[offset] = clamp(average(shader_bsdf_transparency(kg, &sd)), 0.0f, 1.0f);
+ output[offset] = clamp(average(surface_shader_transparency(kg, &sd)), 0.0f, 1.0f);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 9972de86c47..29789a15b28 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -1,40 +1,47 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
-/* BVH
- *
- * Bounding volume hierarchy for ray tracing. We compile different variations
- * of the same BVH traversal function for faster rendering when some types of
- * primitives are not needed, using #includes to work around the lack of
- * C++ templates in OpenCL.
- *
- * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
- * the code has been extended and modified to support more primitives and work
- * with CPU/CUDA/OpenCL. */
-
#pragma once
-#ifdef __EMBREE__
-# include "kernel/bvh/embree.h"
-#endif
-
-#ifdef __METALRT__
-# include "kernel/bvh/metal.h"
-#endif
-
#include "kernel/bvh/types.h"
#include "kernel/bvh/util.h"
#include "kernel/integrator/state_util.h"
+/* Device specific acceleration structures for ray tracing. */
+
+#if defined(__EMBREE__)
+# include "kernel/device/cpu/bvh.h"
+# define __BVH2__
+#elif defined(__METALRT__)
+# include "kernel/device/metal/bvh.h"
+#elif defined(__KERNEL_OPTIX__)
+# include "kernel/device/optix/bvh.h"
+#else
+# define __BVH2__
+#endif
+
CCL_NAMESPACE_BEGIN
-#if !defined(__KERNEL_GPU_RAYTRACING__)
+#ifdef __BVH2__
-/* Regular BVH traversal */
+/* BVH2
+ *
+ * Bounding volume hierarchy for ray tracing, when no native acceleration
+ * structure is available for the device.
+ *
+ * We compile different variations of the same BVH traversal function for
+ * faster rendering when some types of primitives are not needed, using #includes
+ * to work around the lack of C++ templates in OpenCL.
+ *
+ * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
+ * the code has been extended and modified to support more primitives and work
+ * with CPU and various GPU kernel languages. */
# include "kernel/bvh/nodes.h"
+/* Regular BVH traversal */
+
# define BVH_FUNCTION_NAME bvh_intersect
# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
# include "kernel/bvh/traversal.h"
@@ -57,260 +64,20 @@ CCL_NAMESPACE_BEGIN
# include "kernel/bvh/traversal.h"
# endif
-/* Subsurface scattering BVH traversal */
-
-# if defined(__BVH_LOCAL__)
-# define BVH_FUNCTION_NAME bvh_intersect_local
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/local.h"
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_local_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/local.h"
-# endif
-# endif /* __BVH_LOCAL__ */
-
-/* Volume BVH traversal */
-
-# if defined(__VOLUME__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/volume.h"
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/volume.h"
-# endif
-# endif /* __VOLUME__ */
-
-/* Record all intersections - Shadow BVH traversal */
-
-# if defined(__SHADOW_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
-# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
-# include "kernel/bvh/shadow_all.h"
-
-# if defined(__HAIR__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
-# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD
-# include "kernel/bvh/shadow_all.h"
-# endif
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD
-# include "kernel/bvh/shadow_all.h"
-# endif
-
-# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
-# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD
-# include "kernel/bvh/shadow_all.h"
-# endif
-
-# endif /* __SHADOW_RECORD_ALL__ */
-
-/* Record all intersections - Volume BVH traversal. */
-
-# if defined(__VOLUME_RECORD_ALL__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all
-# define BVH_FUNCTION_FEATURES BVH_HAIR
-# include "kernel/bvh/volume_all.h"
-
-# if defined(__OBJECT_MOTION__)
-# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
-# include "kernel/bvh/volume_all.h"
-# endif
-# endif /* __VOLUME_RECORD_ALL__ */
-
-# undef BVH_FEATURE
-# undef BVH_NAME_JOIN
-# undef BVH_NAME_EVAL
-# undef BVH_FUNCTION_FULL_NAME
-
-#endif /* !defined(__KERNEL_GPU_RAYTRACING__) */
-
-ccl_device_inline bool scene_intersect_valid(ccl_private const Ray *ray)
-{
- /* NOTE: Due to some vectorization code non-finite origin point might
- * cause lots of false-positive intersections which will overflow traversal
- * stack.
- * This code is a quick way to perform early output, to avoid crashes in
- * such cases.
- * From production scenes so far it seems it's enough to test first element
- * only.
- * Scene intersection may also called with empty rays for conditional trace
- * calls that evaluate to false, so filter those out.
- */
- return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
-}
-
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
ccl_private const Ray *ray,
const uint visibility,
ccl_private Intersection *isect)
{
-#ifdef __KERNEL_OPTIX__
- uint p0 = 0;
- uint p1 = 0;
- uint p2 = 0;
- uint p3 = 0;
- uint p4 = visibility;
- uint p5 = PRIMITIVE_NONE;
- uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
- uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
- uint ray_mask = visibility & 0xFF;
- uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
- else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
- }
-
- optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
- ray->P,
- ray->D,
- ray->tmin,
- ray->tmax,
- ray->time,
- ray_mask,
- ray_flags,
- 0, /* SBT offset for PG_HITD */
- 0,
- 0,
- p0,
- p1,
- p2,
- p3,
- p4,
- p5,
- p6,
- p7);
-
- isect->t = __uint_as_float(p0);
- isect->u = __uint_as_float(p1);
- isect->v = __uint_as_float(p2);
- isect->prim = p3;
- isect->object = p4;
- isect->type = p5;
-
- return p5 != PRIMITIVE_NONE;
-#elif defined(__METALRT__)
-
- if (!scene_intersect_valid(ray)) {
- isect->t = ray->tmax;
- isect->type = PRIMITIVE_NONE;
- return false;
- }
-
-# if defined(__KERNEL_DEBUG__)
- if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
- isect->t = ray->tmax;
- isect->type = PRIMITIVE_NONE;
- kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
- return false;
- }
-
- if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
- isect->t = ray->tmax;
- isect->type = PRIMITIVE_NONE;
- kernel_assert(!"Invalid ift_default");
- return false;
- }
-# endif
-
- metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
- metalrt_intersector_type metalrt_intersect;
-
- if (!kernel_data.bvh.have_curves) {
- metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
- }
-
- MetalRTIntersectionPayload payload;
- payload.self = ray->self;
- payload.u = 0.0f;
- payload.v = 0.0f;
- payload.visibility = visibility;
-
- typename metalrt_intersector_type::result_type intersection;
-
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- /* No further intersector setup required: Default MetalRT behavior is any-hit. */
- }
- else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- /* No further intersector setup required: Shadow ray early termination is controlled by the
- * intersection handler */
- }
-
-# if defined(__METALRT_MOTION__)
- payload.time = ray->time;
- intersection = metalrt_intersect.intersect(r,
- metal_ancillaries->accel_struct,
- ray_mask,
- ray->time,
- metal_ancillaries->ift_default,
- payload);
-# else
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
-# endif
-
- if (intersection.type == intersection_type::none) {
- isect->t = ray->tmax;
- isect->type = PRIMITIVE_NONE;
-
- return false;
- }
-
- isect->t = intersection.distance;
-
- isect->prim = payload.prim;
- isect->type = payload.type;
- isect->object = intersection.user_instance_id;
-
- isect->t = intersection.distance;
- if (intersection.type == intersection_type::triangle) {
- isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
- intersection.triangle_barycentric_coord.x;
- isect->v = intersection.triangle_barycentric_coord.x;
- }
- else {
- isect->u = payload.u;
- isect->v = payload.v;
- }
-
- return isect->type != PRIMITIVE_NONE;
-
-#else
-
- if (!scene_intersect_valid(ray)) {
+ if (!intersection_ray_valid(ray)) {
return false;
}
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
- isect->t = ray->tmax;
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
- IntersectContext rtc_ctx(&ctx);
- RTCRayHit ray_hit;
- ctx.ray = ray;
- kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
- rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
- if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
- ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
- kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
- return true;
- }
- return false;
+ return kernel_embree_intersect(kg, ray, visibility, isect);
}
-# endif /* __EMBREE__ */
+# endif
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
@@ -322,7 +89,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
return bvh_intersect_motion(kg, ray, isect, visibility);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
@@ -331,10 +98,22 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
# endif /* __HAIR__ */
return bvh_intersect(kg, ray, isect, visibility);
-#endif /* __KERNEL_OPTIX__ */
}
-#ifdef __BVH_LOCAL__
+/* Single object BVH traversal, for SSS/AO/bevel. */
+
+# ifdef __BVH_LOCAL__
+
+# define BVH_FUNCTION_NAME bvh_intersect_local
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "kernel/bvh/local.h"
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_local_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/local.h"
+# endif
+
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
ccl_private const Ray *ray,
ccl_private LocalIntersection *local_isect,
@@ -342,108 +121,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
ccl_private uint *lcg_state,
int max_hits)
{
-# ifdef __KERNEL_OPTIX__
- uint p0 = pointer_pack_to_uint_0(lcg_state);
- uint p1 = pointer_pack_to_uint_1(lcg_state);
- uint p2 = pointer_pack_to_uint_0(local_isect);
- uint p3 = pointer_pack_to_uint_1(local_isect);
- uint p4 = local_object;
- uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
- uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
- /* Is set to zero on miss or if ray is aborted, so can be used as return value. */
- uint p5 = max_hits;
-
- if (local_isect) {
- local_isect->num_hits = 0; /* Initialize hit count to zero. */
- }
- optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
- ray->P,
- ray->D,
- ray->tmin,
- ray->tmax,
- ray->time,
- 0xFF,
- /* Need to always call into __anyhit__kernel_optix_local_hit. */
- OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
- 2, /* SBT offset for PG_HITL */
- 0,
- 0,
- p0,
- p1,
- p2,
- p3,
- p4,
- p5,
- p6,
- p7);
-
- return p5;
-# elif defined(__METALRT__)
- if (!scene_intersect_valid(ray)) {
- if (local_isect) {
- local_isect->num_hits = 0;
- }
- return false;
- }
-
-# if defined(__KERNEL_DEBUG__)
- if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
- if (local_isect) {
- local_isect->num_hits = 0;
- }
- kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
- return false;
- }
-
- if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
- if (local_isect) {
- local_isect->num_hits = 0;
- }
- kernel_assert(!"Invalid ift_local");
- return false;
- }
-# endif
-
- metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
- metalrt_intersector_type metalrt_intersect;
-
- metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
- if (!kernel_data.bvh.have_curves) {
- metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
- }
-
- MetalRTIntersectionLocalPayload payload;
- payload.self = ray->self;
- payload.local_object = local_object;
- payload.max_hits = max_hits;
- payload.local_isect.num_hits = 0;
- if (lcg_state) {
- payload.has_lcg_state = true;
- payload.lcg_state = *lcg_state;
- }
- payload.result = false;
-
- typename metalrt_intersector_type::result_type intersection;
-
-# if defined(__METALRT_MOTION__)
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
-# else
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
-# endif
-
- if (lcg_state) {
- *lcg_state = payload.lcg_state;
- }
- *local_isect = payload.local_isect;
-
- return payload.result;
-
-# else
-
- if (!scene_intersect_valid(ray)) {
+ if (!intersection_ray_valid(ray)) {
if (local_isect) {
local_isect->num_hits = 0;
}
@@ -452,58 +130,9 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
- const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
- SD_OBJECT_TRANSFORM_APPLIED);
- CCLIntersectContext ctx(
- kg, has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
- ctx.lcg_state = lcg_state;
- ctx.max_hits = max_hits;
- ctx.ray = ray;
- ctx.local_isect = local_isect;
- if (local_isect) {
- local_isect->num_hits = 0;
- }
- ctx.local_object_id = local_object;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
-
- /* If this object has its own BVH, use it. */
- if (has_bvh) {
- RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
- if (geom) {
- float3 P = ray->P;
- float3 dir = ray->D;
- float3 idir = ray->D;
- Transform ob_itfm;
- rtc_ray.tfar = ray->tmax *
- bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
- /* bvh_instance_motion_push() returns the inverse transform but
- * it's not needed here. */
- (void)ob_itfm;
-
- rtc_ray.org_x = P.x;
- rtc_ray.org_y = P.y;
- rtc_ray.org_z = P.z;
- rtc_ray.dir_x = dir.x;
- rtc_ray.dir_y = dir.y;
- rtc_ray.dir_z = dir.z;
- RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
- kernel_assert(scene);
- if (scene) {
- rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
- }
- }
- }
- else {
- rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
- }
-
- /* rtcOccluded1 sets tfar to -inf if a hit was found. */
- return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
- ;
+ return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
}
-# endif /* __EMBREE__ */
+# endif
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
@@ -511,122 +140,44 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
}
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
-# endif /* __KERNEL_OPTIX__ */
}
-#endif
+# endif
-#ifdef __SHADOW_RECORD_ALL__
-ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
- IntegratorShadowState state,
- ccl_private const Ray *ray,
- uint visibility,
- uint max_hits,
- ccl_private uint *num_recorded_hits,
- ccl_private float *throughput)
-{
-# ifdef __KERNEL_OPTIX__
- uint p0 = state;
- uint p1 = __float_as_uint(1.0f); /* Throughput. */
- uint p2 = 0; /* Number of hits. */
- uint p3 = max_hits;
- uint p4 = visibility;
- uint p5 = false;
- uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
- uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
+/* Transparent shadow BVH traversal, recording multiple intersections. */
- optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
- ray->P,
- ray->D,
- ray->tmin,
- ray->tmax,
- ray->time,
- ray_mask,
- /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
- OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
- 1, /* SBT offset for PG_HITS */
- 0,
- 0,
- p0,
- p1,
- p2,
- p3,
- p4,
- p5,
- p6,
- p7);
-
- *num_recorded_hits = uint16_unpack_from_uint_0(p2);
- *throughput = __uint_as_float(p1);
-
- return p5;
-# elif defined(__METALRT__)
-
- if (!scene_intersect_valid(ray)) {
- return false;
- }
+# ifdef __SHADOW_RECORD_ALL__
-# if defined(__KERNEL_DEBUG__)
- if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
- kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
- return false;
- }
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all
+# define BVH_FUNCTION_FEATURES BVH_POINTCLOUD
+# include "kernel/bvh/shadow_all.h"
- if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
- kernel_assert(!"Invalid ift_shadow");
- return false;
- }
+# if defined(__HAIR__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_POINTCLOUD
+# include "kernel/bvh/shadow_all.h"
# endif
- metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
- metalrt_intersector_type metalrt_intersect;
-
- metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
- if (!kernel_data.bvh.have_curves) {
- metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
- }
-
- MetalRTIntersectionShadowPayload payload;
- payload.self = ray->self;
- payload.visibility = visibility;
- payload.max_hits = max_hits;
- payload.num_hits = 0;
- payload.num_recorded_hits = 0;
- payload.throughput = 1.0f;
- payload.result = false;
- payload.state = state;
-
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
-
- typename metalrt_intersector_type::result_type intersection;
-
-# if defined(__METALRT_MOTION__)
- payload.time = ray->time;
- intersection = metalrt_intersect.intersect(r,
- metal_ancillaries->accel_struct,
- ray_mask,
- ray->time,
- metal_ancillaries->ift_shadow,
- payload);
-# else
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_POINTCLOUD
+# include "kernel/bvh/shadow_all.h"
# endif
- *num_recorded_hits = payload.num_recorded_hits;
- *throughput = payload.throughput;
-
- return payload.result;
+# if defined(__HAIR__) && defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
+# define BVH_FUNCTION_FEATURES BVH_HAIR | BVH_MOTION | BVH_POINTCLOUD
+# include "kernel/bvh/shadow_all.h"
+# endif
-# else
- if (!scene_intersect_valid(ray)) {
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
+ IntegratorShadowState state,
+ ccl_private const Ray *ray,
+ uint visibility,
+ uint max_hits,
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
+{
+ if (!intersection_ray_valid(ray)) {
*num_recorded_hits = 0;
*throughput = 1.0f;
return false;
@@ -634,21 +185,10 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
# ifdef __EMBREE__
if (kernel_data.device_bvh) {
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
- Intersection *isect_array = (Intersection *)state->shadow_isect;
- ctx.isect_s = isect_array;
- ctx.max_hits = max_hits;
- ctx.ray = ray;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, visibility);
- rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
-
- *num_recorded_hits = ctx.num_recorded_hits;
- *throughput = ctx.throughput;
- return ctx.opaque_hit;
+ return kernel_embree_intersect_shadow_all(
+ kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
}
-# endif /* __EMBREE__ */
+# endif
# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
@@ -662,7 +202,7 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
return bvh_intersect_shadow_all_motion(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
# ifdef __HAIR__
if (kernel_data.bvh.have_curves) {
@@ -673,132 +213,29 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
return bvh_intersect_shadow_all(
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
-# endif /* __KERNEL_OPTIX__ */
}
-#endif /* __SHADOW_RECORD_ALL__ */
-
-#ifdef __VOLUME__
-ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
- ccl_private const Ray *ray,
- ccl_private Intersection *isect,
- const uint visibility)
-{
-# ifdef __KERNEL_OPTIX__
- uint p0 = 0;
- uint p1 = 0;
- uint p2 = 0;
- uint p3 = 0;
- uint p4 = visibility;
- uint p5 = PRIMITIVE_NONE;
- uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
- uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
-
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
-
- optixTrace(scene_intersect_valid(ray) ? kernel_data.device_bvh : 0,
- ray->P,
- ray->D,
- ray->tmin,
- ray->tmax,
- ray->time,
- ray_mask,
- /* Need to always call into __anyhit__kernel_optix_volume_test. */
- OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
- 3, /* SBT offset for PG_HITV */
- 0,
- 0,
- p0,
- p1,
- p2,
- p3,
- p4,
- p5,
- p6,
- p7);
-
- isect->t = __uint_as_float(p0);
- isect->u = __uint_as_float(p1);
- isect->v = __uint_as_float(p2);
- isect->prim = p3;
- isect->object = p4;
- isect->type = p5;
-
- return p5 != PRIMITIVE_NONE;
-# elif defined(__METALRT__)
-
- if (!scene_intersect_valid(ray)) {
- return false;
- }
-# if defined(__KERNEL_DEBUG__)
- if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
- kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
- return false;
- }
-
- if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
- kernel_assert(!"Invalid ift_default");
- return false;
- }
-# endif
-
- metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
- metalrt_intersector_type metalrt_intersect;
-
- metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
- if (!kernel_data.bvh.have_curves) {
- metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
- }
+# endif /* __SHADOW_RECORD_ALL__ */
- MetalRTIntersectionPayload payload;
- payload.self = ray->self;
- payload.visibility = visibility;
+/* Volume BVH traversal, for initializing or updating the volume stack. */
- typename metalrt_intersector_type::result_type intersection;
+# if defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__)
- uint ray_mask = visibility & 0xFF;
- if (0 == ray_mask && (visibility & ~0xFF) != 0) {
- ray_mask = 0xFF;
- }
+# define BVH_FUNCTION_NAME bvh_intersect_volume
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "kernel/bvh/volume.h"
-# if defined(__METALRT_MOTION__)
- payload.time = ray->time;
- intersection = metalrt_intersect.intersect(r,
- metal_ancillaries->accel_struct,
- ray_mask,
- ray->time,
- metal_ancillaries->ift_default,
- payload);
-# else
- intersection = metalrt_intersect.intersect(
- r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/volume.h"
# endif
- if (intersection.type == intersection_type::none) {
- return false;
- }
-
- isect->prim = payload.prim;
- isect->type = payload.type;
- isect->object = intersection.user_instance_id;
-
- isect->t = intersection.distance;
- if (intersection.type == intersection_type::triangle) {
- isect->u = 1.0f - intersection.triangle_barycentric_coord.y -
- intersection.triangle_barycentric_coord.x;
- isect->v = intersection.triangle_barycentric_coord.x;
- }
- else {
- isect->u = payload.u;
- isect->v = payload.v;
- }
-
- return isect->type != PRIMITIVE_NONE;
-
-# else
- if (!scene_intersect_valid(ray)) {
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint visibility)
+{
+ if (!intersection_ray_valid(ray)) {
return false;
}
@@ -809,44 +246,56 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
# endif /* __OBJECT_MOTION__ */
return bvh_intersect_volume(kg, ray, isect, visibility);
-# endif /* __KERNEL_OPTIX__ */
}
-#endif /* __VOLUME__ */
+# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
-#ifdef __VOLUME_RECORD_ALL__
-ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals kg,
- ccl_private const Ray *ray,
- ccl_private Intersection *isect,
- const uint max_hits,
- const uint visibility)
+/* Volume BVH traversal, for initializing or updating the volume stack.
+ * Variation that records multiple intersections at once. */
+
+# if defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__)
+
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all
+# define BVH_FUNCTION_FEATURES BVH_HAIR
+# include "kernel/bvh/volume_all.h"
+
+# if defined(__OBJECT_MOTION__)
+# define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
+# define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR
+# include "kernel/bvh/volume_all.h"
+# endif
+
+ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint max_hits,
+ const uint visibility)
{
- if (!scene_intersect_valid(ray)) {
+ if (!intersection_ray_valid(ray)) {
return false;
}
-# ifdef __EMBREE__
+# ifdef __EMBREE__
if (kernel_data.device_bvh) {
- CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
- ctx.isect_s = isect;
- ctx.max_hits = max_hits;
- ctx.num_hits = 0;
- ctx.ray = ray;
- IntersectContext rtc_ctx(&ctx);
- RTCRay rtc_ray;
- kernel_embree_setup_ray(*ray, rtc_ray, visibility);
- rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
- return ctx.num_hits;
+ return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
}
-# endif /* __EMBREE__ */
+# endif
-# ifdef __OBJECT_MOTION__
+# ifdef __OBJECT_MOTION__
if (kernel_data.bvh.have_motion) {
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
}
-# endif /* __OBJECT_MOTION__ */
+# endif /* __OBJECT_MOTION__ */
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
}
-#endif /* __VOLUME_RECORD_ALL__ */
+
+# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
+
+# undef BVH_FEATURE
+# undef BVH_NAME_JOIN
+# undef BVH_NAME_EVAL
+# undef BVH_FUNCTION_FULL_NAME
+
+#endif /* __BVH2__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/embree.h b/intern/cycles/kernel/bvh/embree.h
deleted file mode 100644
index fecbccac2f8..00000000000
--- a/intern/cycles/kernel/bvh/embree.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2022 Blender Foundation. */
-
-#pragma once
-
-#include <embree3/rtcore_ray.h>
-#include <embree3/rtcore_scene.h>
-
-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
-
-#include "kernel/bvh/util.h"
-
-#include "util/vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-struct CCLIntersectContext {
- typedef enum {
- RAY_REGULAR = 0,
- RAY_SHADOW_ALL = 1,
- RAY_LOCAL = 2,
- RAY_SSS = 3,
- RAY_VOLUME_ALL = 4,
- } RayType;
-
- KernelGlobals kg;
- RayType type;
-
- /* For avoiding self intersections */
- const Ray *ray;
-
- /* for shadow rays */
- Intersection *isect_s;
- uint max_hits;
- uint num_hits;
- uint num_recorded_hits;
- float throughput;
- float max_t;
- bool opaque_hit;
-
- /* for SSS Rays: */
- LocalIntersection *local_isect;
- int local_object_id;
- uint *lcg_state;
-
- CCLIntersectContext(KernelGlobals kg_, RayType type_)
- {
- kg = kg_;
- type = type_;
- ray = NULL;
- max_hits = 1;
- num_hits = 0;
- num_recorded_hits = 0;
- throughput = 1.0f;
- max_t = FLT_MAX;
- opaque_hit = false;
- isect_s = NULL;
- local_isect = NULL;
- local_object_id = -1;
- lcg_state = NULL;
- }
-};
-
-class IntersectContext {
- public:
- IntersectContext(CCLIntersectContext *ctx)
- {
- rtcInitIntersectContext(&context);
- userRayExt = ctx;
- }
- RTCIntersectContext context;
- CCLIntersectContext *userRayExt;
-};
-
-ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
- RTCRay &rtc_ray,
- const uint visibility)
-{
- rtc_ray.org_x = ray.P.x;
- rtc_ray.org_y = ray.P.y;
- rtc_ray.org_z = ray.P.z;
- rtc_ray.dir_x = ray.D.x;
- rtc_ray.dir_y = ray.D.y;
- rtc_ray.dir_z = ray.D.z;
- rtc_ray.tnear = ray.tmin;
- rtc_ray.tfar = ray.tmax;
- rtc_ray.time = ray.time;
- rtc_ray.mask = visibility;
-}
-
-ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
- RTCRayHit &rayhit,
- const uint visibility)
-{
- kernel_embree_setup_ray(ray, rayhit.ray, visibility);
- rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
- rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
-}
-
-ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
- const RTCHit *hit,
- const Ray *ray)
-{
- bool status = false;
- if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
- const int oID = hit->instID[0] / 2;
- if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
- RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
- const int pID = hit->primID +
- (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
- status = intersection_skip_self_shadow(ray->self, oID, pID);
- }
- }
- else {
- const int oID = hit->geomID / 2;
- if ((ray->self.object == oID) || (ray->self.light_object == oID)) {
- const int pID = hit->primID + (intptr_t)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
- status = intersection_skip_self_shadow(ray->self, oID, pID);
- }
- }
-
- return status;
-}
-
-ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
- const RTCRay *ray,
- const RTCHit *hit,
- Intersection *isect)
-{
- isect->t = ray->tfar;
- if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
- RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
- isect->prim = hit->primID +
- (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
- isect->object = hit->instID[0] / 2;
- }
- else {
- isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
- isect->object = hit->geomID / 2;
- }
-
- const bool is_hair = hit->geomID & 1;
- if (is_hair) {
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
- isect->type = segment.type;
- isect->prim = segment.prim;
- isect->u = hit->u;
- isect->v = hit->v;
- }
- else {
- isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
- isect->u = 1.0f - hit->v - hit->u;
- isect->v = hit->u;
- }
-}
-
-ccl_device_inline void kernel_embree_convert_sss_hit(
- KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
-{
- isect->u = 1.0f - hit->v - hit->u;
- isect->v = hit->u;
- isect->t = ray->tfar;
- RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
- rtcGetGeometry(kernel_data.device_bvh, object * 2));
- isect->prim = hit->primID +
- (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
- isect->object = object;
- isect->type = kernel_data_fetch(objects, object).primitive_type;
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/bvh/local.h b/intern/cycles/kernel/bvh/local.h
index 017a241ef4a..add61adc126 100644
--- a/intern/cycles/kernel/bvh/local.h
+++ b/intern/cycles/kernel/bvh/local.h
@@ -59,14 +59,10 @@ ccl_device_inline
const int object_flag = kernel_data_fetch(object_flag, local_object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
- const float t_world_to_instance = bvh_instance_motion_push(
- kg, local_object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
#else
- const float t_world_to_instance = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
#endif
- isect_t *= t_world_to_instance;
- tmin *= t_world_to_instance;
object = local_object;
}
diff --git a/intern/cycles/kernel/bvh/metal.h b/intern/cycles/kernel/bvh/metal.h
deleted file mode 100644
index 04289e259a7..00000000000
--- a/intern/cycles/kernel/bvh/metal.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2021-2022 Blender Foundation */
-
-struct MetalRTIntersectionPayload {
- RaySelfPrimitives self;
- uint visibility;
- float u, v;
- int prim;
- int type;
-#if defined(__METALRT_MOTION__)
- float time;
-#endif
-};
-
-struct MetalRTIntersectionLocalPayload {
- RaySelfPrimitives self;
- uint local_object;
- uint lcg_state;
- short max_hits;
- bool has_lcg_state;
- bool result;
- LocalIntersection local_isect;
-};
-
-struct MetalRTIntersectionShadowPayload {
- RaySelfPrimitives self;
- uint visibility;
-#if defined(__METALRT_MOTION__)
- float time;
-#endif
- int state;
- float throughput;
- short max_hits;
- short num_hits;
- short num_recorded_hits;
- bool result;
-};
diff --git a/intern/cycles/kernel/bvh/shadow_all.h b/intern/cycles/kernel/bvh/shadow_all.h
index db3c91569aa..b31ba479e4f 100644
--- a/intern/cycles/kernel/bvh/shadow_all.h
+++ b/intern/cycles/kernel/bvh/shadow_all.h
@@ -53,23 +53,11 @@ ccl_device_inline
int object = OBJECT_NONE;
uint num_hits = 0;
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
/* Max distance in world space. May be dynamically reduced when max number of
* recorded hits is exceeded and we no longer need to find hits beyond the max
* distance found. */
- float t_max_world = ray->tmax;
-
- /* Current maximum distance to the intersection.
- * Is calculated as a ray length, transformed to an object space when entering
- * instance node. */
- float t_max_current = ray->tmax;
-
- /* Conversion from world to local space for the current instance if any, 1.0
- * otherwise. */
- float t_world_to_instance = 1.0f;
+ const float tmax = ray->tmax;
+ float tmax_hits = tmax;
*r_num_recorded_hits = 0;
*r_throughput = 1.0f;
@@ -90,7 +78,7 @@ ccl_device_inline
#endif
idir,
tmin,
- t_max_current,
+ tmax,
node_addr,
visibility,
dist);
@@ -158,16 +146,8 @@ ccl_device_inline
switch (type & PRIMITIVE_ALL) {
case PRIMITIVE_TRIANGLE: {
- hit = triangle_intersect(kg,
- &isect,
- P,
- dir,
- tmin,
- t_max_current,
- visibility,
- prim_object,
- prim,
- prim_addr);
+ hit = triangle_intersect(
+ kg, &isect, P, dir, tmin, tmax, visibility, prim_object, prim, prim_addr);
break;
}
#if BVH_FEATURE(BVH_MOTION)
@@ -177,7 +157,7 @@ ccl_device_inline
P,
dir,
tmin,
- t_max_current,
+ tmax,
ray->time,
visibility,
prim_object,
@@ -200,16 +180,8 @@ ccl_device_inline
}
const int curve_type = kernel_data_fetch(prim_type, prim_addr);
- hit = curve_intersect(kg,
- &isect,
- P,
- dir,
- tmin,
- t_max_current,
- prim_object,
- prim,
- ray->time,
- curve_type);
+ hit = curve_intersect(
+ kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, curve_type);
break;
}
@@ -226,16 +198,8 @@ ccl_device_inline
}
const int point_type = kernel_data_fetch(prim_type, prim_addr);
- hit = point_intersect(kg,
- &isect,
- P,
- dir,
- tmin,
- t_max_current,
- prim_object,
- prim,
- ray->time,
- point_type);
+ hit = point_intersect(
+ kg, &isect, P, dir, tmin, tmax, prim_object, prim, ray->time, point_type);
break;
}
#endif /* BVH_FEATURE(BVH_POINTCLOUD) */
@@ -247,9 +211,6 @@ ccl_device_inline
/* shadow ray early termination */
if (hit) {
- /* Convert intersection distance to world space. */
- isect.t /= t_world_to_instance;
-
/* detect if this surface has a shader with transparent shadows */
/* todo: optimize so primitive visibility flag indicates if
* the primitive has a transparent shadow shader? */
@@ -268,7 +229,7 @@ ccl_device_inline
/* Always use baked shadow transparency for curves. */
if (isect.type & PRIMITIVE_CURVE) {
*r_throughput *= intersection_curve_shadow_transparency(
- kg, isect.object, isect.prim, isect.u);
+ kg, isect.object, isect.prim, isect.type, isect.u);
if (*r_throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
return true;
@@ -281,7 +242,7 @@ ccl_device_inline
if (record_intersection) {
/* Test if we need to record this transparent intersection. */
const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
- if (*r_num_recorded_hits < max_record_hits || isect.t < t_max_world) {
+ if (*r_num_recorded_hits < max_record_hits || isect.t < tmax_hits) {
/* If maximum number of hits was reached, replace the intersection with the
* highest distance. We want to find the N closest intersections. */
const uint num_recorded_hits = min(*r_num_recorded_hits, max_record_hits);
@@ -303,7 +264,7 @@ ccl_device_inline
}
/* Limit the ray distance and stop counting hits beyond this. */
- t_max_world = max(isect.t, max_t);
+ tmax_hits = max(isect.t, max_t);
}
integrator_state_write_shadow_isect(state, &isect, isect_index);
@@ -321,16 +282,11 @@ ccl_device_inline
object = kernel_data_fetch(prim_object, -prim_addr - 1);
#if BVH_FEATURE(BVH_MOTION)
- t_world_to_instance = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
#else
- t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
- /* Convert intersection to object space. */
- t_max_current *= t_world_to_instance;
- tmin *= t_world_to_instance;
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -344,18 +300,9 @@ ccl_device_inline
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
-#if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-#else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-#endif
-
- /* Restore world space ray length. */
- tmin = ray->tmin;
- t_max_current = ray->tmax;
+ bvh_instance_pop(ray, &P, &dir, &idir);
object = OBJECT_NONE;
- t_world_to_instance = 1.0f;
node_addr = traversal_stack[stack_ptr];
--stack_ptr;
}
diff --git a/intern/cycles/kernel/bvh/traversal.h b/intern/cycles/kernel/bvh/traversal.h
index 0ff38bf02de..f3744aca5c0 100644
--- a/intern/cycles/kernel/bvh/traversal.h
+++ b/intern/cycles/kernel/bvh/traversal.h
@@ -43,13 +43,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
- float tmin = ray->tmin;
+ const float tmin = ray->tmin;
int object = OBJECT_NONE;
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
@@ -223,15 +219,11 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
object = kernel_data_fetch(prim_object, -prim_addr - 1);
#if BVH_FEATURE(BVH_MOTION)
- const float t_world_to_instance = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
#else
- const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
- isect->t *= t_world_to_instance;
- tmin *= t_world_to_instance;
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -245,12 +237,7 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals kg,
kernel_assert(object != OBJECT_NONE);
/* instance pop */
-#if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-#else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-#endif
- tmin = ray->tmin;
+ bvh_instance_pop(ray, &P, &dir, &idir);
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
diff --git a/intern/cycles/kernel/bvh/util.h b/intern/cycles/kernel/bvh/util.h
index 385e904d20f..9ba787550c5 100644
--- a/intern/cycles/kernel/bvh/util.h
+++ b/intern/cycles/kernel/bvh/util.h
@@ -5,6 +5,21 @@
CCL_NAMESPACE_BEGIN
+ccl_device_inline bool intersection_ray_valid(ccl_private const Ray *ray)
+{
+ /* NOTE: Due to some vectorization code non-finite origin point might
+ * cause lots of false-positive intersections which will overflow traversal
+ * stack.
+ * This code is a quick way to perform early output, to avoid crashes in
+ * such cases.
+ * From production scenes so far it seems it's enough to test first element
+ * only.
+ * Scene intersection may also called with empty rays for conditional trace
+ * calls that evaluate to false, so filter those out.
+ */
+ return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
+}
+
/* Offset intersection distance by the smallest possible amount, to skip
* intersections at this distance. This works in cases where the ray start
* position is unchanged and only tmin is updated, since for self
@@ -18,7 +33,31 @@ ccl_device_forceinline float intersection_t_offset(const float t)
return __uint_as_float(bits);
}
-#if defined(__KERNEL_CPU__)
+/* Ray offset to avoid self intersection.
+ *
+ * This function can be used to compute a modified ray start position for rays
+ * leaving from a surface. This is from:
+ * "A Fast and Robust Method for Avoiding Self-Intersection"
+ * Ray Tracing Gems, chapter 6.
+ */
+ccl_device_inline float3 ray_offset(const float3 P, const float3 Ng)
+{
+ const float int_scale = 256.0f;
+ const int3 of_i = make_int3(
+ (int)(int_scale * Ng.x), (int)(int_scale * Ng.y), (int)(int_scale * Ng.z));
+
+ const float3 p_i = make_float3(
+ __int_as_float(__float_as_int(P.x) + ((P.x < 0) ? -of_i.x : of_i.x)),
+ __int_as_float(__float_as_int(P.y) + ((P.y < 0) ? -of_i.y : of_i.y)),
+ __int_as_float(__float_as_int(P.z) + ((P.z < 0) ? -of_i.z : of_i.z)));
+ const float origin = 1.0f / 32.0f;
+ const float float_scale = 1.0f / 65536.0f;
+ return make_float3(fabsf(P.x) < origin ? P.x + float_scale * Ng.x : p_i.x,
+ fabsf(P.y) < origin ? P.y + float_scale * Ng.y : p_i.y,
+ fabsf(P.z) < origin ? P.z + float_scale * Ng.z : p_i.z);
+}
+
+#ifndef __KERNEL_GPU__
ccl_device int intersections_compare(const void *a, const void *b)
{
const Intersection *isect_a = (const Intersection *)a;
@@ -151,10 +190,8 @@ ccl_device_inline int intersection_find_attribute(KernelGlobals kg,
/* Cut-off value to stop transparent shadow tracing when practically opaque. */
#define CURVE_SHADOW_TRANSPARENCY_CUTOFF 0.001f
-ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg,
- const int object,
- const int prim,
- const float u)
+ccl_device_inline float intersection_curve_shadow_transparency(
+ KernelGlobals kg, const int object, const int prim, const int type, const float u)
{
/* Find attribute. */
const int offset = intersection_find_attribute(kg, object, ATTR_STD_SHADOW_TRANSPARENCY);
@@ -165,7 +202,7 @@ ccl_device_inline float intersection_curve_shadow_transparency(KernelGlobals kg,
/* Interpolate transparency between curve keys. */
const KernelCurve kcurve = kernel_data_fetch(curves, prim);
- const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(kcurve.type);
+ const int k0 = kcurve.first_key + PRIMITIVE_UNPACK_SEGMENT(type);
const int k1 = k0 + 1;
const float f0 = kernel_data_fetch(attributes_float, offset + k0);
diff --git a/intern/cycles/kernel/bvh/volume.h b/intern/cycles/kernel/bvh/volume.h
index bd4e508ecac..664c692dd3d 100644
--- a/intern/cycles/kernel/bvh/volume.h
+++ b/intern/cycles/kernel/bvh/volume.h
@@ -46,13 +46,9 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
- float tmin = ray->tmin;
+ const float tmin = ray->tmin;
int object = OBJECT_NONE;
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
@@ -189,15 +185,11 @@ ccl_device_inline
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
- const float t_world_to_instance = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
#else
- const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
- isect->t *= t_world_to_instance;
- tmin *= t_world_to_instance;
-
++stack_ptr;
kernel_assert(stack_ptr < BVH_STACK_SIZE);
traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
@@ -218,13 +210,7 @@ ccl_device_inline
kernel_assert(object != OBJECT_NONE);
/* instance pop */
-#if BVH_FEATURE(BVH_MOTION)
- isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
-#else
- isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
-#endif
-
- tmin = ray->tmin;
+ bvh_instance_pop(ray, &P, &dir, &idir);
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
diff --git a/intern/cycles/kernel/bvh/volume_all.h b/intern/cycles/kernel/bvh/volume_all.h
index c6eeb07a14d..721eb555d4d 100644
--- a/intern/cycles/kernel/bvh/volume_all.h
+++ b/intern/cycles/kernel/bvh/volume_all.h
@@ -47,14 +47,10 @@ ccl_device_inline
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
- float tmin = ray->tmin;
+ const float tmin = ray->tmin;
int object = OBJECT_NONE;
float isect_t = ray->tmax;
-#if BVH_FEATURE(BVH_MOTION)
- Transform ob_itfm;
-#endif
-
int num_hits_in_instance = 0;
uint num_hits = 0;
@@ -159,18 +155,6 @@ ccl_device_inline
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
- if (object != OBJECT_NONE) {
-#if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-#else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-#endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
return num_hits;
}
}
@@ -212,18 +196,6 @@ ccl_device_inline
num_hits_in_instance++;
isect_array->t = isect_t;
if (num_hits == max_hits) {
- if (object != OBJECT_NONE) {
-# if BVH_FEATURE(BVH_MOTION)
- float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
-# else
- Transform itfm = object_fetch_transform(
- kg, object, OBJECT_INVERSE_TRANSFORM);
- float t_fac = 1.0f / len(transform_direction(&itfm, dir));
-# endif
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
return num_hits;
}
}
@@ -242,15 +214,11 @@ ccl_device_inline
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_HAS_VOLUME) {
#if BVH_FEATURE(BVH_MOTION)
- const float t_world_to_instance = bvh_instance_motion_push(
- kg, object, ray, &P, &dir, &idir, &ob_itfm);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir);
#else
- const float t_world_to_instance = bvh_instance_push(kg, object, ray, &P, &dir, &idir);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir);
#endif
- isect_t *= t_world_to_instance;
- tmin *= t_world_to_instance;
-
num_hits_in_instance = 0;
isect_array->t = isect_t;
@@ -274,29 +242,7 @@ ccl_device_inline
kernel_assert(object != OBJECT_NONE);
/* Instance pop. */
- if (num_hits_in_instance) {
- float t_fac;
-#if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
-#else
- bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
-#endif
- /* Scale isect->t to adjust for instancing. */
- for (int i = 0; i < num_hits_in_instance; i++) {
- (isect_array - i - 1)->t *= t_fac;
- }
- }
- else {
-#if BVH_FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
-#else
- bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
-#endif
- }
-
- tmin = ray->tmin;
- isect_t = ray->tmax;
- isect_array->t = isect_t;
+ bvh_instance_pop(ray, &P, &dir, &idir);
object = OBJECT_NONE;
node_addr = traversal_stack[stack_ptr];
diff --git a/intern/cycles/kernel/camera/camera.h b/intern/cycles/kernel/camera/camera.h
index 926ccf7b86f..27876677281 100644
--- a/intern/cycles/kernel/camera/camera.h
+++ b/intern/cycles/kernel/camera/camera.h
@@ -45,7 +45,6 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
float3 raster = make_float3(raster_x, raster_y, 0.0f);
float3 Pcamera = transform_perspective(&rastertocamera, raster);
-#ifdef __CAMERA_MOTION__
if (kernel_data.cam.have_perspective_motion) {
/* TODO(sergey): Currently we interpolate projected coordinate which
* gives nice looking result and which is simple, but is in fact a bit
@@ -63,7 +62,6 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
Pcamera = interp(Pcamera, Pcamera_post, (ray->time - 0.5f) * 2.0f);
}
}
-#endif
float3 P = zero_float3();
float3 D = Pcamera;
@@ -87,14 +85,12 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
/* transform ray from camera to world */
Transform cameratoworld = kernel_data.cam.cameratoworld;
-#ifdef __CAMERA_MOTION__
if (kernel_data.cam.num_motion_steps) {
transform_motion_array_interpolate(&cameratoworld,
kernel_data_array(camera_motion),
kernel_data.cam.num_motion_steps,
ray->time);
}
-#endif
P = transform_point(&cameratoworld, P);
D = normalize(transform_direction(&cameratoworld, D));
@@ -159,7 +155,6 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
#endif
}
-#ifdef __CAMERA_CLIPPING__
/* clipping */
float z_inv = 1.0f / normalize(Pcamera).z;
float nearclip = kernel_data.cam.nearclip * z_inv;
@@ -167,10 +162,6 @@ ccl_device void camera_sample_perspective(KernelGlobals kg,
ray->dP += nearclip * ray->dD;
ray->tmin = 0.0f;
ray->tmax = kernel_data.cam.cliplength * z_inv;
-#else
- ray->tmin = 0.0f;
- ray->tmax = FLT_MAX;
-#endif
}
/* Orthographic Camera */
@@ -209,14 +200,12 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg,
/* transform ray from camera to world */
Transform cameratoworld = kernel_data.cam.cameratoworld;
-#ifdef __CAMERA_MOTION__
if (kernel_data.cam.num_motion_steps) {
transform_motion_array_interpolate(&cameratoworld,
kernel_data_array(camera_motion),
kernel_data.cam.num_motion_steps,
ray->time);
}
-#endif
ray->P = transform_point(&cameratoworld, P);
ray->D = normalize(transform_direction(&cameratoworld, D));
@@ -231,22 +220,15 @@ ccl_device void camera_sample_orthographic(KernelGlobals kg,
ray->dD = differential_zero_compact();
#endif
-#ifdef __CAMERA_CLIPPING__
/* clipping */
ray->tmin = 0.0f;
ray->tmax = kernel_data.cam.cliplength;
-#else
- ray->tmin = 0.0f;
- ray->tmax = FLT_MAX;
-#endif
}
/* Panorama Camera */
ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
-#ifdef __CAMERA_MOTION__
ccl_global const DecomposedTransform *cam_motion,
-#endif
float raster_x,
float raster_y,
float lens_u,
@@ -290,12 +272,10 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
/* transform ray from camera to world */
Transform cameratoworld = cam->cameratoworld;
-#ifdef __CAMERA_MOTION__
if (cam->num_motion_steps) {
transform_motion_array_interpolate(
&cameratoworld, cam_motion, cam->num_motion_steps, ray->time);
}
-#endif
/* Stereo transform */
bool use_stereo = cam->interocular_offset != 0.0f;
@@ -348,17 +328,12 @@ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
ray->dP = differential_make_compact(dP);
#endif
-#ifdef __CAMERA_CLIPPING__
/* clipping */
float nearclip = cam->nearclip;
ray->P += nearclip * ray->D;
ray->dP += nearclip * ray->dD;
ray->tmin = 0.0f;
ray->tmax = cam->cliplength;
-#else
- ray->tmin = 0.0f;
- ray->tmax = FLT_MAX;
-#endif
}
/* Common */
@@ -378,7 +353,6 @@ ccl_device_inline void camera_sample(KernelGlobals kg,
float raster_x = x + lookup_table_read(kg, filter_u, filter_table_offset, FILTER_TABLE_SIZE);
float raster_y = y + lookup_table_read(kg, filter_v, filter_table_offset, FILTER_TABLE_SIZE);
-#ifdef __CAMERA_MOTION__
/* motion blur */
if (kernel_data.cam.shuttertime == -1.0f) {
ray->time = 0.5f;
@@ -416,7 +390,6 @@ ccl_device_inline void camera_sample(KernelGlobals kg,
}
}
}
-#endif
/* sample */
if (kernel_data.cam.type == CAMERA_PERSPECTIVE) {
@@ -426,12 +399,8 @@ ccl_device_inline void camera_sample(KernelGlobals kg,
camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray);
}
else {
-#ifdef __CAMERA_MOTION__
ccl_global const DecomposedTransform *cam_motion = kernel_data_array(camera_motion);
camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
-#else
- camera_sample_panorama(&kernel_data.cam, raster_x, raster_y, lens_u, lens_v, ray);
-#endif
}
}
diff --git a/intern/cycles/kernel/camera/projection.h b/intern/cycles/kernel/camera/projection.h
index c9fe3a6c7fb..1d16aa35abe 100644
--- a/intern/cycles/kernel/camera/projection.h
+++ b/intern/cycles/kernel/camera/projection.h
@@ -201,11 +201,35 @@ ccl_device float2 direction_to_mirrorball(float3 dir)
return make_float2(u, v);
}
+/* Single face of a equiangular cube map projection as described in
+ https://blog.google/products/google-ar-vr/bringing-pixels-front-and-center-vr-video/ */
+ccl_device float3 equiangular_cubemap_face_to_direction(float u, float v)
+{
+ u = (1.0f - u);
+
+ u = tanf(u * M_PI_2_F - M_PI_4_F);
+ v = tanf(v * M_PI_2_F - M_PI_4_F);
+
+ return make_float3(1.0f, u, v);
+}
+
+ccl_device float2 direction_to_equiangular_cubemap_face(float3 dir)
+{
+ float u = atan2f(dir.y, dir.x) * 2.0f / M_PI_F + 0.5f;
+ float v = atan2f(dir.z, dir.x) * 2.0f / M_PI_F + 0.5f;
+
+ u = 1.0f - u;
+
+ return make_float2(u, v);
+}
+
ccl_device_inline float3 panorama_to_direction(ccl_constant KernelCamera *cam, float u, float v)
{
switch (cam->panorama_type) {
case PANORAMA_EQUIRECTANGULAR:
return equirectangular_range_to_direction(u, v, cam->equirectangular_range);
+ case PANORAMA_EQUIANGULAR_CUBEMAP_FACE:
+ return equiangular_cubemap_face_to_direction(u, v);
case PANORAMA_MIRRORBALL:
return mirrorball_to_direction(u, v);
case PANORAMA_FISHEYE_EQUIDISTANT:
@@ -230,6 +254,8 @@ ccl_device_inline float2 direction_to_panorama(ccl_constant KernelCamera *cam, f
switch (cam->panorama_type) {
case PANORAMA_EQUIRECTANGULAR:
return direction_to_equirectangular_range(dir, cam->equirectangular_range);
+ case PANORAMA_EQUIANGULAR_CUBEMAP_FACE:
+ return direction_to_equiangular_cubemap_face(dir);
case PANORAMA_MIRRORBALL:
return direction_to_mirrorball(dir);
case PANORAMA_FISHEYE_EQUIDISTANT:
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index 933c07a5102..f1af3b12269 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -8,7 +8,7 @@ CCL_NAMESPACE_BEGIN
ccl_device ccl_private ShaderClosure *closure_alloc(ccl_private ShaderData *sd,
int size,
ClosureType type,
- float3 weight)
+ Spectrum weight)
{
kernel_assert(size <= sizeof(ShaderClosure));
@@ -49,49 +49,23 @@ ccl_device ccl_private void *closure_alloc_extra(ccl_private ShaderData *sd, int
ccl_device_inline ccl_private ShaderClosure *bsdf_alloc(ccl_private ShaderData *sd,
int size,
- float3 weight)
+ Spectrum weight)
{
kernel_assert(isfinite_safe(weight));
- const float sample_weight = fabsf(average(weight));
-
- /* Use comparison this way to help dealing with non-finite weight: if the average is not finite
- * we will not allocate new closure. */
- if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) {
- ccl_private ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
- if (sc == NULL) {
- return NULL;
- }
-
- sc->sample_weight = sample_weight;
-
- return sc;
- }
-
- return NULL;
-}
-
-#ifdef __OSL__
-ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
- int size,
- float3 weight,
- void *data)
-{
- kernel_assert(isfinite_safe(weight));
+ /* No negative weights allowed. */
+ weight = max(weight, zero_float3());
const float sample_weight = fabsf(average(weight));
/* Use comparison this way to help dealing with non-finite weight: if the average is not finite
* we will not allocate new closure. */
if (sample_weight >= CLOSURE_WEIGHT_CUTOFF) {
- ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ ccl_private ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
if (!sc) {
return NULL;
}
- memcpy((void *)sc, data, size);
-
- sc->weight = weight;
sc->sample_weight = sample_weight;
return sc;
@@ -99,6 +73,5 @@ ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd,
return NULL;
}
-#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 246cc8290d5..fbda52d74c4 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -70,7 +70,11 @@ ccl_device_inline float bsdf_get_roughness_squared(ccl_private const ShaderClosu
* Yining Karl Li and Brent Burley. */
ccl_device_inline float bump_shadowing_term(float3 Ng, float3 N, float3 I)
{
- float g = safe_divide(dot(Ng, I), dot(N, I) * dot(Ng, N));
+ const float cosNI = dot(N, I);
+ if (cosNI < 0.0f) {
+ Ng = -Ng;
+ }
+ float g = safe_divide(dot(Ng, I), cosNI * dot(Ng, N));
/* If the incoming light is on the unshadowed side, return full brightness. */
if (g >= 1.0f) {
@@ -99,15 +103,22 @@ ccl_device_inline float shift_cos_in(float cos_in, const float frequency_multipl
return val;
}
+ccl_device_inline bool bsdf_is_transmission(ccl_private const ShaderClosure *sc,
+ const float3 omega_in)
+{
+ return dot(sc->N, omega_in) < 0.0f;
+}
+
ccl_device_inline int bsdf_sample(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private const ShaderClosure *sc,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private differential3 *domega_in,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness,
+ ccl_private float *eta)
{
/* For curves use the smooth normal, particularly for ribbons the geometric
* normal gives too much darkening otherwise. */
@@ -116,119 +127,45 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
switch (sc->type) {
case CLOSURE_BSDF_DIFFUSE_ID:
- label = bsdf_diffuse_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_diffuse_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
break;
-#ifdef __SVM__
+#if defined(__SVM__) || defined(__OSL__)
case CLOSURE_BSDF_OREN_NAYAR_ID:
- label = bsdf_oren_nayar_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_oren_nayar_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
break;
# ifdef __OSL__
case CLOSURE_BSDF_PHONG_RAMP_ID:
- label = bsdf_phong_ramp_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_phong_ramp_sample(
+ sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness);
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
- label = bsdf_diffuse_ramp_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_diffuse_ramp_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
break;
# endif
case CLOSURE_BSDF_TRANSLUCENT_ID:
- label = bsdf_translucent_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_translucent_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_REFLECTION_ID:
- label = bsdf_reflection_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_reflection_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, eta);
+ *sampled_roughness = zero_float2();
break;
case CLOSURE_BSDF_REFRACTION_ID:
- label = bsdf_refraction_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_refraction_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, eta);
+ *sampled_roughness = zero_float2();
break;
case CLOSURE_BSDF_TRANSPARENT_ID:
- label = bsdf_transparent_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_transparent_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = zero_float2();
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
@@ -236,169 +173,65 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_V2_ID:
case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- label = bsdf_microfacet_ggx_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_microfacet_ggx_sample(
+ sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness, eta);
break;
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
- label = bsdf_microfacet_ggx_glass_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_microfacet_ggx_glass_sample(
+ sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness, eta);
break;
case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- label = bsdf_microfacet_beckmann_sample(kg,
- sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_microfacet_beckmann_sample(
+ kg, sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness, eta);
break;
case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- label = bsdf_ashikhmin_shirley_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_ashikhmin_shirley_sample(
+ sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness);
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- label = bsdf_ashikhmin_velvet_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_ashikhmin_velvet_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- label = bsdf_diffuse_toon_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_diffuse_toon_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_GLOSSY_TOON_ID:
- label = bsdf_glossy_toon_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_glossy_toon_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ // double check if this is valid
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- label = bsdf_hair_reflection_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_hair_reflection_sample(
+ sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness);
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- label = bsdf_hair_transmission_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_hair_transmission_sample(
+ sc, Ng, sd->I, randu, randv, eval, omega_in, pdf, sampled_roughness);
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
label = bsdf_principled_hair_sample(
- kg, sc, sd, randu, randv, eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
+ kg, sc, sd, randu, randv, eval, omega_in, pdf, sampled_roughness, eta);
break;
-# ifdef __PRINCIPLED__
case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
- label = bsdf_principled_diffuse_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ label = bsdf_principled_diffuse_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
break;
case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
case CLOSURE_BSDF_PRINCIPLED_SHEEN_V2_ID:
- label = bsdf_principled_sheen_sample(sc,
- Ng,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
- break;
-# endif /* __PRINCIPLED__ */
+ label = bsdf_principled_sheen_sample(sc, Ng, sd->I, randu, randv, eval, omega_in, pdf);
+ *sampled_roughness = one_float2();
+ *eta = 1.0f;
+ break;
#endif
default:
label = LABEL_NONE;
@@ -420,11 +253,12 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
const float frequency_multiplier =
kernel_data_fetch(objects, sd->object).shadow_terminator_shading_offset;
if (frequency_multiplier > 1.0f) {
- *eval *= shift_cos_in(dot(*omega_in, sc->N), frequency_multiplier);
+ const float cosNI = dot(*omega_in, sc->N);
+ *eval *= shift_cos_in(cosNI, frequency_multiplier);
}
if (label & LABEL_DIFFUSE) {
if (!isequal(sc->N, sd->N)) {
- *eval *= bump_shadowing_term((label & LABEL_TRANSMIT) ? -sd->N : sd->N, sc->N, *omega_in);
+ *eval *= bump_shadowing_term(sd->N, sc->N, *omega_in);
}
}
}
@@ -437,189 +271,350 @@ ccl_device_inline int bsdf_sample(KernelGlobals kg,
return label;
}
+ccl_device_inline void bsdf_roughness_eta(const KernelGlobals kg,
+ ccl_private const ShaderClosure *sc,
+ ccl_private float2 *roughness,
+ ccl_private float *eta)
+{
+ bool refractive = false;
+ float alpha = 1.0f;
+ switch (sc->type) {
+ case CLOSURE_BSDF_DIFFUSE_ID:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+#ifdef __SVM__
+ case CLOSURE_BSDF_OREN_NAYAR_ID:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+# ifdef __OSL__
+ case CLOSURE_BSDF_PHONG_RAMP_ID:
+ alpha = phong_ramp_exponent_to_roughness(((ccl_private const PhongRampBsdf *)sc)->exponent);
+ *roughness = make_float2(alpha, alpha);
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+# endif
+ case CLOSURE_BSDF_TRANSLUCENT_ID:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_REFLECTION_ID: {
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ *roughness = zero_float2();
+ *eta = bsdf->ior;
+ break;
+ }
+ case CLOSURE_BSDF_REFRACTION_ID: {
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ *roughness = zero_float2();
+ // do we need to inverse eta??
+ *eta = bsdf->ior;
+ break;
+ }
+ case CLOSURE_BSDF_TRANSPARENT_ID:
+ *roughness = zero_float2();
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_V2_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_V2_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID: {
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
+ refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+ *eta = refractive ? 1.0f / bsdf->ior : bsdf->ior;
+ break;
+ }
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID: {
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
+ *eta = bsdf->ior;
+ break;
+ }
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
+ refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+ *eta = refractive ? 1.0f / bsdf->ior : bsdf->ior;
+ } break;
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID: {
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ *roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
+ *eta = 1.0f;
+ break;
+ }
+ case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_GLOSSY_TOON_ID:
+ // double check if this is valid
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+ *roughness = make_float2(((ccl_private HairBsdf *)sc)->roughness1,
+ ((ccl_private HairBsdf *)sc)->roughness2);
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+ *roughness = make_float2(((ccl_private HairBsdf *)sc)->roughness1,
+ ((ccl_private HairBsdf *)sc)->roughness2);
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+ alpha = ((ccl_private PrincipledHairBSDF *)sc)->m0_roughness;
+ *roughness = make_float2(alpha, alpha);
+ *eta = ((ccl_private PrincipledHairBSDF *)sc)->eta;
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_V2_ID:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+#endif
+ default:
+ *roughness = one_float2();
+ *eta = 1.0f;
+ break;
+ }
+}
+
+ccl_device_inline int bsdf_label(const KernelGlobals kg,
+ ccl_private const ShaderClosure *sc,
+ const float3 omega_in)
+{
+ /* For curves use the smooth normal, particularly for ribbons the geometric
+ * normal gives too much darkening otherwise. */
+ int label;
+ switch (sc->type) {
+ case CLOSURE_BSDF_DIFFUSE_ID:
+ case CLOSURE_BSSRDF_BURLEY_ID:
+ case CLOSURE_BSSRDF_RANDOM_WALK_ID:
+ case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID:
+ label = LABEL_REFLECT | LABEL_DIFFUSE;
+ break;
+#ifdef __SVM__
+ case CLOSURE_BSDF_OREN_NAYAR_ID:
+ label = LABEL_REFLECT | LABEL_DIFFUSE;
+ break;
+# ifdef __OSL__
+ case CLOSURE_BSDF_PHONG_RAMP_ID:
+ label = LABEL_REFLECT | LABEL_GLOSSY;
+ break;
+ case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
+ label = LABEL_REFLECT | LABEL_DIFFUSE;
+ break;
+# endif
+ case CLOSURE_BSDF_TRANSLUCENT_ID:
+ label = LABEL_TRANSMIT | LABEL_DIFFUSE;
+ break;
+ case CLOSURE_BSDF_REFLECTION_ID:
+ label = LABEL_REFLECT | LABEL_SINGULAR;
+ break;
+ case CLOSURE_BSDF_REFRACTION_ID:
+ label = LABEL_TRANSMIT | LABEL_SINGULAR;
+ break;
+ case CLOSURE_BSDF_TRANSPARENT_ID:
+ label = LABEL_TRANSMIT | LABEL_TRANSPARENT;
+ break;
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_V2_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_V2_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID: {
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ label = (bsdf->alpha_x * bsdf->alpha_y <= 1e-7f) ? LABEL_REFLECT | LABEL_SINGULAR :
+ LABEL_REFLECT | LABEL_GLOSSY;
+ break;
+ }
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID: {
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ label = (bsdf->alpha_x * bsdf->alpha_y <= 1e-7f) ? LABEL_TRANSMIT | LABEL_SINGULAR :
+ LABEL_TRANSMIT | LABEL_GLOSSY;
+ break;
+ }
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+ label = (bsdf_is_transmission(sc, omega_in)) ? LABEL_TRANSMIT | LABEL_GLOSSY :
+ LABEL_REFLECT | LABEL_GLOSSY;
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ label = LABEL_REFLECT | LABEL_GLOSSY;
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+ label = LABEL_REFLECT | LABEL_DIFFUSE;
+ break;
+ case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+ label = LABEL_REFLECT | LABEL_DIFFUSE;
+ break;
+ case CLOSURE_BSDF_GLOSSY_TOON_ID:
+ label = LABEL_REFLECT | LABEL_GLOSSY;
+ break;
+ case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+ label = LABEL_REFLECT | LABEL_GLOSSY;
+ break;
+ case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+ label = LABEL_TRANSMIT | LABEL_GLOSSY;
+ break;
+ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+ if (bsdf_is_transmission(sc, omega_in))
+ label = LABEL_TRANSMIT | LABEL_GLOSSY;
+ else
+ label = LABEL_REFLECT | LABEL_GLOSSY;
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ label = LABEL_REFLECT | LABEL_DIFFUSE;
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_V2_ID:
+ label = LABEL_REFLECT | LABEL_DIFFUSE;
+ break;
+#endif
+ default:
+ label = LABEL_NONE;
+ break;
+ }
+
+ /* Test if BSDF sample should be treated as transparent for background. */
+ if (label & LABEL_TRANSMIT) {
+ float threshold_squared = kernel_data.background.transparent_roughness_squared_threshold;
+
+ if (threshold_squared >= 0.0f) {
+ if (bsdf_get_specular_roughness_squared(sc) <= threshold_squared) {
+ label |= LABEL_TRANSMIT_TRANSPARENT;
+ }
+ }
+ }
+ return label;
+}
+
#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_inline
#endif
- float3
+ Spectrum
bsdf_eval(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private const ShaderClosure *sc,
const float3 omega_in,
- const bool is_transmission,
ccl_private float *pdf)
{
- float3 eval = zero_float3();
+ Spectrum eval = zero_spectrum();
- if (!is_transmission) {
- switch (sc->type) {
- case CLOSURE_BSDF_DIFFUSE_ID:
- eval = bsdf_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
-#ifdef __SVM__
- case CLOSURE_BSDF_OREN_NAYAR_ID:
- eval = bsdf_oren_nayar_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
+ switch (sc->type) {
+ case CLOSURE_BSDF_DIFFUSE_ID:
+ eval = bsdf_diffuse_eval(sc, sd->I, omega_in, pdf);
+ break;
+#if defined(__SVM__) || defined(__OSL__)
+ case CLOSURE_BSDF_OREN_NAYAR_ID:
+ eval = bsdf_oren_nayar_eval(sc, sd->I, omega_in, pdf);
+ break;
# ifdef __OSL__
- case CLOSURE_BSDF_PHONG_RAMP_ID:
- eval = bsdf_phong_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
- eval = bsdf_diffuse_ramp_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
+ case CLOSURE_BSDF_PHONG_RAMP_ID:
+ eval = bsdf_phong_ramp_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_DIFFUSE_RAMP_ID:
+ eval = bsdf_diffuse_ramp_eval(sc, sd->I, omega_in, pdf);
+ break;
# endif
- case CLOSURE_BSDF_TRANSLUCENT_ID:
- eval = bsdf_translucent_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_REFLECTION_ID:
- eval = bsdf_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_REFRACTION_ID:
- eval = bsdf_refraction_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_TRANSPARENT_ID:
- eval = bsdf_transparent_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_V2_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_V2_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- eval = bsdf_microfacet_ggx_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
- eval = bsdf_microfacet_ggx_glass_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- eval = bsdf_microfacet_beckmann_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- eval = bsdf_ashikhmin_shirley_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- eval = bsdf_ashikhmin_velvet_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- eval = bsdf_diffuse_toon_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_GLOSSY_TOON_ID:
- eval = bsdf_glossy_toon_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
- eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- eval = bsdf_hair_reflection_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
-# ifdef __PRINCIPLED__
- case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
- eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
- case CLOSURE_BSDF_PRINCIPLED_SHEEN_V2_ID:
- eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf);
- break;
-# endif /* __PRINCIPLED__ */
+ case CLOSURE_BSDF_TRANSLUCENT_ID:
+ eval = bsdf_translucent_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_REFLECTION_ID:
+ eval = bsdf_reflection_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_REFRACTION_ID:
+ eval = bsdf_refraction_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_TRANSPARENT_ID:
+ eval = bsdf_transparent_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_GGX_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_V2_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_V2_ID:
+ case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
+ eval = bsdf_microfacet_ggx_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
+ case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
+ eval = bsdf_microfacet_ggx_glass_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
+ case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
+ eval = bsdf_microfacet_beckmann_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
+ eval = bsdf_ashikhmin_shirley_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
+ eval = bsdf_ashikhmin_velvet_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_DIFFUSE_TOON_ID:
+ eval = bsdf_diffuse_toon_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_GLOSSY_TOON_ID:
+ eval = bsdf_glossy_toon_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
+ eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_REFLECTION_ID:
+ eval = bsdf_hair_reflection_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
+ eval = bsdf_hair_transmission_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
+ eval = bsdf_principled_diffuse_eval(sc, sd->I, omega_in, pdf);
+ break;
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
+ case CLOSURE_BSDF_PRINCIPLED_SHEEN_V2_ID:
+ eval = bsdf_principled_sheen_eval(sc, sd->I, omega_in, pdf);
+ break;
#endif
- default:
- break;
- }
- if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
- if (!isequal(sc->N, sd->N)) {
- eval *= bump_shadowing_term(sd->N, sc->N, omega_in);
- }
- }
- /* Shadow terminator offset. */
- const float frequency_multiplier =
- kernel_data_fetch(objects, sd->object).shadow_terminator_shading_offset;
- if (frequency_multiplier > 1.0f) {
- eval *= shift_cos_in(dot(omega_in, sc->N), frequency_multiplier);
- }
+ default:
+ break;
}
- else {
- switch (sc->type) {
- case CLOSURE_BSDF_DIFFUSE_ID:
- eval = bsdf_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
-#ifdef __SVM__
- case CLOSURE_BSDF_OREN_NAYAR_ID:
- eval = bsdf_oren_nayar_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_TRANSLUCENT_ID:
- eval = bsdf_translucent_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_REFLECTION_ID:
- eval = bsdf_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_REFRACTION_ID:
- eval = bsdf_refraction_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_TRANSPARENT_ID:
- eval = bsdf_transparent_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_GGX_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_V2_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_V2_ID:
- case CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID:
- eval = bsdf_microfacet_ggx_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID:
- case CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID:
- eval = bsdf_microfacet_ggx_glass_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_MICROFACET_BECKMANN_ID:
- case CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID:
- eval = bsdf_microfacet_beckmann_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID:
- eval = bsdf_ashikhmin_shirley_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID:
- eval = bsdf_ashikhmin_velvet_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_DIFFUSE_TOON_ID:
- eval = bsdf_diffuse_toon_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_GLOSSY_TOON_ID:
- eval = bsdf_glossy_toon_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_PRINCIPLED_ID:
- eval = bsdf_principled_hair_eval(kg, sd, sc, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_REFLECTION_ID:
- eval = bsdf_hair_reflection_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
- eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
-# ifdef __PRINCIPLED__
- case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
- eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
- case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
- case CLOSURE_BSDF_PRINCIPLED_SHEEN_V2_ID:
- eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf);
- break;
-# endif /* __PRINCIPLED__ */
-#endif
- default:
- break;
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+ if (!isequal(sc->N, sd->N)) {
+ eval *= bump_shadowing_term(sd->N, sc->N, omega_in);
}
- if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
- if (!isequal(sc->N, sd->N)) {
- eval *= bump_shadowing_term(-sd->N, sc->N, omega_in);
- }
+ }
+
+ /* Shadow terminator offset. */
+ const float frequency_multiplier =
+ kernel_data_fetch(objects, sd->object).shadow_terminator_shading_offset;
+ if (frequency_multiplier > 1.0f) {
+ const float cosNI = dot(omega_in, sc->N);
+ if (cosNI >= 0.0f) {
+ eval *= shift_cos_in(cosNI, frequency_multiplier);
}
}
+
#ifdef WITH_CYCLES_DEBUG
kernel_assert(*pdf >= 0.0f);
kernel_assert(eval.x >= 0.0f && eval.y >= 0.0f && eval.z >= 0.0f);
@@ -630,7 +625,7 @@ ccl_device_inline
ccl_device void bsdf_blur(KernelGlobals kg, ccl_private ShaderClosure *sc, float roughness)
{
/* TODO: do we want to blur volume closures? */
-#ifdef __SVM__
+#if defined(__SVM__) || defined(__OSL__)
switch (sc->type) {
case CLOSURE_BSDF_MICROFACET_GGX_ID:
case CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID:
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index 47066542122..14a4094d485 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -39,11 +39,10 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough
return 2.0f / (roughness * roughness) - 2.0f;
}
-ccl_device_forceinline float3
-bsdf_ashikhmin_shirley_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device_forceinline Spectrum bsdf_ashikhmin_shirley_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
float3 N = bsdf->N;
@@ -53,68 +52,58 @@ bsdf_ashikhmin_shirley_eval_reflect(ccl_private const ShaderClosure *sc,
float out = 0.0f;
- if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
+ if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f || !(NdotI > 0.0f && NdotO > 0.0f)) {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
+ }
+
+ NdotI = fmaxf(NdotI, 1e-6f);
+ NdotO = fmaxf(NdotO, 1e-6f);
+ float3 H = normalize(omega_in + I);
+ float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
+ float HdotN = fmaxf(dot(H, N), 1e-6f);
+
+ /* pump from original paper
+ * (first derivative disc., but cancels the HdotI in the pdf nicely) */
+ float pump = 1.0f / fmaxf(1e-6f, (HdotI * fmaxf(NdotO, NdotI)));
+ /* pump from d-brdf paper */
+ /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */
+
+ float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
+ float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
+
+ if (n_x == n_y) {
+ /* isotropic */
+ float e = n_x;
+ float lobe = powf(HdotN, e);
+ float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
+
+ out = NdotO * norm * lobe * pump;
+ /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper). */
+ *pdf = norm * lobe / HdotI;
}
- if (NdotI > 0.0f && NdotO > 0.0f) {
- NdotI = fmaxf(NdotI, 1e-6f);
- NdotO = fmaxf(NdotO, 1e-6f);
- float3 H = normalize(omega_in + I);
- float HdotI = fmaxf(fabsf(dot(H, I)), 1e-6f);
- float HdotN = fmaxf(dot(H, N), 1e-6f);
-
- /* pump from original paper
- * (first derivative disc., but cancels the HdotI in the pdf nicely) */
- float pump = 1.0f / fmaxf(1e-6f, (HdotI * fmaxf(NdotO, NdotI)));
- /* pump from d-brdf paper */
- /*float pump = 1.0f / fmaxf(1e-4f, ((NdotO + NdotI) * (NdotO*NdotI))); */
-
- float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
- float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
-
- if (n_x == n_y) {
- /* isotropic */
- float e = n_x;
- float lobe = powf(HdotN, e);
- float norm = (n_x + 1.0f) / (8.0f * M_PI_F);
-
- out = NdotO * norm * lobe * pump;
- /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper). */
- *pdf = norm * lobe / HdotI;
+ else {
+ /* anisotropic */
+ float3 X, Y;
+ make_orthonormals_tangent(N, bsdf->T, &X, &Y);
+
+ float HdotX = dot(H, X);
+ float HdotY = dot(H, Y);
+ float lobe;
+ if (HdotN < 1.0f) {
+ float e = (n_x * HdotX * HdotX + n_y * HdotY * HdotY) / (1.0f - HdotN * HdotN);
+ lobe = powf(HdotN, e);
}
else {
- /* anisotropic */
- float3 X, Y;
- make_orthonormals_tangent(N, bsdf->T, &X, &Y);
-
- float HdotX = dot(H, X);
- float HdotY = dot(H, Y);
- float lobe;
- if (HdotN < 1.0f) {
- float e = (n_x * HdotX * HdotX + n_y * HdotY * HdotY) / (1.0f - HdotN * HdotN);
- lobe = powf(HdotN, e);
- }
- else {
- lobe = 1.0f;
- }
- float norm = sqrtf((n_x + 1.0f) * (n_y + 1.0f)) / (8.0f * M_PI_F);
-
- out = NdotO * norm * lobe * pump;
- *pdf = norm * lobe / HdotI;
+ lobe = 1.0f;
}
- }
+ float norm = sqrtf((n_x + 1.0f) * (n_y + 1.0f)) / (8.0f * M_PI_F);
- return make_float3(out, out, out);
-}
+ out = NdotO * norm * lobe * pump;
+ *pdf = norm * lobe / HdotI;
+ }
-ccl_device float3 bsdf_ashikhmin_shirley_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_spectrum(out);
}
ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x,
@@ -133,100 +122,95 @@ ccl_device_inline void bsdf_ashikhmin_shirley_sample_first_quadrant(float n_x,
ccl_device int bsdf_ashikhmin_shirley_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ *sampled_roughness = make_float2(bsdf->alpha_x, bsdf->alpha_y);
float3 N = bsdf->N;
int label = LABEL_REFLECT | LABEL_GLOSSY;
float NdotI = dot(N, I);
- if (NdotI > 0.0f) {
+ if (!(NdotI > 0.0f)) {
+ *pdf = 0.0f;
+ *eval = zero_spectrum();
+ return LABEL_NONE;
+ }
- float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
- float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
+ float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_x);
+ float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(bsdf->alpha_y);
- /* get x,y basis on the surface for anisotropy */
- float3 X, Y;
+ /* get x,y basis on the surface for anisotropy */
+ float3 X, Y;
- if (n_x == n_y)
- make_orthonormals(N, &X, &Y);
- else
- make_orthonormals_tangent(N, bsdf->T, &X, &Y);
-
- /* sample spherical coords for h in tangent space */
- float phi;
- float cos_theta;
- if (n_x == n_y) {
- /* isotropic sampling */
- phi = M_2PI_F * randu;
- cos_theta = powf(randv, 1.0f / (n_x + 1.0f));
+ if (n_x == n_y)
+ make_orthonormals(N, &X, &Y);
+ else
+ make_orthonormals_tangent(N, bsdf->T, &X, &Y);
+
+ /* sample spherical coords for h in tangent space */
+ float phi;
+ float cos_theta;
+ if (n_x == n_y) {
+ /* isotropic sampling */
+ phi = M_2PI_F * randu;
+ cos_theta = powf(randv, 1.0f / (n_x + 1.0f));
+ }
+ else {
+ /* anisotropic sampling */
+ if (randu < 0.25f) { /* first quadrant */
+ float remapped_randu = 4.0f * randu;
+ bsdf_ashikhmin_shirley_sample_first_quadrant(
+ n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
}
- else {
- /* anisotropic sampling */
- if (randu < 0.25f) { /* first quadrant */
- float remapped_randu = 4.0f * randu;
- bsdf_ashikhmin_shirley_sample_first_quadrant(
- n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
- }
- else if (randu < 0.5f) { /* second quadrant */
- float remapped_randu = 4.0f * (.5f - randu);
- bsdf_ashikhmin_shirley_sample_first_quadrant(
- n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
- phi = M_PI_F - phi;
- }
- else if (randu < 0.75f) { /* third quadrant */
- float remapped_randu = 4.0f * (randu - 0.5f);
- bsdf_ashikhmin_shirley_sample_first_quadrant(
- n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
- phi = M_PI_F + phi;
- }
- else { /* fourth quadrant */
- float remapped_randu = 4.0f * (1.0f - randu);
- bsdf_ashikhmin_shirley_sample_first_quadrant(
- n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
- phi = 2.0f * M_PI_F - phi;
- }
+ else if (randu < 0.5f) { /* second quadrant */
+ float remapped_randu = 4.0f * (.5f - randu);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(
+ n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = M_PI_F - phi;
}
-
- /* get half vector in tangent space */
- float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta * cos_theta));
- float cos_phi = cosf(phi);
- float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */
- float3 h = make_float3(sin_theta * cos_phi, sin_theta * sin_phi, cos_theta);
-
- /* half vector to world space */
- float3 H = h.x * X + h.y * Y + h.z * N;
- float HdotI = dot(H, I);
- if (HdotI < 0.0f)
- H = -H;
-
- /* reflect I on H to get omega_in */
- *omega_in = -I + (2.0f * HdotI) * H;
-
- if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
- /* Some high number for MIS. */
- *pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
- label = LABEL_REFLECT | LABEL_SINGULAR;
+ else if (randu < 0.75f) { /* third quadrant */
+ float remapped_randu = 4.0f * (randu - 0.5f);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(
+ n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = M_PI_F + phi;
}
- else {
- /* leave the rest to eval_reflect */
- *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf);
+ else { /* fourth quadrant */
+ float remapped_randu = 4.0f * (1.0f - randu);
+ bsdf_ashikhmin_shirley_sample_first_quadrant(
+ n_x, n_y, remapped_randu, randv, &phi, &cos_theta);
+ phi = 2.0f * M_PI_F - phi;
}
+ }
-#ifdef __RAY_DIFFERENTIALS__
- /* just do the reflection thing for now */
- *domega_in_dx = (2.0f * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2.0f * dot(N, dIdy)) * N - dIdy;
-#endif
+ /* get half vector in tangent space */
+ float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - cos_theta * cos_theta));
+ float cos_phi = cosf(phi);
+ float sin_phi = sinf(phi); /* no sqrt(1-cos^2) here b/c it causes artifacts */
+ float3 h = make_float3(sin_theta * cos_phi, sin_theta * sin_phi, cos_theta);
+
+ /* half vector to world space */
+ float3 H = h.x * X + h.y * Y + h.z * N;
+ float HdotI = dot(H, I);
+ if (HdotI < 0.0f)
+ H = -H;
+
+ /* reflect I on H to get omega_in */
+ *omega_in = -I + (2.0f * HdotI) * H;
+
+ if (fmaxf(bsdf->alpha_x, bsdf->alpha_y) <= 1e-4f) {
+ /* Some high number for MIS. */
+ *pdf = 1e6f;
+ *eval = make_spectrum(1e6f);
+ label = LABEL_REFLECT | LABEL_SINGULAR;
+ }
+ else {
+ /* leave the rest to eval */
+ *eval = bsdf_ashikhmin_shirley_eval(sc, I, *omega_in, pdf);
}
return label;
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index 3d7906eef7d..ac2183e0848 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@ -31,10 +31,10 @@ ccl_device int bsdf_ashikhmin_velvet_setup(ccl_private VelvetBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_ashikhmin_velvet_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
float m_invsigma2 = bsdf->invsigma2;
@@ -42,59 +42,46 @@ ccl_device float3 bsdf_ashikhmin_velvet_eval_reflect(ccl_private const ShaderClo
float cosNO = dot(N, I);
float cosNI = dot(N, omega_in);
- if (cosNO > 0 && cosNI > 0) {
- float3 H = normalize(omega_in + I);
+ if (!(cosNO > 0 && cosNI > 0)) {
+ *pdf = 0.0f;
+ return zero_spectrum();
+ }
- float cosNH = dot(N, H);
- float cosHO = fabsf(dot(I, H));
+ float3 H = normalize(omega_in + I);
- if (!(fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f)) {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
- }
- float cosNHdivHO = cosNH / cosHO;
- cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
+ float cosNH = dot(N, H);
+ float cosHO = fabsf(dot(I, H));
- float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
- float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
+ if (!(fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f)) {
+ *pdf = 0.0f;
+ return zero_spectrum();
+ }
+ float cosNHdivHO = cosNH / cosHO;
+ cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
- float sinNH2 = 1 - cosNH * cosNH;
- float sinNH4 = sinNH2 * sinNH2;
- float cotangent2 = (cosNH * cosNH) / sinNH2;
+ float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
+ float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
- float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
- float G = fminf(1.0f, fminf(fac1, fac2)); // TODO: derive G from D analytically
+ float sinNH2 = 1 - cosNH * cosNH;
+ float sinNH4 = sinNH2 * sinNH2;
+ float cotangent2 = (cosNH * cosNH) / sinNH2;
- float out = 0.25f * (D * G) / cosNO;
+ float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
+ float G = fminf(1.0f, fminf(fac1, fac2)); // TODO: derive G from D analytically
- *pdf = 0.5f * M_1_PI_F;
- return make_float3(out, out, out);
- }
+ float out = 0.25f * (D * G) / cosNO;
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device float3 bsdf_ashikhmin_velvet_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ *pdf = 0.5f * M_1_PI_F;
+ return make_spectrum(out);
}
ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const VelvetBsdf *bsdf = (ccl_private const VelvetBsdf *)sc;
@@ -105,47 +92,42 @@ ccl_device int bsdf_ashikhmin_velvet_sample(ccl_private const ShaderClosure *sc,
// distribution over the hemisphere
sample_uniform_hemisphere(N, randu, randv, omega_in, pdf);
- if (dot(Ng, *omega_in) > 0) {
- float3 H = normalize(*omega_in + I);
+ if (!(dot(Ng, *omega_in) > 0)) {
+ *pdf = 0.0f;
+ *eval = zero_spectrum();
+ return LABEL_NONE;
+ }
+
+ float3 H = normalize(*omega_in + I);
- float cosNI = dot(N, *omega_in);
- float cosNO = dot(N, I);
- float cosNH = dot(N, H);
- float cosHO = fabsf(dot(I, H));
+ float cosNI = dot(N, *omega_in);
+ float cosNO = dot(N, I);
+ float cosNH = dot(N, H);
+ float cosHO = fabsf(dot(I, H));
- if (fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f) {
- float cosNHdivHO = cosNH / cosHO;
- cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
+ if (!(fabsf(cosNO) > 1e-5f && fabsf(cosNH) < 1.0f - 1e-5f && cosHO > 1e-5f)) {
+ *pdf = 0.0f;
+ *eval = zero_spectrum();
+ return LABEL_NONE;
+ }
- float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
- float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
+ float cosNHdivHO = cosNH / cosHO;
+ cosNHdivHO = fmaxf(cosNHdivHO, 1e-5f);
- float sinNH2 = 1 - cosNH * cosNH;
- float sinNH4 = sinNH2 * sinNH2;
- float cotangent2 = (cosNH * cosNH) / sinNH2;
+ float fac1 = 2 * fabsf(cosNHdivHO * cosNO);
+ float fac2 = 2 * fabsf(cosNHdivHO * cosNI);
- float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
- float G = fminf(1.0f, fminf(fac1, fac2)); // TODO: derive G from D analytically
+ float sinNH2 = 1 - cosNH * cosNH;
+ float sinNH4 = sinNH2 * sinNH2;
+ float cotangent2 = (cosNH * cosNH) / sinNH2;
- float power = 0.25f * (D * G) / cosNO;
+ float D = expf(-cotangent2 * m_invsigma2) * m_invsigma2 * M_1_PI_F / sinNH4;
+ float G = fminf(1.0f, fminf(fac1, fac2)); // TODO: derive G from D analytically
- *eval = make_float3(power, power, power);
+ float power = 0.25f * (D * G) / cosNO;
+
+ *eval = make_spectrum(power);
-#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the retroreflective bounce
- *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
-#endif
- }
- else {
- *pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
- }
- }
- else {
- *pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
- }
return LABEL_REFLECT | LABEL_DIFFUSE;
}
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h
index 759ad03f8e8..c9c26754651 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@ -26,39 +26,26 @@ ccl_device int bsdf_diffuse_setup(ccl_private DiffuseBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_diffuse_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_diffuse_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
float3 N = bsdf->N;
float cos_pi = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
*pdf = cos_pi;
- return make_float3(cos_pi, cos_pi, cos_pi);
-}
-
-ccl_device float3 bsdf_diffuse_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_spectrum(cos_pi);
}
ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
@@ -68,16 +55,11 @@ ccl_device int bsdf_diffuse_sample(ccl_private const ShaderClosure *sc,
sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
if (dot(Ng, *omega_in) > 0.0f) {
- *eval = make_float3(*pdf, *pdf, *pdf);
-#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
-#endif
+ *eval = make_spectrum(*pdf);
}
else {
*pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
return LABEL_REFLECT | LABEL_DIFFUSE;
}
@@ -90,39 +72,26 @@ ccl_device int bsdf_translucent_setup(ccl_private DiffuseBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_translucent_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device float3 bsdf_translucent_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_translucent_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
float3 N = bsdf->N;
float cos_pi = fmaxf(-dot(N, omega_in), 0.0f) * M_1_PI_F;
*pdf = cos_pi;
- return make_float3(cos_pi, cos_pi, cos_pi);
+ return make_spectrum(cos_pi);
}
ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
@@ -132,16 +101,11 @@ ccl_device int bsdf_translucent_sample(ccl_private const ShaderClosure *sc,
// distribution over the hemisphere
sample_cos_hemisphere(-N, randu, randv, omega_in, pdf);
if (dot(Ng, *omega_in) < 0) {
- *eval = make_float3(*pdf, *pdf, *pdf);
-#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
- *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
-#endif
+ *eval = make_spectrum(*pdf);
}
else {
*pdf = 0;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
return LABEL_TRANSMIT | LABEL_DIFFUSE;
}
diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index aa4c091f587..e955ed00b92 100644
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@ -9,6 +9,7 @@
#pragma once
#include "kernel/sample/mapping.h"
+#include "kernel/util/color.h"
CCL_NAMESPACE_BEGIN
@@ -46,38 +47,32 @@ ccl_device void bsdf_diffuse_ramp_blur(ccl_private ShaderClosure *sc, float roug
{
}
-ccl_device float3 bsdf_diffuse_ramp_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_diffuse_ramp_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
float3 N = bsdf->N;
float cos_pi = fmaxf(dot(N, omega_in), 0.0f);
- *pdf = cos_pi * M_1_PI_F;
- return bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F;
-}
-
-ccl_device float3 bsdf_diffuse_ramp_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- return make_float3(0.0f, 0.0f, 0.0f);
+ if (cos_pi >= 0.0f) {
+ *pdf = cos_pi * M_1_PI_F;
+ return rgb_to_spectrum(bsdf_diffuse_ramp_get_color(bsdf->colors, cos_pi) * M_1_PI_F);
+ }
+ else {
+ *pdf = 0.0f;
+ return zero_spectrum();
+ }
}
ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
const DiffuseRampBsdf *bsdf = (const DiffuseRampBsdf *)sc;
@@ -87,15 +82,11 @@ ccl_device int bsdf_diffuse_ramp_sample(ccl_private const ShaderClosure *sc,
sample_cos_hemisphere(N, randu, randv, omega_in, pdf);
if (dot(Ng, *omega_in) > 0.0f) {
- *eval = bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F;
-# ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(N, dIdx)) * N - dIdx;
- *domega_in_dy = (2 * dot(N, dIdy)) * N - dIdy;
-# endif
+ *eval = rgb_to_spectrum(bsdf_diffuse_ramp_get_color(bsdf->colors, *pdf * M_PI_F) * M_1_PI_F);
}
else {
*pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
return LABEL_REFLECT | LABEL_DIFFUSE;
}
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index a136ed05800..a8ba4044758 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -37,12 +37,17 @@ ccl_device int bsdf_hair_transmission_setup(ccl_private HairBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_hair_reflection_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
+ if (dot(bsdf->N, omega_in) < 0.0f) {
+ *pdf = 0.0f;
+ return zero_spectrum();
+ }
+
float offset = bsdf->offset;
float3 Tg = bsdf->T;
float roughness1 = bsdf->roughness1;
@@ -61,7 +66,7 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClos
if (M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) {
*pdf = 0.0f;
- return make_float3(*pdf, *pdf, *pdf);
+ return zero_spectrum();
}
float roughness1_inv = 1.0f / roughness1;
@@ -81,33 +86,20 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(ccl_private const ShaderClos
(2 * (t * t + roughness1 * roughness1) * (a_R - b_R) * costheta_i);
*pdf = phi_pdf * theta_pdf;
- return make_float3(*pdf, *pdf, *pdf);
-}
-
-ccl_device float3 bsdf_hair_transmission_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device float3 bsdf_hair_reflection_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_spectrum(*pdf);
}
-ccl_device float3 bsdf_hair_transmission_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_hair_transmission_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
+ if (dot(bsdf->N, omega_in) >= 0.0f) {
+ *pdf = 0.0f;
+ return zero_spectrum();
+ }
+
float offset = bsdf->offset;
float3 Tg = bsdf->T;
float roughness1 = bsdf->roughness1;
@@ -125,7 +117,7 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(ccl_private const ShaderC
if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
*pdf = 0.0f;
- return make_float3(*pdf, *pdf, *pdf);
+ return zero_spectrum();
}
float costheta_i = fast_cosf(theta_i);
@@ -145,27 +137,25 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(ccl_private const ShaderC
float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
*pdf = phi_pdf * theta_pdf;
- return make_float3(*pdf, *pdf, *pdf);
+ return make_spectrum(*pdf);
}
ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness)
{
ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
float offset = bsdf->offset;
float3 Tg = bsdf->T;
float roughness1 = bsdf->roughness1;
float roughness2 = bsdf->roughness2;
+ *sampled_roughness = make_float2(roughness1, roughness2);
float Iz = dot(Tg, I);
float3 locy = normalize(I - Tg * Iz);
float3 locx = cross(locy, Tg);
@@ -194,17 +184,11 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
fast_sincosf(phi, &sinphi, &cosphi);
*omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
- // differentials - TODO: find a better approximation for the reflective bounce
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
- *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
-#endif
-
*pdf = fabsf(phi_pdf * theta_pdf);
if (M_PI_2_F - fabsf(theta_i) < 0.001f)
*pdf = 0.0f;
- *eval = make_float3(*pdf, *pdf, *pdf);
+ *eval = make_spectrum(*pdf);
return LABEL_REFLECT | LABEL_GLOSSY;
}
@@ -212,21 +196,19 @@ ccl_device int bsdf_hair_reflection_sample(ccl_private const ShaderClosure *sc,
ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness)
{
ccl_private const HairBsdf *bsdf = (ccl_private const HairBsdf *)sc;
float offset = bsdf->offset;
float3 Tg = bsdf->T;
float roughness1 = bsdf->roughness1;
float roughness2 = bsdf->roughness2;
+ *sampled_roughness = make_float2(roughness1, roughness2);
float Iz = dot(Tg, I);
float3 locy = normalize(I - Tg * Iz);
float3 locx = cross(locy, Tg);
@@ -255,18 +237,12 @@ ccl_device int bsdf_hair_transmission_sample(ccl_private const ShaderClosure *sc
fast_sincosf(phi, &sinphi, &cosphi);
*omega_in = (cosphi * costheta_i) * locy - (sinphi * costheta_i) * locx + (sintheta_i)*Tg;
- // differentials - TODO: find a better approximation for the transmission bounce
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = 2 * dot(locy, dIdx) * locy - dIdx;
- *domega_in_dy = 2 * dot(locy, dIdy) * locy - dIdy;
-#endif
-
*pdf = fabsf(phi_pdf * theta_pdf);
if (M_PI_2_F - fabsf(theta_i) < 0.001f) {
*pdf = 0.0f;
}
- *eval = make_float3(*pdf, *pdf, *pdf);
+ *eval = make_spectrum(*pdf);
/* TODO(sergey): Should always be negative, but seems some precision issue
* is involved here.
diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h
index 2cdf6c9f349..f5651d82aae 100644
--- a/intern/cycles/kernel/closure/bsdf_hair_principled.h
+++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h
@@ -3,10 +3,11 @@
#pragma once
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
# include <fenv.h>
#endif
+#include "kernel/sample/lcg.h"
#include "kernel/util/color.h"
CCL_NAMESPACE_BEGIN
@@ -20,7 +21,7 @@ typedef struct PrincipledHairBSDF {
SHADER_CLOSURE_BASE;
/* Absorption coefficient. */
- float3 sigma;
+ Spectrum sigma;
/* Variance of the underlying logistic distribution. */
float v;
/* Scale factor of the underlying logistic distribution. */
@@ -56,13 +57,7 @@ ccl_device_inline float delta_phi(int p, float gamma_o, float gamma_t)
/* Remaps the given angle to [-pi, pi]. */
ccl_device_inline float wrap_angle(float a)
{
- while (a > M_PI_F) {
- a -= M_2PI_F;
- }
- while (a < -M_PI_F) {
- a += M_2PI_F;
- }
- return a;
+ return (a + M_PI_F) - M_2PI_F * floorf((a + M_PI_F) / M_2PI_F) - M_PI_F;
}
/* Logistic distribution function. */
@@ -166,12 +161,6 @@ ccl_device_inline float longitudinal_scattering(
}
}
-/* Combine the three values using their luminances. */
-ccl_device_inline float4 combine_with_energy(KernelGlobals kg, float3 c)
-{
- return make_float4(c.x, c.y, c.z, linear_rgb_to_gray(kg, c));
-}
-
#ifdef __HAIR__
/* Set up the hair closure. */
ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
@@ -214,34 +203,36 @@ ccl_device int bsdf_principled_hair_setup(ccl_private ShaderData *sd,
#endif /* __HAIR__ */
/* Given the Fresnel term and transmittance, generate the attenuation terms for each bounce. */
-ccl_device_inline void hair_attenuation(KernelGlobals kg,
- float f,
- float3 T,
- ccl_private float4 *Ap)
+ccl_device_inline void hair_attenuation(
+ KernelGlobals kg, float f, Spectrum T, ccl_private Spectrum *Ap, ccl_private float *Ap_energy)
{
/* Primary specular (R). */
- Ap[0] = make_float4(f, f, f, f);
+ Ap[0] = make_spectrum(f);
+ Ap_energy[0] = f;
/* Transmission (TT). */
- float3 col = sqr(1.0f - f) * T;
- Ap[1] = combine_with_energy(kg, col);
+ Spectrum col = sqr(1.0f - f) * T;
+ Ap[1] = col;
+ Ap_energy[1] = spectrum_to_gray(kg, col);
/* Secondary specular (TRT). */
col *= T * f;
- Ap[2] = combine_with_energy(kg, col);
+ Ap[2] = col;
+ Ap_energy[2] = spectrum_to_gray(kg, col);
/* Residual component (TRRT+). */
- col *= safe_divide_color(T * f, make_float3(1.0f, 1.0f, 1.0f) - T * f);
- Ap[3] = combine_with_energy(kg, col);
+ col *= safe_divide(T * f, one_spectrum() - T * f);
+ Ap[3] = col;
+ Ap_energy[3] = spectrum_to_gray(kg, col);
/* Normalize sampling weights. */
- float totweight = Ap[0].w + Ap[1].w + Ap[2].w + Ap[3].w;
+ float totweight = Ap_energy[0] + Ap_energy[1] + Ap_energy[2] + Ap_energy[3];
float fac = safe_divide(1.0f, totweight);
- Ap[0].w *= fac;
- Ap[1].w *= fac;
- Ap[2].w *= fac;
- Ap[3].w *= fac;
+ Ap_energy[0] *= fac;
+ Ap_energy[1] *= fac;
+ Ap_energy[2] *= fac;
+ Ap_energy[3] *= fac;
}
/* Given the tilt angle, generate the rotated theta_i for the different bounces. */
@@ -266,81 +257,84 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i,
}
/* Evaluation function for our shader. */
-ccl_device float3 bsdf_principled_hair_eval(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- ccl_private const ShaderClosure *sc,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_principled_hair_eval(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ ccl_private const ShaderClosure *sc,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
kernel_assert(isfinite_safe(sd->P) && isfinite_safe(sd->ray_length));
ccl_private const PrincipledHairBSDF *bsdf = (ccl_private const PrincipledHairBSDF *)sc;
- float3 Y = float4_to_float3(bsdf->extra->geom);
+ const float3 Y = float4_to_float3(bsdf->extra->geom);
- float3 X = safe_normalize(sd->dPdu);
+ const float3 X = safe_normalize(sd->dPdu);
kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
- float3 Z = safe_normalize(cross(X, Y));
+ const float3 Z = safe_normalize(cross(X, Y));
- float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
- float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
+ const float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
+ const float3 wi = make_float3(dot(omega_in, X), dot(omega_in, Y), dot(omega_in, Z));
- float sin_theta_o = wo.x;
- float cos_theta_o = cos_from_sin(sin_theta_o);
- float phi_o = atan2f(wo.z, wo.y);
+ const float sin_theta_o = wo.x;
+ const float cos_theta_o = cos_from_sin(sin_theta_o);
+ const float phi_o = atan2f(wo.z, wo.y);
- float sin_theta_t = sin_theta_o / bsdf->eta;
- float cos_theta_t = cos_from_sin(sin_theta_t);
+ const float sin_theta_t = sin_theta_o / bsdf->eta;
+ const float cos_theta_t = cos_from_sin(sin_theta_t);
- float sin_gamma_o = bsdf->extra->geom.w;
- float cos_gamma_o = cos_from_sin(sin_gamma_o);
- float gamma_o = safe_asinf(sin_gamma_o);
+ const float sin_gamma_o = bsdf->extra->geom.w;
+ const float cos_gamma_o = cos_from_sin(sin_gamma_o);
+ const float gamma_o = safe_asinf(sin_gamma_o);
- float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
- float cos_gamma_t = cos_from_sin(sin_gamma_t);
- float gamma_t = safe_asinf(sin_gamma_t);
+ const float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
+ const float cos_gamma_t = cos_from_sin(sin_gamma_t);
+ const float gamma_t = safe_asinf(sin_gamma_t);
- float3 T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
- float4 Ap[4];
- hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
+ const Spectrum T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
+ Spectrum Ap[4];
+ float Ap_energy[4];
+ hair_attenuation(
+ kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap, Ap_energy);
- float sin_theta_i = wi.x;
- float cos_theta_i = cos_from_sin(sin_theta_i);
- float phi_i = atan2f(wi.z, wi.y);
+ const float sin_theta_i = wi.x;
+ const float cos_theta_i = cos_from_sin(sin_theta_i);
+ const float phi_i = atan2f(wi.z, wi.y);
- float phi = phi_i - phi_o;
+ const float phi = phi_i - phi_o;
float angles[6];
hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
- float4 F;
- float Mp, Np;
-
- /* Primary specular (R). */
- Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
- Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
- F = Ap[0] * Mp * Np;
- kernel_assert(isfinite_safe(float4_to_float3(F)));
-
- /* Transmission (TT). */
- Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
- Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
- F += Ap[1] * Mp * Np;
- kernel_assert(isfinite_safe(float4_to_float3(F)));
-
- /* Secondary specular (TRT). */
- Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
- Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
- F += Ap[2] * Mp * Np;
- kernel_assert(isfinite_safe(float4_to_float3(F)));
+ Spectrum F = zero_spectrum();
+ float F_energy = 0.0f;
+
+ /* Primary specular (R), Transmission (TT) and Secondary Specular (TRT). */
+ for (int i = 0; i < 3; i++) {
+ const float Mp = longitudinal_scattering(angles[2 * i],
+ angles[2 * i + 1],
+ sin_theta_o,
+ cos_theta_o,
+ (i == 0) ? bsdf->m0_roughness :
+ (i == 1) ? 0.25f * bsdf->v :
+ 4.0f * bsdf->v);
+ const float Np = azimuthal_scattering(phi, i, bsdf->s, gamma_o, gamma_t);
+ F += Ap[i] * Mp * Np;
+ F_energy += Ap_energy[i] * Mp * Np;
+ kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy));
+ }
/* Residual component (TRRT+). */
- Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
- Np = M_1_2PI_F;
- F += Ap[3] * Mp * Np;
- kernel_assert(isfinite_safe(float4_to_float3(F)));
+ {
+ const float Mp = longitudinal_scattering(
+ sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+ const float Np = M_1_2PI_F;
+ F += Ap[3] * Mp * Np;
+ F_energy += Ap_energy[3] * Mp * Np;
+ kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy));
+ }
- *pdf = F.w;
- return float4_to_float3(F);
+ *pdf = F_energy;
+ return F;
}
/* Sampling function for the hair shader. */
@@ -349,52 +343,57 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
ccl_private ShaderData *sd,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness,
+ ccl_private float *eta)
{
ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
- float3 Y = float4_to_float3(bsdf->extra->geom);
+ *sampled_roughness = make_float2(bsdf->m0_roughness, bsdf->m0_roughness);
+ *eta = bsdf->eta;
- float3 X = safe_normalize(sd->dPdu);
+ const float3 Y = float4_to_float3(bsdf->extra->geom);
+
+ const float3 X = safe_normalize(sd->dPdu);
kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
- float3 Z = safe_normalize(cross(X, Y));
+ const float3 Z = safe_normalize(cross(X, Y));
- float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
+ const float3 wo = make_float3(dot(sd->I, X), dot(sd->I, Y), dot(sd->I, Z));
float2 u[2];
u[0] = make_float2(randu, randv);
u[1].x = lcg_step_float(&sd->lcg_state);
u[1].y = lcg_step_float(&sd->lcg_state);
- float sin_theta_o = wo.x;
- float cos_theta_o = cos_from_sin(sin_theta_o);
- float phi_o = atan2f(wo.z, wo.y);
+ const float sin_theta_o = wo.x;
+ const float cos_theta_o = cos_from_sin(sin_theta_o);
+ const float phi_o = atan2f(wo.z, wo.y);
- float sin_theta_t = sin_theta_o / bsdf->eta;
- float cos_theta_t = cos_from_sin(sin_theta_t);
+ const float sin_theta_t = sin_theta_o / bsdf->eta;
+ const float cos_theta_t = cos_from_sin(sin_theta_t);
- float sin_gamma_o = bsdf->extra->geom.w;
- float cos_gamma_o = cos_from_sin(sin_gamma_o);
- float gamma_o = safe_asinf(sin_gamma_o);
+ const float sin_gamma_o = bsdf->extra->geom.w;
+ const float cos_gamma_o = cos_from_sin(sin_gamma_o);
+ const float gamma_o = safe_asinf(sin_gamma_o);
- float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
- float cos_gamma_t = cos_from_sin(sin_gamma_t);
- float gamma_t = safe_asinf(sin_gamma_t);
+ const float sin_gamma_t = sin_gamma_o * cos_theta_o / sqrtf(sqr(bsdf->eta) - sqr(sin_theta_o));
+ const float cos_gamma_t = cos_from_sin(sin_gamma_t);
+ const float gamma_t = safe_asinf(sin_gamma_t);
- float3 T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
- float4 Ap[4];
- hair_attenuation(kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap);
+ const Spectrum T = exp(-bsdf->sigma * (2.0f * cos_gamma_t / cos_theta_t));
+ Spectrum Ap[4];
+ float Ap_energy[4];
+ hair_attenuation(
+ kg, fresnel_dielectric_cos(cos_theta_o * cos_gamma_o, bsdf->eta), T, Ap, Ap_energy);
int p = 0;
for (; p < 3; p++) {
- if (u[0].x < Ap[p].w) {
+ if (u[0].x < Ap_energy[p]) {
break;
}
- u[0].x -= Ap[p].w;
+ u[0].x -= Ap_energy[p];
}
float v = bsdf->v;
@@ -406,7 +405,7 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
}
u[1].x = max(u[1].x, 1e-5f);
- float fac = 1.0f + v * logf(u[1].x + (1.0f - u[1].x) * expf(-2.0f / v));
+ const float fac = 1.0f + v * logf(u[1].x + (1.0f - u[1].x) * expf(-2.0f / v));
float sin_theta_i = -fac * sin_theta_o +
cos_from_sin(fac) * cosf(M_2PI_F * u[1].y) * cos_theta_o;
float cos_theta_i = cos_from_sin(sin_theta_i);
@@ -425,48 +424,43 @@ ccl_device int bsdf_principled_hair_sample(KernelGlobals kg,
else {
phi = M_2PI_F * u[0].y;
}
- float phi_i = phi_o + phi;
+ const float phi_i = phi_o + phi;
hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
- float4 F;
- float Mp, Np;
-
- /* Primary specular (R). */
- Mp = longitudinal_scattering(angles[0], angles[1], sin_theta_o, cos_theta_o, bsdf->m0_roughness);
- Np = azimuthal_scattering(phi, 0, bsdf->s, gamma_o, gamma_t);
- F = Ap[0] * Mp * Np;
- kernel_assert(isfinite_safe(float4_to_float3(F)));
-
- /* Transmission (TT). */
- Mp = longitudinal_scattering(angles[2], angles[3], sin_theta_o, cos_theta_o, 0.25f * bsdf->v);
- Np = azimuthal_scattering(phi, 1, bsdf->s, gamma_o, gamma_t);
- F += Ap[1] * Mp * Np;
- kernel_assert(isfinite_safe(float4_to_float3(F)));
-
- /* Secondary specular (TRT). */
- Mp = longitudinal_scattering(angles[4], angles[5], sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
- Np = azimuthal_scattering(phi, 2, bsdf->s, gamma_o, gamma_t);
- F += Ap[2] * Mp * Np;
- kernel_assert(isfinite_safe(float4_to_float3(F)));
+ Spectrum F = zero_spectrum();
+ float F_energy = 0.0f;
+
+ /* Primary specular (R), Transmission (TT) and Secondary Specular (TRT). */
+ for (int i = 0; i < 3; i++) {
+ const float Mp = longitudinal_scattering(angles[2 * i],
+ angles[2 * i + 1],
+ sin_theta_o,
+ cos_theta_o,
+ (i == 0) ? bsdf->m0_roughness :
+ (i == 1) ? 0.25f * bsdf->v :
+ 4.0f * bsdf->v);
+ const float Np = azimuthal_scattering(phi, i, bsdf->s, gamma_o, gamma_t);
+ F += Ap[i] * Mp * Np;
+ F_energy += Ap_energy[i] * Mp * Np;
+ kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy));
+ }
/* Residual component (TRRT+). */
- Mp = longitudinal_scattering(sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
- Np = M_1_2PI_F;
- F += Ap[3] * Mp * Np;
- kernel_assert(isfinite_safe(float4_to_float3(F)));
+ {
+ const float Mp = longitudinal_scattering(
+ sin_theta_i, cos_theta_i, sin_theta_o, cos_theta_o, 4.0f * bsdf->v);
+ const float Np = M_1_2PI_F;
+ F += Ap[3] * Mp * Np;
+ F_energy += Ap_energy[3] * Mp * Np;
+ kernel_assert(isfinite_safe(F) && isfinite_safe(F_energy));
+ }
- *eval = float4_to_float3(F);
- *pdf = F.w;
+ *eval = F;
+ *pdf = F_energy;
*omega_in = X * sin_theta_i + Y * cos_theta_i * cosf(phi_i) + Z * cos_theta_i * sinf(phi_i);
-#ifdef __RAY_DIFFERENTIALS__
- float3 N = safe_normalize(sd->I + *omega_in);
- *domega_in_dx = (2 * dot(N, sd->dI.dx)) * N - sd->dI.dx;
- *domega_in_dy = (2 * dot(N, sd->dI.dy)) * N - sd->dI.dy;
-#endif
-
return LABEL_GLOSSY | ((p == 0) ? LABEL_REFLECT : LABEL_TRANSMIT);
}
@@ -489,25 +483,28 @@ ccl_device_inline float bsdf_principled_hair_albedo_roughness_scale(
return (((((0.245f * x) + 5.574f) * x - 10.73f) * x + 2.532f) * x - 0.215f) * x + 5.969f;
}
-ccl_device float3 bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
+ccl_device Spectrum bsdf_principled_hair_albedo(ccl_private const ShaderClosure *sc)
{
ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)sc;
return exp(-sqrt(bsdf->sigma) * bsdf_principled_hair_albedo_roughness_scale(bsdf->v));
}
-ccl_device_inline float3
-bsdf_principled_hair_sigma_from_reflectance(const float3 color, const float azimuthal_roughness)
+ccl_device_inline Spectrum
+bsdf_principled_hair_sigma_from_reflectance(const Spectrum color, const float azimuthal_roughness)
{
- const float3 sigma = log(color) /
- bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness);
+ const Spectrum sigma = log(color) /
+ bsdf_principled_hair_albedo_roughness_scale(azimuthal_roughness);
return sigma * sigma;
}
-ccl_device_inline float3 bsdf_principled_hair_sigma_from_concentration(const float eumelanin,
- const float pheomelanin)
+ccl_device_inline Spectrum bsdf_principled_hair_sigma_from_concentration(const float eumelanin,
+ const float pheomelanin)
{
- return eumelanin * make_float3(0.506f, 0.841f, 1.653f) +
- pheomelanin * make_float3(0.343f, 0.733f, 1.924f);
+ const float3 eumelanin_color = make_float3(0.506f, 0.841f, 1.653f);
+ const float3 pheomelanin_color = make_float3(0.343f, 0.733f, 1.924f);
+
+ return eumelanin * rgb_to_spectrum(eumelanin_color) +
+ pheomelanin * rgb_to_spectrum(pheomelanin_color);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 4fe36b5f235..e0b513b69f1 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -13,14 +13,14 @@
CCL_NAMESPACE_BEGIN
typedef struct MicrofacetExtra {
- float3 color, cspec0;
- float3 fresnel_color;
+ Spectrum color, cspec0;
+ Spectrum fresnel_color;
} MicrofacetExtra;
typedef struct MicrofacetExtrav2 {
/* Metallic fresnel control */
- float3 metal_base, metal_edge_factor;
- float3 metallic;
+ Spectrum metal_base, metal_edge_factor;
+ Spectrum metallic;
float dielectric;
} MicrofacetExtrav2;
@@ -46,19 +46,19 @@ static_assert(sizeof(ShaderClosure) >= sizeof(MicrofacetBsdf), "MicrofacetBsdf i
*
* Else it is simply white
*/
-ccl_device_forceinline float3 reflection_color(ccl_private const MicrofacetBsdf *bsdf,
- float3 L,
- float3 H)
+ccl_device_forceinline Spectrum reflection_color(ccl_private const MicrofacetBsdf *bsdf,
+ float3 L,
+ float3 H)
{
if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_ID) {
return interpolate_fresnel_color(L, H, bsdf->ior, bsdf->extra->cspec0);
}
else if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_ID) {
- return interpolate_fresnel_color(L, H, bsdf->ior, make_float3(0.04f, 0.04f, 0.04f));
+ return interpolate_fresnel_color(L, H, bsdf->ior, make_spectrum(0.04f));
}
else if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_CLEARCOAT_V2_ID) {
float f = fresnel_dielectric_cos(dot(H, L), bsdf->ior);
- return make_float3(f, f, f);
+ return make_spectrum(f);
}
else if (bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_FRESNEL_V2_ID) {
MicrofacetExtrav2 *extra = (MicrofacetExtrav2 *)bsdf->extra;
@@ -66,22 +66,22 @@ ccl_device_forceinline float3 reflection_color(ccl_private const MicrofacetBsdf
/* Metallic Fresnel: Kinda Schlick-Fresnel-like with configurable F0 and F90
* as well as falloff control. F90=white and falloff=0.2 gives classic Schlick Fresnel.
* Metallic factor and albedo scaling is baked into the F0 and F90 parameters. */
- float3 metallic = extra->metallic *
- fresnel_metallic(extra->metal_base, extra->metal_edge_factor, cosHL);
+ Spectrum metallic = extra->metallic *
+ fresnel_metallic(extra->metal_base, extra->metal_edge_factor, cosHL);
/* Dielectric Fresnel, just basic IOR control. */
float dielectric = extra->dielectric * fresnel_dielectric_cos(cosHL, bsdf->ior);
- return metallic + make_float3(dielectric, dielectric, dielectric);
+ return metallic + make_spectrum(dielectric);
}
else {
- return one_float3();
+ return one_spectrum();
}
}
ccl_device_forceinline void bsdf_microfacet_fresnel_color(ccl_private const ShaderData *sd,
ccl_private MicrofacetBsdf *bsdf)
{
- float3 average_fresnel = reflection_color(bsdf, sd->I, bsdf->N);
+ Spectrum average_fresnel = reflection_color(bsdf, sd->I, bsdf->N);
bsdf->sample_weight *= average(average_fresnel);
if (bsdf->extra) {
@@ -89,10 +89,10 @@ ccl_device_forceinline void bsdf_microfacet_fresnel_color(ccl_private const Shad
}
}
-ccl_device_inline float3 microfacet_ggx_albedo_scaling(KernelGlobals kg,
- ccl_private const MicrofacetBsdf *bsdf,
- ccl_private const ShaderData *sd,
- const float3 Fss)
+ccl_device_inline Spectrum microfacet_ggx_albedo_scaling(KernelGlobals kg,
+ ccl_private const MicrofacetBsdf *bsdf,
+ ccl_private const ShaderData *sd,
+ const Spectrum Fss)
{
float mu = dot(sd->I, bsdf->N);
float rough = sqrtf(sqrtf(bsdf->alpha_x * bsdf->alpha_y));
@@ -103,9 +103,9 @@ ccl_device_inline float3 microfacet_ggx_albedo_scaling(KernelGlobals kg,
* https://blog.selfshadow.com/publications/s2017-shading-course/imageworks/s2017_pbs_imageworks_slides_v2.pdf,
* with one Fss cancelled out since this is just a multiplier on top of
* the single-scattering BSDF, which already contains one bounce of Fresnel. */
- float3 Fms = Fss * E_avg / (one_float3() - Fss * (1.0f - E_avg));
+ Spectrum Fms = Fss * E_avg / (one_spectrum() - Fss * (1.0f - E_avg));
- return one_float3() + Fms * ((1.0f - E) / E);
+ return one_spectrum() + Fms * ((1.0f - E) / E);
/* TODO: Ensure that increase in weight does not mess up glossy color, albedo etc. passes */
}
@@ -134,18 +134,10 @@ ccl_device int bsdf_microfacet_ggx_setup(ccl_private MicrofacetBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-/* Required to maintain OSL interface. */
-ccl_device int bsdf_microfacet_ggx_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
-{
- bsdf->alpha_y = bsdf->alpha_x;
-
- return bsdf_microfacet_ggx_setup(bsdf);
-}
-
ccl_device int bsdf_microfacet_multi_ggx_setup(KernelGlobals kg,
ccl_private MicrofacetBsdf *bsdf,
ccl_private const ShaderData *sd,
- const float3 color)
+ const Spectrum color)
{
bsdf->weight *= microfacet_ggx_albedo_scaling(kg, bsdf, sd, saturate(color));
return bsdf_microfacet_ggx_setup(bsdf);
@@ -170,7 +162,7 @@ ccl_device int bsdf_microfacet_multi_ggx_fresnel_setup(KernelGlobals kg,
ccl_private MicrofacetBsdf *bsdf,
ccl_private const ShaderData *sd)
{
- float3 Fss = schlick_fresnel_Fss(bsdf->extra->cspec0);
+ Spectrum Fss = schlick_fresnel_Fss(bsdf->extra->cspec0);
bsdf->weight *= microfacet_ggx_albedo_scaling(kg, bsdf, sd, Fss);
return bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
}
@@ -187,13 +179,13 @@ ccl_device int bsdf_microfacet_ggx_fresnel_v2_setup(KernelGlobals kg,
MicrofacetExtrav2 *extra = (MicrofacetExtrav2 *)bsdf->extra;
if (metallic > 0.0f) {
- float3 metal_Fss = fresnel_metallic_Fss(extra->metal_base, extra->metal_edge_factor);
+ Spectrum metal_Fss = fresnel_metallic_Fss(extra->metal_base, extra->metal_edge_factor);
extra->metallic = metallic * microfacet_ggx_albedo_scaling(kg, bsdf, sd, metal_Fss);
}
else {
- extra->metallic = zero_float3();
- extra->metal_base = zero_float3();
- extra->metal_edge_factor = zero_float3();
+ extra->metallic = zero_spectrum();
+ extra->metal_base = zero_spectrum();
+ extra->metal_edge_factor = zero_spectrum();
}
if (dielectric > 0.0f) {
@@ -262,36 +254,26 @@ ccl_device void bsdf_microfacet_ggx_blur(ccl_private ShaderClosure *sc, float ro
bsdf->alpha_y = fmaxf(roughness, bsdf->alpha_y);
}
-ccl_device float3 bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_microfacet_ggx_eval_reflect(ccl_private const MicrofacetBsdf *bsdf,
+ const float3 N,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf,
+ const float alpha_x,
+ const float alpha_y,
+ const float cosNO,
+ const float cosNI)
{
- ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
float alpha2 = alpha_x * alpha_y;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
- float3 N = bsdf->N;
-
- if (m_refractive || alpha2 <= 1e-7f) {
+ if (!(cosNI > 0 && cosNO > 0)) {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
- /* Warning: Cycles' naming is misleading here!
- * I is the incoming direction in a ray-tracing sense, but in the shading context,
- * it is actually the outgoing direction since it points towards the camera.
- * Therefore, in the BSDF code, I is referred to as O and omega_in is referred to as I
- * in order to be consistent with papers.
- */
-
/* Ensure that both direction are in the upper hemisphere */
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
if (cosNI <= 0 || cosNO <= 0) {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
/* Compute half vector */
@@ -350,40 +332,31 @@ ccl_device float3 bsdf_microfacet_ggx_eval_reflect(ccl_private const ShaderClosu
/* Evaluate BSDF */
float common = D * 0.25f / cosNO;
- float3 F = reflection_color(bsdf, omega_in, m);
- float3 out = F * common / (1 + lambdaO + lambdaI);
+ Spectrum F = reflection_color(bsdf, omega_in, m);
+ Spectrum out = F * common / (1 + lambdaO + lambdaI);
*pdf = common / (1 + lambdaO);
return out;
}
-ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_microfacet_ggx_eval_transmit(ccl_private const MicrofacetBsdf *bsdf,
+ const float3 N,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf,
+ const float alpha_x,
+ const float alpha_y,
+ const float cosNO,
+ const float cosNI)
{
- ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
float alpha2 = alpha_x * alpha_y;
- float m_eta = bsdf->ior;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
- float3 N = bsdf->N;
-
- if (!m_refractive || alpha2 <= 1e-7f) {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-
- /* Ensure that both directions are in the expected hemispheres. */
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
if (cosNO <= 0 || cosNI >= 0) {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
/* Compute half vector */
+ float m_eta = bsdf->ior;
float3 ht = -(m_eta * omega_in + I);
float3 m = normalize(ht);
float cosMO = dot(m, I);
@@ -400,27 +373,53 @@ ccl_device float3 bsdf_microfacet_ggx_eval_transmit(ccl_private const ShaderClos
float out = common / (1 + lambdaO + lambdaI);
*pdf = common / (1 + lambdaO);
- return make_float3(out, out, out);
+ return make_spectrum(out);
+}
+
+ccl_device Spectrum bsdf_microfacet_ggx_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
+{
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ const float alpha_x = bsdf->alpha_x;
+ const float alpha_y = bsdf->alpha_y;
+ const bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+ const float3 N = bsdf->N;
+ const float cosNO = dot(N, I);
+ const float cosNI = dot(N, omega_in);
+
+ if (((cosNI < 0.0f) != m_refractive) || alpha_x * alpha_y <= 1e-7f) {
+ *pdf = 0.0f;
+ return zero_spectrum();
+ }
+
+ return (cosNI < 0.0f) ? bsdf_microfacet_ggx_eval_transmit(
+ bsdf, N, I, omega_in, pdf, alpha_x, alpha_y, cosNO, cosNI) :
+ bsdf_microfacet_ggx_eval_reflect(
+ bsdf, N, I, omega_in, pdf, alpha_x, alpha_y, cosNO, cosNI);
}
ccl_device int bsdf_microfacet_ggx_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness,
+ ccl_private float *eta)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
float alpha_x = bsdf->alpha_x;
float alpha_y = bsdf->alpha_y;
float alpha2 = alpha_x * alpha_y;
bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID;
+
+ *sampled_roughness = make_float2(alpha_x, alpha_y);
+ *eta = m_refractive ? 1.0f / bsdf->ior : bsdf->ior;
+
float3 N = bsdf->N;
/* Ensure that the view direction is in the upper hemisphere. */
@@ -455,11 +454,11 @@ ccl_device int bsdf_microfacet_ggx_sample(ccl_private const ShaderClosure *sc,
return LABEL_NONE;
}
- float3 F = reflection_color(bsdf, *omega_in, m);
+ Spectrum F = reflection_color(bsdf, *omega_in, m);
if (alpha2 <= 1e-7f) {
/* Specular case, just return some high number for MIS */
*pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f) * F;
+ *eval = make_spectrum(1e6f) * F;
return LABEL_REFLECT | LABEL_SINGULAR;
}
@@ -501,36 +500,15 @@ ccl_device int bsdf_microfacet_ggx_sample(ccl_private const ShaderClosure *sc,
*pdf = common / (1 + lambdaO);
*eval = common * F / (1 + lambdaO + lambdaI);
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
- *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
-#endif
-
return LABEL_REFLECT | LABEL_GLOSSY;
}
else {
/* Compute refracted direction */
float3 R, T;
-#ifdef __RAY_DIFFERENTIALS__
- float3 dRdx, dRdy, dTdx, dTdy;
-#endif
float m_eta = bsdf->ior, fresnel;
bool inside;
- fresnel = fresnel_dielectric(m_eta,
- m,
- I,
- &R,
- &T,
-#ifdef __RAY_DIFFERENTIALS__
- dIdx,
- dIdy,
- &dRdx,
- &dRdy,
- &dTdx,
- &dTdy,
-#endif
- &inside);
+ fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, &inside);
/* Ensure that the microfacet is nor masked and that we don't encounter TIR */
if (inside || fresnel == 1.0f) {
@@ -539,15 +517,11 @@ ccl_device int bsdf_microfacet_ggx_sample(ccl_private const ShaderClosure *sc,
}
*omega_in = T;
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dTdx;
- *domega_in_dy = dTdy;
-#endif
if (alpha2 <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
/* some high number for MIS */
*pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
+ *eval = make_spectrum(1e6f);
return LABEL_TRANSMIT | LABEL_SINGULAR;
}
@@ -565,7 +539,7 @@ ccl_device int bsdf_microfacet_ggx_sample(ccl_private const ShaderClosure *sc,
float out = common / (1 + lambdaO + lambdaI);
*pdf = common / (1 + lambdaO);
- *eval = make_float3(out, out, out);
+ *eval = make_spectrum(out);
return LABEL_TRANSMIT | LABEL_GLOSSY;
}
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_beckmann.h b/intern/cycles/kernel/closure/bsdf_microfacet_beckmann.h
index 2c15f467db9..4fabbcfc1eb 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_beckmann.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_beckmann.h
@@ -159,14 +159,6 @@ ccl_device int bsdf_microfacet_beckmann_setup(ccl_private MicrofacetBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-/* Required to maintain OSL interface. */
-ccl_device int bsdf_microfacet_beckmann_isotropic_setup(ccl_private MicrofacetBsdf *bsdf)
-{
- bsdf->alpha_y = bsdf->alpha_x;
-
- return bsdf_microfacet_beckmann_setup(bsdf);
-}
-
ccl_device int bsdf_microfacet_beckmann_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
{
bsdf->alpha_x = saturatef(bsdf->alpha_x);
@@ -215,110 +207,93 @@ ccl_device_inline float bsdf_beckmann_aniso_G1(
return ((2.181f * a + 3.535f) * a) / ((2.577f * a + 2.276f) * a + 1.0f);
}
-ccl_device float3 bsdf_microfacet_beckmann_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_microfacet_beckmann_eval_reflect(ccl_private const MicrofacetBsdf *bsdf,
+ const float3 N,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf,
+ const float alpha_x,
+ const float alpha_y,
+ const float cosNO,
+ const float cosNI)
{
- ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
- float3 N = bsdf->N;
-
- if (m_refractive || alpha_x * alpha_y <= 1e-7f) {
+ if (!(cosNO > 0 && cosNI > 0)) {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
-
- if (cosNO > 0 && cosNI > 0) {
- /* get half vector */
- float3 m = normalize(omega_in + I);
-
- float alpha2 = alpha_x * alpha_y;
- float D, G1o, G1i;
-
- if (alpha_x == alpha_y) {
- /* isotropic
- * eq. 20: (F*G*D)/(4*in*on)
- * eq. 25: first we calculate D(m) */
- float cosThetaM = dot(N, m);
- float cosThetaM2 = cosThetaM * cosThetaM;
- float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
- D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
-
- /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
- G1o = bsdf_beckmann_G1(alpha_x, cosNO);
- G1i = bsdf_beckmann_G1(alpha_x, cosNI);
- }
- else {
- /* anisotropic */
- float3 X, Y, Z = N;
- make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
+ /* get half vector */
+ float3 m = normalize(omega_in + I);
- /* distribution */
- float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
- float slope_x = -local_m.x / (local_m.z * alpha_x);
- float slope_y = -local_m.y / (local_m.z * alpha_y);
+ float alpha2 = alpha_x * alpha_y;
+ float D, G1o, G1i;
+
+ if (alpha_x == alpha_y) {
+ /* isotropic
+ * eq. 20: (F*G*D)/(4*in*on)
+ * eq. 25: first we calculate D(m) */
+ float cosThetaM = dot(N, m);
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float tanThetaM2 = (1 - cosThetaM2) / cosThetaM2;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ D = expf(-tanThetaM2 / alpha2) / (M_PI_F * alpha2 * cosThetaM4);
+
+ /* eq. 26, 27: now calculate G1(i,m) and G1(o,m) */
+ G1o = bsdf_beckmann_G1(alpha_x, cosNO);
+ G1i = bsdf_beckmann_G1(alpha_x, cosNI);
+ }
+ else {
+ /* anisotropic */
+ float3 X, Y, Z = N;
+ make_orthonormals_tangent(Z, bsdf->T, &X, &Y);
- float cosThetaM = local_m.z;
- float cosThetaM2 = cosThetaM * cosThetaM;
- float cosThetaM4 = cosThetaM2 * cosThetaM2;
+ /* distribution */
+ float3 local_m = make_float3(dot(X, m), dot(Y, m), dot(Z, m));
+ float slope_x = -local_m.x / (local_m.z * alpha_x);
+ float slope_y = -local_m.y / (local_m.z * alpha_y);
- D = expf(-slope_x * slope_x - slope_y * slope_y) / (M_PI_F * alpha2 * cosThetaM4);
+ float cosThetaM = local_m.z;
+ float cosThetaM2 = cosThetaM * cosThetaM;
+ float cosThetaM4 = cosThetaM2 * cosThetaM2;
- /* G1(i,m) and G1(o,m) */
- G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y));
- G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y));
- }
+ D = expf(-slope_x * slope_x - slope_y * slope_y) / (M_PI_F * alpha2 * cosThetaM4);
- float G = G1o * G1i;
+ /* G1(i,m) and G1(o,m) */
+ G1o = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNO, dot(I, X), dot(I, Y));
+ G1i = bsdf_beckmann_aniso_G1(alpha_x, alpha_y, cosNI, dot(omega_in, X), dot(omega_in, Y));
+ }
- /* eq. 20 */
- float common = D * 0.25f / cosNO;
- float out = G * common;
+ float G = G1o * G1i;
- /* eq. 2 in distribution of visible normals sampling
- * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
+ /* eq. 20 */
+ float common = D * 0.25f / cosNO;
+ float out = G * common;
- /* eq. 38 - but see also:
- * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
- * pdf = pm * 0.25 / dot(m, I); */
- *pdf = G1o * common;
+ /* eq. 2 in distribution of visible normals sampling
+ * pm = Dw = G1o * dot(m, I) * D / dot(N, I); */
- return make_float3(out, out, out);
- }
+ /* eq. 38 - but see also:
+ * eq. 17 in http://www.graphics.cornell.edu/~bjw/wardnotes.pdf
+ * pdf = pm * 0.25 / dot(m, I); */
+ *pdf = G1o * common;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return make_spectrum(out);
}
-ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_microfacet_beckmann_eval_transmit(ccl_private const MicrofacetBsdf *bsdf,
+ const float3 N,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf,
+ const float alpha_x,
+ const float alpha_y,
+ const float cosNO,
+ const float cosNI)
{
- ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- float m_eta = bsdf->ior;
- bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
- float3 N = bsdf->N;
-
- if (!m_refractive || alpha_x * alpha_y <= 1e-7f) {
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
-
+ const float m_eta = bsdf->ior;
if (cosNO <= 0 || cosNI >= 0) {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
/* compute half-vector of the refraction (eq. 16) */
float3 ht = -(m_eta * omega_in + I);
@@ -351,22 +326,44 @@ ccl_device float3 bsdf_microfacet_beckmann_eval_transmit(ccl_private const Shade
float out = G * fabsf(cosHI * cosHO) * common;
*pdf = G1o * fabsf(cosHO * cosHI) * common;
- return make_float3(out, out, out);
+ return make_spectrum(out);
+}
+
+ccl_device Spectrum bsdf_microfacet_beckmann_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
+{
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ const float alpha_x = bsdf->alpha_x;
+ const float alpha_y = bsdf->alpha_y;
+ const bool m_refractive = bsdf->type == CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID;
+ const float3 N = bsdf->N;
+ const float cosNO = dot(N, I);
+ const float cosNI = dot(N, omega_in);
+
+ if (((cosNI < 0.0f) != m_refractive) || alpha_x * alpha_y <= 1e-7f) {
+ *pdf = 0.0f;
+ return zero_spectrum();
+ }
+
+ return (cosNI < 0.0f) ? bsdf_microfacet_beckmann_eval_transmit(
+ bsdf, N, I, omega_in, pdf, alpha_x, alpha_y, cosNO, cosNI) :
+ bsdf_microfacet_beckmann_eval_reflect(
+ bsdf, N, I, omega_in, pdf, alpha_x, alpha_y, cosNO, cosNI);
}
ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness,
+ ccl_private float *eta)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
float alpha_x = bsdf->alpha_x;
@@ -375,6 +372,9 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
float3 N = bsdf->N;
int label;
+ *sampled_roughness = make_float2(alpha_x, alpha_y);
+ *eta = m_refractive ? 1.0f / bsdf->ior : bsdf->ior;
+
float cosNO = dot(N, I);
if (cosNO > 0) {
float3 X, Y, Z = N;
@@ -408,7 +408,7 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
if (alpha_x * alpha_y <= 1e-7f) {
/* some high number for MIS */
*pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
+ *eval = make_spectrum(1e6f);
label = LABEL_REFLECT | LABEL_SINGULAR;
}
else {
@@ -454,16 +454,11 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
float out = G * common;
*pdf = G1o * common;
- *eval = make_float3(out, out, out);
+ *eval = make_spectrum(out);
}
-
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(m, dIdx)) * m - dIdx;
- *domega_in_dy = (2 * dot(m, dIdy)) * m - dIdy;
-#endif
}
else {
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
*pdf = 0.0f;
}
}
@@ -474,39 +469,18 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
/* CAUTION: the i and o variables are inverted relative to the paper
* eq. 39 - compute actual refractive direction */
float3 R, T;
-#ifdef __RAY_DIFFERENTIALS__
- float3 dRdx, dRdy, dTdx, dTdy;
-#endif
float m_eta = bsdf->ior, fresnel;
bool inside;
- fresnel = fresnel_dielectric(m_eta,
- m,
- I,
- &R,
- &T,
-#ifdef __RAY_DIFFERENTIALS__
- dIdx,
- dIdy,
- &dRdx,
- &dRdy,
- &dTdx,
- &dTdy,
-#endif
- &inside);
+ fresnel = fresnel_dielectric(m_eta, m, I, &R, &T, &inside);
if (!inside && fresnel != 1.0f) {
*omega_in = T;
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dTdx;
- *domega_in_dy = dTdy;
-#endif
-
if (alpha_x * alpha_y <= 1e-7f || fabsf(m_eta - 1.0f) < 1e-4f) {
/* some high number for MIS */
*pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
+ *eval = make_spectrum(1e6f);
label = LABEL_TRANSMIT | LABEL_SINGULAR;
}
else {
@@ -535,11 +509,11 @@ ccl_device int bsdf_microfacet_beckmann_sample(KernelGlobals kg,
float out = G * fabsf(cosHI * cosHO) * common;
*pdf = G1o * cosHO * fabsf(cosHI) * common;
- *eval = make_float3(out, out, out);
+ *eval = make_spectrum(out);
}
}
else {
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
*pdf = 0.0f;
}
}
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_glass.h b/intern/cycles/kernel/closure/bsdf_microfacet_glass.h
index 121b9a2dd50..3c0ea32447b 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_glass.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_glass.h
@@ -9,11 +9,11 @@
CCL_NAMESPACE_BEGIN
-ccl_device_inline float3
+ccl_device_inline Spectrum
microfacet_ggx_glass_albedo_scaling(KernelGlobals kg,
ccl_private const ShaderData *sd,
ccl_private const MicrofacetBsdf *bsdf,
- const float3 Fss)
+ const Spectrum Fss)
{
float mu = dot(sd->I, bsdf->N);
float rough = sqrtf(sqrtf(bsdf->alpha_x * bsdf->alpha_y));
@@ -21,9 +21,9 @@ microfacet_ggx_glass_albedo_scaling(KernelGlobals kg,
/* Close enough for glass, coloring here is unphysical anyways and it's unclear how to
* approximate it better. */
- float3 Fms = Fss;
+ Spectrum Fms = Fss;
- return one_float3() + Fms * ((1.0f - E) / E);
+ return one_spectrum() + Fms * ((1.0f - E) / E);
/* TODO: Ensure that increase in weight does not mess up glossy color, albedo etc. passes */
}
@@ -33,7 +33,7 @@ microfacet_ggx_glass_albedo_scaling(KernelGlobals kg,
ccl_device int bsdf_microfacet_multi_ggx_glass_setup(KernelGlobals kg,
ccl_private MicrofacetBsdf *bsdf,
ccl_private const ShaderData *sd,
- const float3 color)
+ const Spectrum color)
{
bsdf->extra = NULL;
@@ -60,36 +60,28 @@ ccl_device int bsdf_microfacet_multi_ggx_glass_fresnel_setup(KernelGlobals kg,
bsdf_microfacet_fresnel_color(sd, bsdf);
- float3 Fss = schlick_fresnel_Fss(bsdf->extra->cspec0);
+ Spectrum Fss = schlick_fresnel_Fss(bsdf->extra->cspec0);
bsdf->weight *= microfacet_ggx_glass_albedo_scaling(kg, sd, bsdf, Fss);
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_microfacet_ggx_glass_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_microfacet_ggx_glass_eval_reflect(ccl_private const MicrofacetBsdf *bsdf,
+ const float3 N,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf,
+ const float alpha_x,
+ const float alpha_y,
+ const float cosNO,
+ const float cosNI)
{
- ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- float alpha2 = alpha_x * alpha_y;
-
- if (alpha2 <= 1e-7f) {
- *pdf = 0.0f;
- return zero_float3();
- }
-
- float3 N = bsdf->N;
-
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
if (cosNI <= 0 || cosNO <= 0) {
*pdf = 0.0f;
- return zero_float3();
+ return zero_spectrum();
}
+ float alpha2 = alpha_x * alpha_y;
float3 m = normalize(omega_in + I);
float D = microfacet_ggx_D(dot(N, m), alpha2);
float lambdaO = microfacet_ggx_lambda(cosNO, alpha2);
@@ -101,37 +93,30 @@ ccl_device float3 bsdf_microfacet_ggx_glass_eval_reflect(ccl_private const Shade
float out = F * common / (1 + lambdaO + lambdaI);
*pdf = common / (1 + lambdaO);
- float3 eval = make_float3(out, out, out);
+ Spectrum eval = make_spectrum(out);
if (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID) {
eval *= reflection_color(bsdf, omega_in, m);
}
return eval;
}
-ccl_device float3 bsdf_microfacet_ggx_glass_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_microfacet_ggx_glass_eval_transmit(ccl_private const MicrofacetBsdf *bsdf,
+ const float3 N,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf,
+ const float alpha_x,
+ const float alpha_y,
+ const float cosNO,
+ const float cosNI)
{
- ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
- float alpha_x = bsdf->alpha_x;
- float alpha_y = bsdf->alpha_y;
- float alpha2 = alpha_x * alpha_y;
- float eta = bsdf->ior;
- float3 N = bsdf->N;
- if (alpha2 <= 1e-7f) {
- *pdf = 0.0f;
- return zero_float3();
- }
-
- float cosNO = dot(N, I);
- float cosNI = dot(N, omega_in);
if (cosNO <= 0 || cosNI >= 0) {
*pdf = 0.0f;
- return zero_float3();
+ return zero_spectrum();
}
+ float eta = bsdf->ior;
float3 ht = -(eta * omega_in + I);
float3 m = normalize(ht);
float cosMO = dot(m, I);
@@ -141,9 +126,10 @@ ccl_device float3 bsdf_microfacet_ggx_glass_eval_transmit(ccl_private const Shad
if (F == 1.0f) {
/* TIR */
*pdf = 0.0f;
- return zero_float3();
+ return zero_spectrum();
}
+ float alpha2 = alpha_x * alpha_y;
float D = microfacet_ggx_D(dot(N, m), alpha2);
float lambdaO = microfacet_ggx_lambda(cosNO, alpha2);
float lambdaI = microfacet_ggx_lambda(cosNI, alpha2);
@@ -154,33 +140,56 @@ ccl_device float3 bsdf_microfacet_ggx_glass_eval_transmit(ccl_private const Shad
float out = (1.0f - F) * common / (1 + lambdaO + lambdaI);
*pdf = common / (1 + lambdaO);
- float3 eval = make_float3(out, out, out);
+ Spectrum eval = make_spectrum(out);
if (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID) {
eval *= bsdf->extra->color;
}
return eval;
}
+ccl_device Spectrum bsdf_microfacet_ggx_glass_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
+{
+ ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
+ const float alpha_x = bsdf->alpha_x;
+ const float alpha_y = bsdf->alpha_y;
+ const float3 N = bsdf->N;
+ const float cosNO = dot(N, I);
+ const float cosNI = dot(N, omega_in);
+
+ if (alpha_x * alpha_y <= 1e-7f) {
+ *pdf = 0.0f;
+ return zero_spectrum();
+ }
+
+ return (cosNI < 0.0f) ? bsdf_microfacet_ggx_glass_eval_transmit(
+ bsdf, N, I, omega_in, pdf, alpha_x, alpha_y, cosNO, cosNI) :
+ bsdf_microfacet_ggx_glass_eval_reflect(
+ bsdf, N, I, omega_in, pdf, alpha_x, alpha_y, cosNO, cosNI);
+}
+
ccl_device int bsdf_microfacet_ggx_glass_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness,
+ ccl_private float *eta)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
float alpha_x = bsdf->alpha_x;
float alpha_y = bsdf->alpha_y;
- float eta = bsdf->ior;
float3 N = bsdf->N;
int label;
+ *sampled_roughness = make_float2(alpha_x, alpha_y);
+ *eta = bsdf->ior; // TODO: Do we need to invert in case of refraction?
+
float cosNO = dot(N, I);
if (cosNO <= 0) {
*pdf = 0.0f;
@@ -205,24 +214,8 @@ ccl_device int bsdf_microfacet_ggx_glass_sample(ccl_private const ShaderClosure
}
float3 R, T;
-#ifdef __RAY_DIFFERENTIALS__
- float3 dRdx, dRdy, dTdx, dTdy;
-#endif
bool inside; /* Will never be inside, we already checked cosMO */
- float fresnel = fresnel_dielectric(eta,
- m,
- I,
- &R,
- &T,
-#ifdef __RAY_DIFFERENTIALS__
- dIdx,
- dIdy,
- &dRdx,
- &dRdy,
- &dTdx,
- &dTdy,
-#endif
- &inside);
+ float fresnel = fresnel_dielectric(bsdf->ior, m, I, &R, &T, &inside);
// TODO: Somehow get a properly stratified value here, this causes considerable noise
float randw = hash_float2_to_float(make_float2(randu, randv));
@@ -235,10 +228,6 @@ ccl_device int bsdf_microfacet_ggx_glass_sample(ccl_private const ShaderClosure
*eval = make_float3(1e6f, 1e6f, 1e6f);
*omega_in = do_reflect ? R : T;
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = do_reflect ? dRdx : dTdx;
- *domega_in_dy = do_reflect ? dRdy : dTdy;
-#endif
if (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID) {
*eval *= do_reflect ? reflection_color(bsdf, *omega_in, m) : bsdf->extra->color;
@@ -262,11 +251,6 @@ ccl_device int bsdf_microfacet_ggx_glass_sample(ccl_private const ShaderClosure
label = LABEL_REFLECT | LABEL_GLOSSY;
*omega_in = R;
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dRdx;
- *domega_in_dy = dRdy;
-#endif
-
common = fresnel * D * 0.25f / cosNO;
}
else {
@@ -278,21 +262,17 @@ ccl_device int bsdf_microfacet_ggx_glass_sample(ccl_private const ShaderClosure
label = LABEL_TRANSMIT | LABEL_GLOSSY;
*omega_in = T;
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dTdx;
- *domega_in_dy = dTdy;
-#endif
float cosMI = dot(m, *omega_in);
- float Ht2 = sqr(eta * cosMI + cosMO);
+ float Ht2 = sqr(bsdf->ior * cosMI + cosMO);
- common = (1.0f - fresnel) * D * fabsf(cosMI * cosMO) * sqr(eta) / (cosNO * Ht2);
+ common = (1.0f - fresnel) * D * fabsf(cosMI * cosMO) * sqr(bsdf->ior) / (cosNO * Ht2);
}
float lambdaI = microfacet_ggx_lambda(cosNI, alpha2);
float out = common / (1 + lambdaO + lambdaI);
*pdf = common / (1 + lambdaO);
- *eval = make_float3(out, out, out);
+ *eval = make_spectrum(out);
if (bsdf->type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_FRESNEL_ID) {
*eval *= do_reflect ? reflection_color(bsdf, *omega_in, m) : bsdf->extra->color;
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_util.h b/intern/cycles/kernel/closure/bsdf_microfacet_util.h
index a5c77763133..b531c192e03 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_util.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_util.h
@@ -92,7 +92,7 @@ ccl_device_forceinline float microfacet_ggx_glass_E(KernelGlobals kg,
kernel_data.tables.ggx_glass_E_offset;
float x = mu, y = 1 - rough;
- float z = sqrtf(0.5f * ((inv_table? 1.0f / ior : ior) - 1.0f));
+ float z = sqrtf(0.5f * ((inv_table ? 1.0f / ior : ior) - 1.0f));
return lookup_table_read_3D(kg, x, y, z, offset, 16, 16, 16);
}
@@ -109,7 +109,7 @@ ccl_device_forceinline float microfacet_ggx_dielectric_E(KernelGlobals kg,
float F0 = fresnel_dielectric_cos(1.0f, ior);
float x = mix(mu, inverse_lerp(1.0f, F0, macro_fresnel), 0.5f);
float y = 1 - rough;
- float z = sqrtf(0.5f * ((inv_table? 1.0f / ior : ior) - 1.0f));
+ float z = sqrtf(0.5f * ((inv_table ? 1.0f / ior : ior) - 1.0f));
return lookup_table_read_3D(kg, x, y, z, offset, 16, 16, 16);
}
@@ -131,14 +131,14 @@ ccl_device_forceinline float clearcoat_E(KernelGlobals kg, float mu, float rough
return table * fresnel_dielectric_cos(mu, 1.5f);
}
-ccl_device_inline float3 fresnel_metallic_Fss(float3 F0, float3 B)
+ccl_device_inline Spectrum fresnel_metallic_Fss(Spectrum F0, Spectrum B)
{
- return saturate(mix(F0, one_float3(), 1.0f / 21.0f) - B * (1.0f / 126.0f));
+ return saturate(mix(F0, one_spectrum(), 1.0f / 21.0f) - B * (1.0f / 126.0f));
}
-ccl_device_inline float3 schlick_fresnel_Fss(float3 F0)
+ccl_device_inline Spectrum schlick_fresnel_Fss(Spectrum F0)
{
- return saturate(mix(F0, one_float3(), 1.0f / 21.0f));
+ return saturate(mix(F0, one_spectrum(), 1.0f / 21.0f));
}
/* TODO Imageworks source */
diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
index 56c7ec869c7..6912d5b3f18 100644
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@ -15,10 +15,10 @@ typedef struct OrenNayarBsdf {
static_assert(sizeof(ShaderClosure) >= sizeof(OrenNayarBsdf), "OrenNayarBsdf is too large!");
-ccl_device float3 bsdf_oren_nayar_get_intensity(ccl_private const ShaderClosure *sc,
- float3 n,
- float3 v,
- float3 l)
+ccl_device Spectrum bsdf_oren_nayar_get_intensity(ccl_private const ShaderClosure *sc,
+ float3 n,
+ float3 v,
+ float3 l)
{
ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
float nl = max(dot(n, l), 0.0f);
@@ -28,7 +28,7 @@ ccl_device float3 bsdf_oren_nayar_get_intensity(ccl_private const ShaderClosure
if (t > 0.0f)
t /= max(nl, nv) + FLT_MIN;
float is = nl * (bsdf->a + bsdf->b * t);
- return make_float3(is, is, is);
+ return make_spectrum(is);
}
ccl_device int bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf)
@@ -47,10 +47,10 @@ ccl_device int bsdf_oren_nayar_setup(ccl_private OrenNayarBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_oren_nayar_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_oren_nayar_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
if (dot(bsdf->N, omega_in) > 0.0f) {
@@ -59,30 +59,17 @@ ccl_device float3 bsdf_oren_nayar_eval_reflect(ccl_private const ShaderClosure *
}
else {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
}
-ccl_device float3 bsdf_oren_nayar_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const OrenNayarBsdf *bsdf = (ccl_private const OrenNayarBsdf *)sc;
@@ -90,16 +77,10 @@ ccl_device int bsdf_oren_nayar_sample(ccl_private const ShaderClosure *sc,
if (dot(Ng, *omega_in) > 0.0f) {
*eval = bsdf_oren_nayar_get_intensity(sc, bsdf->N, I, *omega_in);
-
-#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the bounce
- *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
- *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
-#endif
}
else {
*pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
return LABEL_REFLECT | LABEL_DIFFUSE;
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 74a1f7ae090..04bc165af30 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -8,6 +8,8 @@
#pragma once
+#include "kernel/util/color.h"
+
CCL_NAMESPACE_BEGIN
#ifdef __OSL__
@@ -42,10 +44,10 @@ ccl_device int bsdf_phong_ramp_setup(ccl_private PhongRampBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_phong_ramp_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_phong_ramp_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
float m_exponent = bsdf->exponent;
@@ -61,48 +63,37 @@ ccl_device float3 bsdf_phong_ramp_eval_reflect(ccl_private const ShaderClosure *
float common = 0.5f * M_1_PI_F * cosp;
float out = cosNI * (m_exponent + 2) * common;
*pdf = (m_exponent + 1) * common;
- return bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
+ return rgb_to_spectrum(bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out);
}
}
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
-ccl_device float3 bsdf_phong_ramp_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device_inline float phong_ramp_exponent_to_roughness(float exponent)
{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return sqrt(1.0f / ((exponent + 2.0f) / 2.0f));
}
ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness)
{
ccl_private const PhongRampBsdf *bsdf = (ccl_private const PhongRampBsdf *)sc;
float cosNO = dot(bsdf->N, I);
float m_exponent = bsdf->exponent;
+ const float m_roughness = phong_ramp_exponent_to_roughness(m_exponent);
+ *sampled_roughness = make_float2(m_roughness, m_roughness);
if (cosNO > 0) {
// reflect the view vector
float3 R = (2 * cosNO) * bsdf->N - I;
-
-# ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
- *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
-# endif
-
float3 T, B;
make_orthonormals(R, &T, &B);
float phi = M_2PI_F * randu;
@@ -119,12 +110,12 @@ ccl_device int bsdf_phong_ramp_sample(ccl_private const ShaderClosure *sc,
float common = 0.5f * M_1_PI_F * cosp;
*pdf = (m_exponent + 1) * common;
float out = cosNI * (m_exponent + 2) * common;
- *eval = bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out;
+ *eval = rgb_to_spectrum(bsdf_phong_ramp_get_color(bsdf->colors, cosp) * out);
}
}
}
else {
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
*pdf = 0.0f;
}
return LABEL_REFLECT | LABEL_GLOSSY;
diff --git a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
index 5a7020e82d2..be8ee78fcac 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_diffuse.h
@@ -42,7 +42,7 @@ ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf *
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3
+ccl_device Spectrum
bsdf_principled_diffuse_compute_brdf(ccl_private const PrincipledDiffuseBsdf *bsdf,
float3 N,
float3 V,
@@ -52,7 +52,7 @@ bsdf_principled_diffuse_compute_brdf(ccl_private const PrincipledDiffuseBsdf *bs
const float NdotL = dot(N, L);
if (NdotL <= 0) {
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
const float NdotV = dot(N, V);
@@ -82,7 +82,7 @@ bsdf_principled_diffuse_compute_brdf(ccl_private const PrincipledDiffuseBsdf *bs
float value = M_1_PI_F * NdotL * f;
- return make_float3(value, value, value);
+ return make_spectrum(value);
}
/* Compute Fresnel at entry point, to be combined with #PRINCIPLED_DIFFUSE_LAMBERT_EXIT
@@ -109,47 +109,33 @@ ccl_device int bsdf_principled_diffuse_setup(ccl_private PrincipledDiffuseBsdf *
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_principled_diffuse_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_principled_diffuse_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
-
- float3 N = bsdf->N;
- float3 V = I; // outgoing
- float3 L = omega_in; // incoming
+ const float3 N = bsdf->N;
if (dot(N, omega_in) > 0.0f) {
+ const float3 V = I; // outgoing
+ const float3 L = omega_in; // incoming
*pdf = fmaxf(dot(N, omega_in), 0.0f) * M_1_PI_F;
return bsdf_principled_diffuse_compute_brdf(bsdf, N, V, L, pdf);
}
else {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
}
-ccl_device float3 bsdf_principled_diffuse_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)sc;
@@ -160,16 +146,10 @@ ccl_device int bsdf_principled_diffuse_sample(ccl_private const ShaderClosure *s
if (dot(Ng, *omega_in) > 0) {
*eval = bsdf_principled_diffuse_compute_brdf(bsdf, N, I, *omega_in, pdf);
-
-#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
- *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
-#endif
}
else {
*pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
return LABEL_REFLECT | LABEL_DIFFUSE;
}
diff --git a/intern/cycles/kernel/closure/bsdf_principled_sheen.h b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
index 5f4677cba3d..949a6f8f567 100644
--- a/intern/cycles/kernel/closure/bsdf_principled_sheen.h
+++ b/intern/cycles/kernel/closure/bsdf_principled_sheen.h
@@ -33,7 +33,7 @@ ccl_device_inline float calculate_avg_principled_sheen_brdf(float3 N, float3 I)
return schlick_fresnel(NdotI) * NdotI;
}
-ccl_device float3
+ccl_device Spectrum
calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, ccl_private float *pdf)
{
float NdotL = dot(N, L);
@@ -41,14 +41,14 @@ calculate_principled_sheen_brdf(float3 N, float3 V, float3 L, float3 H, ccl_priv
if (NdotL < 0 || NdotV < 0) {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
float LdotH = dot(L, H);
float value = schlick_fresnel(LdotH) * NdotL;
- return make_float3(value, value, value);
+ return make_spectrum(value);
}
/* Based on
@@ -72,11 +72,13 @@ ccl_device_inline float sheen_v2_lambda(float mu, float w)
return expf(exponent);
}
-ccl_device_inline float3 sheen_v2_eval(float3 N, float3 V, float3 L, float3 H, float r, float *pdf)
+ccl_device_inline Spectrum
+sheen_v2_eval(float3 N, float3 V, float3 L, float3 H, float r, float *pdf)
{
float cosNH = dot(N, H), cosNV = dot(N, V), cosNL = dot(N, L);
if (cosNH < 0 || cosNV < 0 || cosNL < 0) {
+ *pdf = 0.0f;
return zero_float3();
}
@@ -99,7 +101,7 @@ ccl_device_inline float3 sheen_v2_eval(float3 N, float3 V, float3 L, float3 H, f
* term G=1/(1+lambdaV+lambdaL).
*/
float val = D / (4 * cosNV * (1 + lambdaV + lambdaL));
- return make_float3(val, val, val);
+ return make_spectrum(val);
}
ccl_device_forceinline float sheen_v2_E(KernelGlobals kg, float mu, float rough)
@@ -131,19 +133,19 @@ ccl_device int bsdf_principled_sheen_v2_setup(KernelGlobals kg,
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_principled_sheen_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_principled_sheen_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
-
- float3 N = bsdf->N;
- float3 V = I; // outgoing
- float3 L = omega_in; // incoming
- float3 H = normalize(L + V);
+ const float3 N = bsdf->N;
if (dot(N, omega_in) > 0.0f) {
+ const float3 V = I; // outgoing
+ const float3 L = omega_in; // incoming
+ const float3 H = normalize(L + V);
+
*pdf = M_1_2PI_F;
if (bsdf->type == CLOSURE_BSDF_PRINCIPLED_SHEEN_V2_ID) {
return sheen_v2_eval(N, V, L, H, bsdf->roughness, pdf);
@@ -154,30 +156,17 @@ ccl_device float3 bsdf_principled_sheen_eval_reflect(ccl_private const ShaderClo
}
else {
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
}
-ccl_device float3 bsdf_principled_sheen_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const PrincipledSheenBsdf *bsdf = (ccl_private const PrincipledSheenBsdf *)sc;
@@ -195,15 +184,9 @@ ccl_device int bsdf_principled_sheen_sample(ccl_private const ShaderClosure *sc,
else {
*eval = calculate_principled_sheen_brdf(N, I, *omega_in, H, pdf);
}
-
-#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the diffuse bounce
- *domega_in_dx = -((2 * dot(N, dIdx)) * N - dIdx);
- *domega_in_dy = -((2 * dot(N, dIdy)) * N - dIdy);
-#endif
}
else {
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
*pdf = 0.0f;
}
return LABEL_REFLECT | LABEL_DIFFUSE;
diff --git a/intern/cycles/kernel/closure/bsdf_reflection.h b/intern/cycles/kernel/closure/bsdf_reflection.h
index c8db2b7cf13..2f761974e9a 100644
--- a/intern/cycles/kernel/closure/bsdf_reflection.h
+++ b/intern/cycles/kernel/closure/bsdf_reflection.h
@@ -18,57 +18,42 @@ ccl_device int bsdf_reflection_setup(ccl_private MicrofacetBsdf *bsdf)
return SD_BSDF;
}
-ccl_device float3 bsdf_reflection_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_reflection_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device float3 bsdf_reflection_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
ccl_device int bsdf_reflection_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float *eta)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
float3 N = bsdf->N;
+ *eta = bsdf->ior;
// only one direction is possible
float cosNO = dot(N, I);
if (cosNO > 0) {
*omega_in = (2 * cosNO) * N - I;
if (dot(Ng, *omega_in) > 0) {
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = 2 * dot(N, dIdx) * N - dIdx;
- *domega_in_dy = 2 * dot(N, dIdy) * N - dIdy;
-#endif
/* Some high number for MIS. */
*pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
+ *eval = make_spectrum(1e6f);
}
}
else {
*pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
return LABEL_REFLECT | LABEL_SINGULAR;
}
diff --git a/intern/cycles/kernel/closure/bsdf_refraction.h b/intern/cycles/kernel/closure/bsdf_refraction.h
index 862e774da87..e4f66245a0b 100644
--- a/intern/cycles/kernel/closure/bsdf_refraction.h
+++ b/intern/cycles/kernel/closure/bsdf_refraction.h
@@ -18,75 +18,45 @@ ccl_device int bsdf_refraction_setup(ccl_private MicrofacetBsdf *bsdf)
return SD_BSDF;
}
-ccl_device float3 bsdf_refraction_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_refraction_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device float3 bsdf_refraction_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
ccl_device int bsdf_refraction_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
- ccl_private float *pdf)
+ ccl_private float *pdf,
+ ccl_private float *eta)
{
ccl_private const MicrofacetBsdf *bsdf = (ccl_private const MicrofacetBsdf *)sc;
float m_eta = bsdf->ior;
+
+ *eta = 1.0f / m_eta;
float3 N = bsdf->N;
float3 R, T;
-#ifdef __RAY_DIFFERENTIALS__
- float3 dRdx, dRdy, dTdx, dTdy;
-#endif
bool inside;
float fresnel;
- fresnel = fresnel_dielectric(m_eta,
- N,
- I,
- &R,
- &T,
-#ifdef __RAY_DIFFERENTIALS__
- dIdx,
- dIdy,
- &dRdx,
- &dRdy,
- &dTdx,
- &dTdy,
-#endif
- &inside);
+ fresnel = fresnel_dielectric(m_eta, N, I, &R, &T, &inside);
if (!inside && fresnel != 1.0f) {
/* Some high number for MIS. */
*pdf = 1e6f;
- *eval = make_float3(1e6f, 1e6f, 1e6f);
+ *eval = make_spectrum(1e6f);
*omega_in = T;
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = dTdx;
- *domega_in_dy = dTdy;
-#endif
}
else {
*pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
return LABEL_TRANSMIT | LABEL_SINGULAR;
}
diff --git a/intern/cycles/kernel/closure/bsdf_toon.h b/intern/cycles/kernel/closure/bsdf_toon.h
index 0400fc61860..9f78c86b3b7 100644
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@ -30,7 +30,7 @@ ccl_device int bsdf_diffuse_toon_setup(ccl_private ToonBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float angle)
+ccl_device float bsdf_toon_get_intensity(float max_angle, float smooth, float angle)
{
float is;
@@ -41,7 +41,7 @@ ccl_device float3 bsdf_toon_get_intensity(float max_angle, float smooth, float a
else
is = 0.0f;
- return make_float3(is, is, is);
+ return is;
}
ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth)
@@ -49,48 +49,40 @@ ccl_device float bsdf_toon_get_sample_angle(float max_angle, float smooth)
return fminf(max_angle + smooth, M_PI_2_F);
}
-ccl_device float3 bsdf_diffuse_toon_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_diffuse_toon_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
- float max_angle = bsdf->size * M_PI_2_F;
- float smooth = bsdf->smooth * M_PI_2_F;
- float angle = safe_acosf(fmaxf(dot(bsdf->N, omega_in), 0.0f));
+ float cosNI = dot(bsdf->N, omega_in);
- float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
+ if (cosNI >= 0.0f) {
+ float max_angle = bsdf->size * M_PI_2_F;
+ float smooth = bsdf->smooth * M_PI_2_F;
+ float angle = safe_acosf(fmaxf(cosNI, 0.0f));
- if (eval.x > 0.0f) {
- float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+ float eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
- *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
- return *pdf * eval;
+ if (eval > 0.0f) {
+ float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
+
+ *pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
+ return make_spectrum(*pdf * eval);
+ }
}
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-ccl_device float3 bsdf_diffuse_toon_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
@@ -103,21 +95,15 @@ ccl_device int bsdf_diffuse_toon_sample(ccl_private const ShaderClosure *sc,
sample_uniform_cone(bsdf->N, sample_angle, randu, randv, omega_in, pdf);
if (dot(Ng, *omega_in) > 0.0f) {
- *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
-
-#ifdef __RAY_DIFFERENTIALS__
- // TODO: find a better approximation for the bounce
- *domega_in_dx = (2.0f * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
- *domega_in_dy = (2.0f * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
-#endif
+ *eval = make_spectrum(*pdf * bsdf_toon_get_intensity(max_angle, smooth, angle));
}
else {
- *eval = make_float3(0.f, 0.f, 0.f);
+ *eval = zero_spectrum();
*pdf = 0.0f;
}
}
else {
- *eval = make_float3(0.f, 0.f, 0.f);
+ *eval = zero_spectrum();
*pdf = 0.0f;
}
@@ -135,10 +121,10 @@ ccl_device int bsdf_glossy_toon_setup(ccl_private ToonBsdf *bsdf)
return SD_BSDF | SD_BSDF_HAS_EVAL;
}
-ccl_device float3 bsdf_glossy_toon_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_glossy_toon_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
float max_angle = bsdf->size * M_PI_2_F;
@@ -153,36 +139,23 @@ ccl_device float3 bsdf_glossy_toon_eval_reflect(ccl_private const ShaderClosure
float angle = safe_acosf(fmaxf(cosRI, 0.0f));
- float3 eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
+ float eval = bsdf_toon_get_intensity(max_angle, smooth, angle);
float sample_angle = bsdf_toon_get_sample_angle(max_angle, smooth);
*pdf = 0.5f * M_1_PI_F / (1.0f - cosf(sample_angle));
- return *pdf * eval;
+ return make_spectrum(*pdf * eval);
}
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device float3 bsdf_glossy_toon_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
ccl_private const ToonBsdf *bsdf = (ccl_private const ToonBsdf *)sc;
@@ -204,21 +177,16 @@ ccl_device int bsdf_glossy_toon_sample(ccl_private const ShaderClosure *sc,
/* make sure the direction we chose is still in the right hemisphere */
if (cosNI > 0) {
- *eval = *pdf * bsdf_toon_get_intensity(max_angle, smooth, angle);
-
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = (2 * dot(bsdf->N, dIdx)) * bsdf->N - dIdx;
- *domega_in_dy = (2 * dot(bsdf->N, dIdy)) * bsdf->N - dIdy;
-#endif
+ *eval = make_spectrum(*pdf * bsdf_toon_get_intensity(max_angle, smooth, angle));
}
else {
*pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
}
else {
*pdf = 0.0f;
- *eval = make_float3(0.0f, 0.0f, 0.0f);
+ *eval = zero_spectrum();
}
}
diff --git a/intern/cycles/kernel/closure/bsdf_transparent.h b/intern/cycles/kernel/closure/bsdf_transparent.h
index 636d9d664f2..9306e82b579 100644
--- a/intern/cycles/kernel/closure/bsdf_transparent.h
+++ b/intern/cycles/kernel/closure/bsdf_transparent.h
@@ -11,7 +11,7 @@
CCL_NAMESPACE_BEGIN
ccl_device void bsdf_transparent_setup(ccl_private ShaderData *sd,
- const float3 weight,
+ const Spectrum weight,
uint32_t path_flag)
{
/* Check cutoff weight. */
@@ -59,45 +59,28 @@ ccl_device void bsdf_transparent_setup(ccl_private ShaderData *sd,
}
}
-ccl_device float3 bsdf_transparent_eval_reflect(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum bsdf_transparent_eval(ccl_private const ShaderClosure *sc,
+ const float3 I,
+ const float3 omega_in,
+ ccl_private float *pdf)
{
*pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
-}
-
-ccl_device float3 bsdf_transparent_eval_transmit(ccl_private const ShaderClosure *sc,
- const float3 I,
- const float3 omega_in,
- ccl_private float *pdf)
-{
- *pdf = 0.0f;
- return make_float3(0.0f, 0.0f, 0.0f);
+ return zero_spectrum();
}
ccl_device int bsdf_transparent_sample(ccl_private const ShaderClosure *sc,
float3 Ng,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
// only one direction is possible
*omega_in = -I;
-#ifdef __RAY_DIFFERENTIALS__
- *domega_in_dx = -dIdx;
- *domega_in_dy = -dIdy;
-#endif
*pdf = 1;
- *eval = make_float3(1, 1, 1);
+ *eval = one_spectrum();
return LABEL_TRANSMIT | LABEL_TRANSPARENT;
}
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index 39ac4161c6e..13de4ac15b0 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -15,14 +15,6 @@ ccl_device float fresnel_dielectric(float eta,
const float3 I,
ccl_private float3 *R,
ccl_private float3 *T,
-#ifdef __RAY_DIFFERENTIALS__
- const float3 dIdx,
- const float3 dIdy,
- ccl_private float3 *dRdx,
- ccl_private float3 *dRdy,
- ccl_private float3 *dTdx,
- ccl_private float3 *dTdy,
-#endif
ccl_private bool *is_inside)
{
float cos = dot(N, I), neta;
@@ -45,28 +37,16 @@ ccl_device float fresnel_dielectric(float eta,
// compute reflection
*R = (2 * cos) * Nn - I;
-#ifdef __RAY_DIFFERENTIALS__
- *dRdx = (2 * dot(Nn, dIdx)) * Nn - dIdx;
- *dRdy = (2 * dot(Nn, dIdy)) * Nn - dIdy;
-#endif
float arg = 1 - (neta * neta * (1 - (cos * cos)));
if (arg < 0) {
*T = make_float3(0.0f, 0.0f, 0.0f);
-#ifdef __RAY_DIFFERENTIALS__
- *dTdx = make_float3(0.0f, 0.0f, 0.0f);
- *dTdy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
return 1; // total internal reflection
}
else {
float dnp = max(sqrtf(arg), 1e-7f);
float nK = (neta * cos) - dnp;
*T = -(neta * I) + (nK * Nn);
-#ifdef __RAY_DIFFERENTIALS__
- *dTdx = -(neta * dIdx) + ((neta - neta * neta * cos / dnp) * dot(dIdx, Nn)) * Nn;
- *dTdy = -(neta * dIdy) + ((neta - neta * neta * cos / dnp) * dot(dIdy, Nn)) * Nn;
-#endif
// compute Fresnel terms
float cosTheta1 = cos; // N.R
float cosTheta2 = -dot(Nn, *T);
@@ -91,14 +71,14 @@ ccl_device float fresnel_dielectric_cos(float cosi, float eta)
return 1.0f; // TIR(no refracted component)
}
-ccl_device float3 fresnel_conductor(float cosi, const float3 eta, const float3 k)
+ccl_device Spectrum fresnel_conductor(float cosi, const Spectrum eta, const Spectrum k)
{
- float3 cosi2 = make_float3(cosi * cosi, cosi * cosi, cosi * cosi);
- float3 one = make_float3(1.0f, 1.0f, 1.0f);
- float3 tmp_f = eta * eta + k * k;
- float3 tmp = tmp_f * cosi2;
- float3 Rparl2 = (tmp - (2.0f * eta * cosi) + one) / (tmp + (2.0f * eta * cosi) + one);
- float3 Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / (tmp_f + (2.0f * eta * cosi) + cosi2);
+ Spectrum cosi2 = make_spectrum(sqr(cosi));
+ Spectrum one = make_spectrum(1.0f);
+ Spectrum tmp_f = eta * eta + k * k;
+ Spectrum tmp = tmp_f * cosi2;
+ Spectrum Rparl2 = (tmp - (2.0f * eta * cosi) + one) / (tmp + (2.0f * eta * cosi) + one);
+ Spectrum Rperp2 = (tmp_f - (2.0f * eta * cosi) + cosi2) / (tmp_f + (2.0f * eta * cosi) + cosi2);
return (Rparl2 + Rperp2) * 0.5f;
}
@@ -116,27 +96,31 @@ ccl_device float schlick_fresnel(float u)
* Source:
* https://substance3d.adobe.com/documentation/s3d/files/225969599/225969601/1/1647019577092/Adobe+Standard+Material+-+Technical+Documentation.pdf
*/
-ccl_device float3 metallic_edge_factor(float3 F0, float3 F82)
+ccl_device Spectrum metallic_edge_factor(Spectrum F0, Spectrum F82)
{
- if (F82 == one_float3()) {
- return zero_float3();
+ if (F82 == one_spectrum()) {
+ return zero_spectrum();
}
- /* Precompute the B factor of the F82 model, which scales an additional term around cosI == 1/7. */
+ /* Precompute the B factor of the F82 model, which scales an additional term around cosI == 1/7.
+ */
const float f = 6.0f / 7.0f; /* 1 - cosI_max */
const float f5 = sqr(sqr(f)) * f;
- return (7.0f / (f5 * f)) * mix(F0, one_float3(), f5) * (one_float3() - F82);
+ return (7.0f / (f5 * f)) * mix(F0, one_spectrum(), f5) * (one_spectrum() - F82);
}
-ccl_device float3 fresnel_metallic(float3 F0, float3 B, float cosi)
+ccl_device Spectrum fresnel_metallic(Spectrum F0, Spectrum B, float cosi)
{
float s = saturatef(1.0f - cosi);
float s5 = sqr(sqr(s)) * s;
- return saturate(mix(F0, one_float3(), s5) - B * cosi * s5 * s);
+ return saturate(mix(F0, one_spectrum(), s5) - B * cosi * s5 * s);
}
/* Calculate the fresnel color which is a blend between white and the F0 color */
-ccl_device_forceinline float3 interpolate_fresnel_color(float3 L, float3 H, float ior, float3 F0)
+ccl_device_forceinline Spectrum interpolate_fresnel_color(Spectrum L,
+ Spectrum H,
+ float ior,
+ Spectrum F0)
{
/* Compute the real Fresnel term and remap it from real_F0...1 to F0...1.
* We could also just use actual Schlick fresnel (mix(F0, 1, (1-cosI)^5)) here. */
@@ -144,7 +128,7 @@ ccl_device_forceinline float3 interpolate_fresnel_color(float3 L, float3 H, floa
float F0_norm = 1.0f / (1.0f - real_F0);
float FH = (fresnel_dielectric_cos(dot(L, H), ior) - real_F0) * F0_norm;
- return mix(F0, one_float3(), FH);
+ return mix(F0, one_spectrum(), FH);
}
ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index b87790f5f8a..7131d9d8f38 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -8,8 +8,8 @@ CCL_NAMESPACE_BEGIN
typedef struct Bssrdf {
SHADER_CLOSURE_BASE;
- float3 radius;
- float3 albedo;
+ Spectrum radius;
+ Spectrum albedo;
float roughness;
float anisotropy;
} Bssrdf;
@@ -69,12 +69,13 @@ ccl_device void bssrdf_setup_radius(ccl_private Bssrdf *bssrdf,
const float fourthirdA = (4.0f / 3.0f) * (1.0f + F_dr) /
(1.0f - F_dr); /* From Jensen's `Fdr` ratio formula. */
- const float3 alpha_prime = make_float3(
- bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.x, fourthirdA),
- bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.y, fourthirdA),
- bssrdf_dipole_compute_alpha_prime(bssrdf->albedo.z, fourthirdA));
+ Spectrum alpha_prime;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ GET_SPECTRUM_CHANNEL(alpha_prime, i) = bssrdf_dipole_compute_alpha_prime(
+ GET_SPECTRUM_CHANNEL(bssrdf->albedo, i), fourthirdA);
+ }
- bssrdf->radius *= sqrt(3.0f * (one_float3() - alpha_prime));
+ bssrdf->radius *= sqrt(3.0f * (one_spectrum() - alpha_prime));
}
}
@@ -98,7 +99,7 @@ ccl_device_inline float bssrdf_burley_fitting(float A)
/* Scale mean free path length so it gives similar looking result
* to Cubic and Gaussian models. */
-ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
+ccl_device_inline Spectrum bssrdf_burley_compatible_mfp(Spectrum r)
{
return 0.25f * M_1_PI_F * r;
}
@@ -106,11 +107,13 @@ ccl_device_inline float3 bssrdf_burley_compatible_mfp(float3 r)
ccl_device void bssrdf_burley_setup(ccl_private Bssrdf *bssrdf)
{
/* Mean free path length. */
- const float3 l = bssrdf_burley_compatible_mfp(bssrdf->radius);
+ const Spectrum l = bssrdf_burley_compatible_mfp(bssrdf->radius);
/* Surface albedo. */
- const float3 A = bssrdf->albedo;
- const float3 s = make_float3(
- bssrdf_burley_fitting(A.x), bssrdf_burley_fitting(A.y), bssrdf_burley_fitting(A.z));
+ const Spectrum A = bssrdf->albedo;
+ Spectrum s;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ GET_SPECTRUM_CHANNEL(s, i) = bssrdf_burley_fitting(GET_SPECTRUM_CHANNEL(A, i));
+ }
bssrdf->radius = l / s;
}
@@ -198,22 +201,18 @@ ccl_device void bssrdf_burley_sample(const float d,
*h = safe_sqrtf(Rm * Rm - r_ * r_);
}
-ccl_device float bssrdf_num_channels(const float3 radius)
+ccl_device float bssrdf_num_channels(const Spectrum radius)
{
float channels = 0;
- if (radius.x > 0.0f) {
- channels += 1.0f;
- }
- if (radius.y > 0.0f) {
- channels += 1.0f;
- }
- if (radius.z > 0.0f) {
- channels += 1.0f;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ if (GET_SPECTRUM_CHANNEL(radius, i) > 0.0f) {
+ channels += 1.0f;
+ }
}
return channels;
}
-ccl_device void bssrdf_sample(const float3 radius,
+ccl_device void bssrdf_sample(const Spectrum radius,
float xi,
ccl_private float *r,
ccl_private float *h)
@@ -224,39 +223,44 @@ ccl_device void bssrdf_sample(const float3 radius,
/* Sample color channel and reuse random number. Only a subset of channels
* may be used if their radius was too small to handle as BSSRDF. */
xi *= num_channels;
-
- if (xi < 1.0f) {
- sampled_radius = (radius.x > 0.0f) ? radius.x : (radius.y > 0.0f) ? radius.y : radius.z;
- }
- else if (xi < 2.0f) {
- xi -= 1.0f;
- sampled_radius = (radius.x > 0.0f && radius.y > 0.0f) ? radius.y : radius.z;
- }
- else {
- xi -= 2.0f;
- sampled_radius = radius.z;
+ sampled_radius = 0.0f;
+
+ float sum = 0.0f;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ const float channel_radius = GET_SPECTRUM_CHANNEL(radius, i);
+ if (channel_radius > 0.0f) {
+ const float next_sum = sum + 1.0f;
+ if (xi < next_sum) {
+ xi -= sum;
+ sampled_radius = channel_radius;
+ break;
+ }
+ sum = next_sum;
+ }
}
/* Sample BSSRDF. */
bssrdf_burley_sample(sampled_radius, xi, r, h);
}
-ccl_device_forceinline float3 bssrdf_eval(const float3 radius, float r)
+ccl_device_forceinline Spectrum bssrdf_eval(const Spectrum radius, float r)
{
- return make_float3(bssrdf_burley_pdf(radius.x, r),
- bssrdf_burley_pdf(radius.y, r),
- bssrdf_burley_pdf(radius.z, r));
+ Spectrum result;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ GET_SPECTRUM_CHANNEL(result, i) = bssrdf_burley_pdf(GET_SPECTRUM_CHANNEL(radius, i), r);
+ }
+ return result;
}
-ccl_device_forceinline float bssrdf_pdf(const float3 radius, float r)
+ccl_device_forceinline float bssrdf_pdf(const Spectrum radius, float r)
{
- float3 pdf = bssrdf_eval(radius, r);
- return (pdf.x + pdf.y + pdf.z) / bssrdf_num_channels(radius);
+ Spectrum pdf = bssrdf_eval(radius, r);
+ return reduce_add(pdf) / bssrdf_num_channels(radius);
}
/* Setup */
-ccl_device_inline ccl_private Bssrdf *bssrdf_alloc(ccl_private ShaderData *sd, float3 weight)
+ccl_device_inline ccl_private Bssrdf *bssrdf_alloc(ccl_private ShaderData *sd, Spectrum weight)
{
ccl_private Bssrdf *bssrdf = (ccl_private Bssrdf *)closure_alloc(
sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
@@ -294,31 +298,20 @@ ccl_device int bssrdf_setup(ccl_private ShaderData *sd,
}
/* Verify if the radii are large enough to sample without precision issues. */
- int bssrdf_channels = 3;
- float3 diffuse_weight = make_float3(0.0f, 0.0f, 0.0f);
-
- if (bssrdf->radius.x < BSSRDF_MIN_RADIUS) {
- diffuse_weight.x = bssrdf->weight.x;
- bssrdf->weight.x = 0.0f;
- bssrdf->radius.x = 0.0f;
- bssrdf_channels--;
- }
- if (bssrdf->radius.y < BSSRDF_MIN_RADIUS) {
- diffuse_weight.y = bssrdf->weight.y;
- bssrdf->weight.y = 0.0f;
- bssrdf->radius.y = 0.0f;
- bssrdf_channels--;
- }
- if (bssrdf->radius.z < BSSRDF_MIN_RADIUS) {
- diffuse_weight.z = bssrdf->weight.z;
- bssrdf->weight.z = 0.0f;
- bssrdf->radius.z = 0.0f;
- bssrdf_channels--;
+ int bssrdf_channels = SPECTRUM_CHANNELS;
+ Spectrum diffuse_weight = zero_spectrum();
+
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ if (GET_SPECTRUM_CHANNEL(bssrdf->radius, i) < BSSRDF_MIN_RADIUS) {
+ GET_SPECTRUM_CHANNEL(diffuse_weight, i) = GET_SPECTRUM_CHANNEL(bssrdf->weight, i);
+ GET_SPECTRUM_CHANNEL(bssrdf->weight, i) = 0.0f;
+ GET_SPECTRUM_CHANNEL(bssrdf->radius, i) = 0.0f;
+ bssrdf_channels--;
+ }
}
- if (bssrdf_channels < 3) {
+ if (bssrdf_channels < SPECTRUM_CHANNELS) {
/* Add diffuse BSDF if any radius too small. */
-#ifdef __PRINCIPLED__
if (bssrdf->roughness != FLT_MAX) {
ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
sd, sizeof(PrincipledDiffuseBsdf), diffuse_weight);
@@ -329,9 +322,7 @@ ccl_device int bssrdf_setup(ccl_private ShaderData *sd,
flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT);
}
}
- else
-#endif /* __PRINCIPLED__ */
- {
+ else {
ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
sd, sizeof(DiffuseBsdf), diffuse_weight);
diff --git a/intern/cycles/kernel/closure/emissive.h b/intern/cycles/kernel/closure/emissive.h
index 03e19cbde21..d896721f77b 100644
--- a/intern/cycles/kernel/closure/emissive.h
+++ b/intern/cycles/kernel/closure/emissive.h
@@ -12,7 +12,7 @@ CCL_NAMESPACE_BEGIN
/* BACKGROUND CLOSURE */
-ccl_device void background_setup(ccl_private ShaderData *sd, const float3 weight)
+ccl_device void background_setup(ccl_private ShaderData *sd, const Spectrum weight)
{
if (sd->flag & SD_EMISSION) {
sd->closure_emission_background += weight;
@@ -25,7 +25,7 @@ ccl_device void background_setup(ccl_private ShaderData *sd, const float3 weight
/* EMISSION CLOSURE */
-ccl_device void emission_setup(ccl_private ShaderData *sd, const float3 weight)
+ccl_device void emission_setup(ccl_private ShaderData *sd, const Spectrum weight)
{
if (sd->flag & SD_EMISSION) {
sd->closure_emission_background += weight;
@@ -54,11 +54,11 @@ ccl_device void emissive_sample(const float3 Ng,
/* todo: not implemented and used yet */
}
-ccl_device float3 emissive_simple_eval(const float3 Ng, const float3 I)
+ccl_device Spectrum emissive_simple_eval(const float3 Ng, const float3 I)
{
float res = emissive_pdf(Ng, I);
- return make_float3(res, res, res);
+ return make_spectrum(res);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index ef414c7b821..9dbb5154457 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -7,7 +7,7 @@ CCL_NAMESPACE_BEGIN
/* VOLUME EXTINCTION */
-ccl_device void volume_extinction_setup(ccl_private ShaderData *sd, float3 weight)
+ccl_device void volume_extinction_setup(ccl_private ShaderData *sd, Spectrum weight)
{
if (sd->flag & SD_EXTINCTION) {
sd->closure_transparent_extinction += weight;
@@ -48,10 +48,10 @@ ccl_device int volume_henyey_greenstein_setup(ccl_private HenyeyGreensteinVolume
return SD_SCATTER;
}
-ccl_device float3 volume_henyey_greenstein_eval_phase(ccl_private const ShaderVolumeClosure *svc,
- const float3 I,
- float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum volume_henyey_greenstein_eval_phase(ccl_private const ShaderVolumeClosure *svc,
+ const float3 I,
+ float3 omega_in,
+ ccl_private float *pdf)
{
float g = svc->g;
@@ -64,7 +64,7 @@ ccl_device float3 volume_henyey_greenstein_eval_phase(ccl_private const ShaderVo
*pdf = single_peaked_henyey_greenstein(cos_theta, g);
}
- return make_float3(*pdf, *pdf, *pdf);
+ return make_spectrum(*pdf);
}
ccl_device float3
@@ -101,37 +101,27 @@ henyey_greenstrein_sample(float3 D, float g, float randu, float randv, ccl_priva
ccl_device int volume_henyey_greenstein_sample(ccl_private const ShaderVolumeClosure *svc,
float3 I,
- float3 dIdx,
- float3 dIdy,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private float3 *domega_in_dx,
- ccl_private float3 *domega_in_dy,
ccl_private float *pdf)
{
float g = svc->g;
/* note that I points towards the viewer and so is used negated */
*omega_in = henyey_greenstrein_sample(-I, g, randu, randv, pdf);
- *eval = make_float3(*pdf, *pdf, *pdf); /* perfect importance sampling */
-
-#ifdef __RAY_DIFFERENTIALS__
- /* todo: implement ray differential estimation */
- *domega_in_dx = make_float3(0.0f, 0.0f, 0.0f);
- *domega_in_dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
+ *eval = make_spectrum(*pdf); /* perfect importance sampling */
return LABEL_VOLUME_SCATTER;
}
/* VOLUME CLOSURE */
-ccl_device float3 volume_phase_eval(ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumeClosure *svc,
- float3 omega_in,
- ccl_private float *pdf)
+ccl_device Spectrum volume_phase_eval(ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumeClosure *svc,
+ float3 omega_in,
+ ccl_private float *pdf)
{
return volume_henyey_greenstein_eval_phase(svc, sd->I, omega_in, pdf);
}
@@ -140,22 +130,11 @@ ccl_device int volume_phase_sample(ccl_private const ShaderData *sd,
ccl_private const ShaderVolumeClosure *svc,
float randu,
float randv,
- ccl_private float3 *eval,
+ ccl_private Spectrum *eval,
ccl_private float3 *omega_in,
- ccl_private differential3 *domega_in,
ccl_private float *pdf)
{
- return volume_henyey_greenstein_sample(svc,
- sd->I,
- sd->dI.dx,
- sd->dI.dy,
- randu,
- randv,
- eval,
- omega_in,
- &domega_in->dx,
- &domega_in->dy,
- pdf);
+ return volume_henyey_greenstein_sample(svc, sd->I, randu, randv, eval, omega_in, pdf);
}
/* Volume sampling utilities. */
@@ -164,45 +143,44 @@ ccl_device int volume_phase_sample(ccl_private const ShaderData *sd,
* unnecessary work in volumes and subsurface scattering. */
#define VOLUME_THROUGHPUT_EPSILON 1e-6f
-ccl_device float3 volume_color_transmittance(float3 sigma, float t)
+ccl_device Spectrum volume_color_transmittance(Spectrum sigma, float t)
{
return exp(-sigma * t);
}
-ccl_device float volume_channel_get(float3 value, int channel)
+ccl_device float volume_channel_get(Spectrum value, int channel)
{
- return (channel == 0) ? value.x : ((channel == 1) ? value.y : value.z);
+ return GET_SPECTRUM_CHANNEL(value, channel);
}
-ccl_device int volume_sample_channel(float3 albedo,
- float3 throughput,
+ccl_device int volume_sample_channel(Spectrum albedo,
+ Spectrum throughput,
float rand,
- ccl_private float3 *pdf)
+ ccl_private Spectrum *pdf)
{
/* Sample color channel proportional to throughput and single scattering
* albedo, to significantly reduce noise with many bounce, following:
*
* "Practical and Controllable Subsurface Scattering for Production Path
* Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
- float3 weights = fabs(throughput * albedo);
- float sum_weights = weights.x + weights.y + weights.z;
+ Spectrum weights = fabs(throughput * albedo);
+ float sum_weights = reduce_add(weights);
if (sum_weights > 0.0f) {
*pdf = weights / sum_weights;
}
else {
- *pdf = make_float3(1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f);
+ *pdf = make_spectrum(1.0f / SPECTRUM_CHANNELS);
}
- if (rand < pdf->x) {
- return 0;
- }
- else if (rand < pdf->x + pdf->y) {
- return 1;
- }
- else {
- return 2;
+ float pdf_sum = 0.0f;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ pdf_sum += GET_SPECTRUM_CHANNEL(*pdf, i);
+ if (rand < pdf_sum) {
+ return i;
+ }
}
+ return SPECTRUM_CHANNELS - 1;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/data_arrays.h b/intern/cycles/kernel/data_arrays.h
index 7205f728088..f2877e6c37f 100644
--- a/intern/cycles/kernel/data_arrays.h
+++ b/intern/cycles/kernel/data_arrays.h
@@ -70,7 +70,7 @@ KERNEL_DATA_ARRAY(KernelShader, shaders)
/* lookup tables */
KERNEL_DATA_ARRAY(float, lookup_table)
-/* sobol */
+/* PMJ sample pattern */
KERNEL_DATA_ARRAY(float, sample_pattern_lut)
/* image textures */
diff --git a/intern/cycles/kernel/data_template.h b/intern/cycles/kernel/data_template.h
index 807d0650fc3..1e9e25f2f9d 100644
--- a/intern/cycles/kernel/data_template.h
+++ b/intern/cycles/kernel/data_template.h
@@ -133,6 +133,10 @@ KERNEL_STRUCT_MEMBER(film, int, pass_bake_primitive)
KERNEL_STRUCT_MEMBER(film, int, pass_bake_differential)
/* Shadow catcher. */
KERNEL_STRUCT_MEMBER(film, int, use_approximate_shadow_catcher)
+/* Path Guiding */
+KERNEL_STRUCT_MEMBER(film, int, pass_guiding_color)
+KERNEL_STRUCT_MEMBER(film, int, pass_guiding_probability)
+KERNEL_STRUCT_MEMBER(film, int, pass_guiding_avg_roughness)
/* Padding. */
KERNEL_STRUCT_MEMBER(film, int, pad1)
KERNEL_STRUCT_MEMBER(film, int, pad2)
@@ -190,8 +194,17 @@ KERNEL_STRUCT_MEMBER(integrator, int, has_shadow_catcher)
KERNEL_STRUCT_MEMBER(integrator, int, filter_closures)
/* MIS debugging. */
KERNEL_STRUCT_MEMBER(integrator, int, direct_light_sampling_type)
-/* Padding */
-KERNEL_STRUCT_MEMBER(integrator, int, pad1)
+
+/* Path Guiding */
+KERNEL_STRUCT_MEMBER(integrator, float, surface_guiding_probability)
+KERNEL_STRUCT_MEMBER(integrator, float, volume_guiding_probability)
+KERNEL_STRUCT_MEMBER(integrator, int, guiding_distribution_type)
+KERNEL_STRUCT_MEMBER(integrator, int, use_guiding)
+KERNEL_STRUCT_MEMBER(integrator, int, train_guiding)
+KERNEL_STRUCT_MEMBER(integrator, int, use_surface_guiding)
+KERNEL_STRUCT_MEMBER(integrator, int, use_volume_guiding)
+KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_direct_light)
+KERNEL_STRUCT_MEMBER(integrator, int, use_guiding_mis_weights)
KERNEL_STRUCT_END(KernelIntegrator)
/* SVM. For shader specialization. */
diff --git a/intern/cycles/kernel/device/cpu/bvh.h b/intern/cycles/kernel/device/cpu/bvh.h
new file mode 100644
index 00000000000..2d7d8c2d704
--- /dev/null
+++ b/intern/cycles/kernel/device/cpu/bvh.h
@@ -0,0 +1,582 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* CPU Embree implementation of ray-scene intersection. */
+
+#pragma once
+
+#include <embree3/rtcore_ray.h>
+#include <embree3/rtcore_scene.h>
+
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/globals.h"
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+#include "kernel/geom/object.h"
+#include "kernel/integrator/state.h"
+#include "kernel/sample/lcg.h"
+
+#include "util/vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+#define EMBREE_IS_HAIR(x) (x & 1)
+
+/* Intersection context. */
+
+struct CCLIntersectContext {
+ typedef enum {
+ RAY_REGULAR = 0,
+ RAY_SHADOW_ALL = 1,
+ RAY_LOCAL = 2,
+ RAY_SSS = 3,
+ RAY_VOLUME_ALL = 4,
+ } RayType;
+
+ KernelGlobals kg;
+ RayType type;
+
+ /* For avoiding self intersections */
+ const Ray *ray;
+
+ /* for shadow rays */
+ Intersection *isect_s;
+ uint max_hits;
+ uint num_hits;
+ uint num_recorded_hits;
+ float throughput;
+ float max_t;
+ bool opaque_hit;
+
+ /* for SSS Rays: */
+ LocalIntersection *local_isect;
+ int local_object_id;
+ uint *lcg_state;
+
+ CCLIntersectContext(KernelGlobals kg_, RayType type_)
+ {
+ kg = kg_;
+ type = type_;
+ ray = NULL;
+ max_hits = 1;
+ num_hits = 0;
+ num_recorded_hits = 0;
+ throughput = 1.0f;
+ max_t = FLT_MAX;
+ opaque_hit = false;
+ isect_s = NULL;
+ local_isect = NULL;
+ local_object_id = -1;
+ lcg_state = NULL;
+ }
+};
+
+class IntersectContext {
+ public:
+ IntersectContext(CCLIntersectContext *ctx)
+ {
+ rtcInitIntersectContext(&context);
+ userRayExt = ctx;
+ }
+ RTCIntersectContext context;
+ CCLIntersectContext *userRayExt;
+};
+
+/* Utilities. */
+
+ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
+ RTCRay &rtc_ray,
+ const uint visibility)
+{
+ rtc_ray.org_x = ray.P.x;
+ rtc_ray.org_y = ray.P.y;
+ rtc_ray.org_z = ray.P.z;
+ rtc_ray.dir_x = ray.D.x;
+ rtc_ray.dir_y = ray.D.y;
+ rtc_ray.dir_z = ray.D.z;
+ rtc_ray.tnear = ray.tmin;
+ rtc_ray.tfar = ray.tmax;
+ rtc_ray.time = ray.time;
+ rtc_ray.mask = visibility;
+}
+
+ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
+ RTCRayHit &rayhit,
+ const uint visibility)
+{
+ kernel_embree_setup_ray(ray, rayhit.ray, visibility);
+ rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
+ rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
+}
+
+ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
+ const RTCHit *hit,
+ const Ray *ray)
+{
+ int object, prim;
+
+ if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
+ object = hit->instID[0] / 2;
+ if ((ray->self.object == object) || (ray->self.light_object == object)) {
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
+ prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
+ }
+ else {
+ return false;
+ }
+ }
+ else {
+ object = hit->geomID / 2;
+ if ((ray->self.object == object) || (ray->self.light_object == object)) {
+ prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
+ }
+ else {
+ return false;
+ }
+ }
+
+ const bool is_hair = hit->geomID & 1;
+ if (is_hair) {
+ prim = kernel_data_fetch(curve_segments, prim).prim;
+ }
+
+ return intersection_skip_self_shadow(ray->self, object, prim);
+}
+
+ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
+ const RTCRay *ray,
+ const RTCHit *hit,
+ Intersection *isect)
+{
+ isect->t = ray->tfar;
+ if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.device_bvh, hit->instID[0]));
+ isect->prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
+ isect->object = hit->instID[0] / 2;
+ }
+ else {
+ isect->prim = hit->primID + (intptr_t)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.device_bvh, hit->geomID));
+ isect->object = hit->geomID / 2;
+ }
+
+ const bool is_hair = hit->geomID & 1;
+ if (is_hair) {
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
+ isect->type = segment.type;
+ isect->prim = segment.prim;
+ isect->u = hit->u;
+ isect->v = hit->v;
+ }
+ else {
+ isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
+ isect->u = hit->u;
+ isect->v = hit->v;
+ }
+}
+
+ccl_device_inline void kernel_embree_convert_sss_hit(
+ KernelGlobals kg, const RTCRay *ray, const RTCHit *hit, Intersection *isect, int object)
+{
+ isect->u = hit->u;
+ isect->v = hit->v;
+ isect->t = ray->tfar;
+ RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
+ rtcGetGeometry(kernel_data.device_bvh, object * 2));
+ isect->prim = hit->primID +
+ (intptr_t)rtcGetGeometryUserData(rtcGetGeometry(inst_scene, hit->geomID));
+ isect->object = object;
+ isect->type = kernel_data_fetch(objects, object).primitive_type;
+}
+
+/* Ray filter functions. */
+
+/* This gets called by Embree at every valid ray/object intersection.
+ * Things like recording subsurface or shadow hits for later evaluation
+ * as well as filtering for volume objects happen here.
+ * Cycles' own BVH does that directly inside the traversal calls. */
+ccl_device void kernel_embree_filter_intersection_func(const RTCFilterFunctionNArguments *args)
+{
+ /* Current implementation in Cycles assumes only single-ray intersection queries. */
+ assert(args->N == 1);
+
+ RTCHit *hit = (RTCHit *)args->hit;
+ CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+ const KernelGlobalsCPU *kg = ctx->kg;
+ const Ray *cray = ctx->ray;
+
+ if (kernel_embree_is_self_intersection(kg, hit, cray)) {
+ *args->valid = 0;
+ }
+}
+
+/* This gets called by Embree at every valid ray/object intersection.
+ * Things like recording subsurface or shadow hits for later evaluation
+ * as well as filtering for volume objects happen here.
+ * Cycles' own BVH does that directly inside the traversal calls.
+ */
+ccl_device void kernel_embree_filter_occluded_func(const RTCFilterFunctionNArguments *args)
+{
+ /* Current implementation in Cycles assumes only single-ray intersection queries. */
+ assert(args->N == 1);
+
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+ CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+ const KernelGlobalsCPU *kg = ctx->kg;
+ const Ray *cray = ctx->ray;
+
+ switch (ctx->type) {
+ case CCLIntersectContext::RAY_SHADOW_ALL: {
+ Intersection current_isect;
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+ /* If no transparent shadows or max number of hits exceeded, all light is blocked. */
+ const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
+ if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
+ ctx->opaque_hit = true;
+ return;
+ }
+
+ ++ctx->num_hits;
+
+ /* Always use baked shadow transparency for curves. */
+ if (current_isect.type & PRIMITIVE_CURVE) {
+ ctx->throughput *= intersection_curve_shadow_transparency(
+ kg, current_isect.object, current_isect.prim, current_isect.type, current_isect.u);
+
+ if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ ctx->opaque_hit = true;
+ return;
+ }
+ else {
+ *args->valid = 0;
+ return;
+ }
+ }
+
+ /* Test if we need to record this transparent intersection. */
+ const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) {
+ /* If maximum number of hits was reached, replace the intersection with the
+ * highest distance. We want to find the N closest intersections. */
+ const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
+ uint isect_index = num_recorded_hits;
+ if (num_recorded_hits + 1 >= max_record_hits) {
+ float max_t = ctx->isect_s[0].t;
+ uint max_recorded_hit = 0;
+
+ for (uint i = 1; i < num_recorded_hits; ++i) {
+ if (ctx->isect_s[i].t > max_t) {
+ max_recorded_hit = i;
+ max_t = ctx->isect_s[i].t;
+ }
+ }
+
+ if (num_recorded_hits >= max_record_hits) {
+ isect_index = max_recorded_hit;
+ }
+
+ /* Limit the ray distance and stop counting hits beyond this.
+ * TODO: is there some way we can tell Embree to stop intersecting beyond
+ * this distance when max number of hits is reached?. Or maybe it will
+ * become irrelevant if we make max_hits a very high number on the CPU. */
+ ctx->max_t = max(current_isect.t, max_t);
+ }
+
+ ctx->isect_s[isect_index] = current_isect;
+ }
+
+ /* Always increase the number of recorded hits, even beyond the maximum,
+ * so that we can detect this and trace another ray if needed. */
+ ++ctx->num_recorded_hits;
+
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ case CCLIntersectContext::RAY_LOCAL:
+ case CCLIntersectContext::RAY_SSS: {
+ /* Check if it's hitting the correct object. */
+ Intersection current_isect;
+ if (ctx->type == CCLIntersectContext::RAY_SSS) {
+ kernel_embree_convert_sss_hit(kg, ray, hit, &current_isect, ctx->local_object_id);
+ }
+ else {
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (ctx->local_object_id != current_isect.object) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ }
+ if (intersection_skip_self_local(cray->self, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+
+ /* No intersection information requested, just return a hit. */
+ if (ctx->max_hits == 0) {
+ break;
+ }
+
+ /* Ignore curves. */
+ if (EMBREE_IS_HAIR(hit->geomID)) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+
+ LocalIntersection *local_isect = ctx->local_isect;
+ int hit_idx = 0;
+
+ if (ctx->lcg_state) {
+ /* See triangle_intersect_subsurface() for the native equivalent. */
+ for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (local_isect->hits[i].t == ray->tfar) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ return;
+ }
+ }
+
+ local_isect->num_hits++;
+
+ if (local_isect->num_hits <= ctx->max_hits) {
+ hit_idx = local_isect->num_hits - 1;
+ }
+ else {
+ /* reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it */
+ hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
+
+ if (hit_idx >= ctx->max_hits) {
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ return;
+ }
+ }
+ }
+ else {
+ /* Record closest intersection only. */
+ if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
+ *args->valid = 0;
+ return;
+ }
+
+ local_isect->num_hits = 1;
+ }
+
+ /* record intersection */
+ local_isect->hits[hit_idx] = current_isect;
+ local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ break;
+ }
+ case CCLIntersectContext::RAY_VOLUME_ALL: {
+ /* Append the intersection to the end of the array. */
+ if (ctx->num_hits < ctx->max_hits) {
+ Intersection current_isect;
+ kernel_embree_convert_hit(kg, ray, hit, &current_isect);
+ if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
+ *args->valid = 0;
+ return;
+ }
+
+ Intersection *isect = &ctx->isect_s[ctx->num_hits];
+ ++ctx->num_hits;
+ *isect = current_isect;
+ /* Only primitives from volume object. */
+ uint tri_object = isect->object;
+ int object_flag = kernel_data_fetch(object_flag, tri_object);
+ if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
+ --ctx->num_hits;
+ }
+ /* This tells Embree to continue tracing. */
+ *args->valid = 0;
+ }
+ break;
+ }
+ case CCLIntersectContext::RAY_REGULAR:
+ default:
+ if (kernel_embree_is_self_intersection(kg, hit, cray)) {
+ *args->valid = 0;
+ return;
+ }
+ break;
+ }
+}
+
+ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionNArguments *args)
+{
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+
+ /* Always ignore back-facing intersections. */
+ if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
+ make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+ *args->valid = 0;
+ return;
+ }
+
+ CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
+ const KernelGlobalsCPU *kg = ctx->kg;
+ const Ray *cray = ctx->ray;
+
+ if (kernel_embree_is_self_intersection(kg, hit, cray)) {
+ *args->valid = 0;
+ }
+}
+
+ccl_device void kernel_embree_filter_occluded_func_backface_cull(
+ const RTCFilterFunctionNArguments *args)
+{
+ const RTCRay *ray = (RTCRay *)args->ray;
+ RTCHit *hit = (RTCHit *)args->hit;
+
+ /* Always ignore back-facing intersections. */
+ if (dot(make_float3(ray->dir_x, ray->dir_y, ray->dir_z),
+ make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z)) > 0.0f) {
+ *args->valid = 0;
+ return;
+ }
+
+ kernel_embree_filter_occluded_func(args);
+}
+
+/* Scene intersection. */
+
+ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ const uint visibility,
+ ccl_private Intersection *isect)
+{
+ isect->t = ray->tmax;
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
+ IntersectContext rtc_ctx(&ctx);
+ RTCRayHit ray_hit;
+ ctx.ray = ray;
+ kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
+ rtcIntersect1(kernel_data.device_bvh, &rtc_ctx.context, &ray_hit);
+ if (ray_hit.hit.geomID == RTC_INVALID_GEOMETRY_ID ||
+ ray_hit.hit.primID == RTC_INVALID_GEOMETRY_ID) {
+ return false;
+ }
+
+ kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
+ return true;
+}
+
+#ifdef __BVH_LOCAL__
+ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private LocalIntersection *local_isect,
+ int local_object,
+ ccl_private uint *lcg_state,
+ int max_hits)
+{
+ const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
+ SD_OBJECT_TRANSFORM_APPLIED);
+ CCLIntersectContext ctx(kg,
+ has_bvh ? CCLIntersectContext::RAY_SSS : CCLIntersectContext::RAY_LOCAL);
+ ctx.lcg_state = lcg_state;
+ ctx.max_hits = max_hits;
+ ctx.ray = ray;
+ ctx.local_isect = local_isect;
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ ctx.local_object_id = local_object;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+
+ /* If this object has its own BVH, use it. */
+ if (has_bvh) {
+ RTCGeometry geom = rtcGetGeometry(kernel_data.device_bvh, local_object * 2);
+ if (geom) {
+ float3 P = ray->P;
+ float3 dir = ray->D;
+ float3 idir = ray->D;
+ bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
+
+ rtc_ray.org_x = P.x;
+ rtc_ray.org_y = P.y;
+ rtc_ray.org_z = P.z;
+ rtc_ray.dir_x = dir.x;
+ rtc_ray.dir_y = dir.y;
+ rtc_ray.dir_z = dir.z;
+ rtc_ray.tnear = ray->tmin;
+ rtc_ray.tfar = ray->tmax;
+ RTCScene scene = (RTCScene)rtcGetGeometryUserData(geom);
+ kernel_assert(scene);
+ if (scene) {
+ rtcOccluded1(scene, &rtc_ctx.context, &rtc_ray);
+ }
+ }
+ }
+ else {
+ rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+ }
+
+ /* rtcOccluded1 sets tfar to -inf if a hit was found. */
+ return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
+ IntegratorShadowStateCPU *state,
+ ccl_private const Ray *ray,
+ uint visibility,
+ uint max_hits,
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
+{
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL);
+ Intersection *isect_array = (Intersection *)state->shadow_isect;
+ ctx.isect_s = isect_array;
+ ctx.max_hits = max_hits;
+ ctx.ray = ray;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+ rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+
+ *num_recorded_hits = ctx.num_recorded_hits;
+ *throughput = ctx.throughput;
+ return ctx.opaque_hit;
+}
+#endif
+
+#ifdef __VOLUME__
+ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint max_hits,
+ const uint visibility)
+{
+ CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL);
+ ctx.isect_s = isect;
+ ctx.max_hits = max_hits;
+ ctx.num_hits = 0;
+ ctx.ray = ray;
+ IntersectContext rtc_ctx(&ctx);
+ RTCRay rtc_ray;
+ kernel_embree_setup_ray(*ray, rtc_ray, visibility);
+ rtcOccluded1(kernel_data.device_bvh, &rtc_ctx.context, &rtc_ray);
+ return ctx.num_hits;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h
index 3bfc37e98ee..1e3e790ca1f 100644
--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@@ -3,8 +3,6 @@
#pragma once
-#define __KERNEL_CPU__
-
/* Release kernel has too much false-positive maybe-uninitialized warnings,
* which makes it possible to miss actual warnings.
*/
@@ -35,38 +33,4 @@ CCL_NAMESPACE_BEGIN
#define kernel_assert(cond) assert(cond)
-/* Macros to handle different memory storage on different devices */
-
-#ifdef __KERNEL_SSE2__
-typedef vector3<sseb> sse3b;
-typedef vector3<ssef> sse3f;
-typedef vector3<ssei> sse3i;
-
-ccl_device_inline void print_sse3b(const char *label, sse3b &a)
-{
- print_sseb(label, a.x);
- print_sseb(label, a.y);
- print_sseb(label, a.z);
-}
-
-ccl_device_inline void print_sse3f(const char *label, sse3f &a)
-{
- print_ssef(label, a.x);
- print_ssef(label, a.y);
- print_ssef(label, a.z);
-}
-
-ccl_device_inline void print_sse3i(const char *label, sse3i &a)
-{
- print_ssei(label, a.x);
- print_ssei(label, a.y);
- print_ssei(label, a.z);
-}
-
-# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
-typedef vector3<avxf> avx3f;
-# endif
-
-#endif
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h
index 309afae412e..f7f1a36b2a7 100644
--- a/intern/cycles/kernel/device/cpu/globals.h
+++ b/intern/cycles/kernel/device/cpu/globals.h
@@ -9,6 +9,8 @@
#include "kernel/types.h"
#include "kernel/util/profiling.h"
+#include "util/guiding.h"
+
CCL_NAMESPACE_BEGIN
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
@@ -43,9 +45,20 @@ typedef struct KernelGlobalsCPU {
#ifdef __OSL__
/* On the CPU, we also have the OSL globals here. Most data structures are shared
* with SVM, the difference is in the shaders and object/mesh attributes. */
- OSLGlobals *osl;
- OSLShadingSystem *osl_ss;
- OSLThreadData *osl_tdata;
+ OSLGlobals *osl = nullptr;
+ OSLShadingSystem *osl_ss = nullptr;
+ OSLThreadData *osl_tdata = nullptr;
+#endif
+
+#ifdef __PATH_GUIDING__
+ /* Pointers to global data structures. */
+ openpgl::cpp::SampleStorage *opgl_sample_data_storage = nullptr;
+ openpgl::cpp::Field *opgl_guiding_field = nullptr;
+
+ /* Local data structures owned by the thread. */
+ openpgl::cpp::PathSegmentStorage *opgl_path_segment_storage = nullptr;
+ openpgl::cpp::SurfaceSamplingDistribution *opgl_surface_sampling_distribution = nullptr;
+ openpgl::cpp::VolumeSamplingDistribution *opgl_volume_sampling_distribution = nullptr;
#endif
/* **** Run-time data **** */
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
index 0e5f7b4a2fd..0d7c06f4fc6 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
@@ -34,7 +34,7 @@
# include "kernel/integrator/megakernel.h"
# include "kernel/film/adaptive_sampling.h"
-# include "kernel/film/id_passes.h"
+# include "kernel/film/cryptomatte_passes.h"
# include "kernel/film/read.h"
# include "kernel/bake/bake.h"
@@ -169,7 +169,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_convergence_check);
return false;
#else
- return kernel_adaptive_sampling_convergence_check(
+ return film_adaptive_sampling_convergence_check(
kg, render_buffer, x, y, threshold, reset, offset, stride);
#endif
}
@@ -185,7 +185,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCP
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_x);
#else
- kernel_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
+ film_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
#endif
}
@@ -200,7 +200,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, adaptive_sampling_filter_y);
#else
- kernel_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
+ film_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
#endif
}
@@ -215,7 +215,7 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *
#ifdef KERNEL_STUB
STUB_ASSERT(KERNEL_ARCH, cryptomatte_postprocess);
#else
- kernel_cryptomatte_post(kg, render_buffer, pixel_index);
+ film_cryptomatte_post(kg, render_buffer, pixel_index);
#endif
}
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index e1ab802aa80..d7d2000775f 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -526,7 +526,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
bool converged = true;
if (x < sw && y < sh) {
- converged = ccl_gpu_kernel_call(kernel_adaptive_sampling_convergence_check(
+ converged = ccl_gpu_kernel_call(film_adaptive_sampling_convergence_check(
nullptr, render_buffer, sx + x, sy + y, threshold, reset, offset, stride));
}
@@ -553,7 +553,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (y < sh) {
ccl_gpu_kernel_call(
- kernel_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride));
+ film_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride));
}
}
ccl_gpu_kernel_postfix
@@ -572,7 +572,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
if (x < sw) {
ccl_gpu_kernel_call(
- kernel_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride));
+ film_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride));
}
}
ccl_gpu_kernel_postfix
@@ -589,7 +589,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
const int pixel_index = ccl_gpu_global_id_x();
if (pixel_index < num_pixels) {
- ccl_gpu_kernel_call(kernel_cryptomatte_post(nullptr, render_buffer, pixel_index));
+ ccl_gpu_kernel_call(film_cryptomatte_post(nullptr, render_buffer, pixel_index));
}
}
ccl_gpu_kernel_postfix
diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h
index c1df49c4f49..38cdcb572eb 100644
--- a/intern/cycles/kernel/device/gpu/parallel_active_index.h
+++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h
@@ -23,22 +23,6 @@ CCL_NAMESPACE_BEGIN
* and keep device specific code in compat.h */
#ifdef __KERNEL_ONEAPI__
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-template<typename IsActiveOp>
-void cpu_serial_active_index_array_impl(const uint num_states,
- ccl_global int *ccl_restrict indices,
- ccl_global int *ccl_restrict num_indices,
- IsActiveOp is_active_op)
-{
- int write_index = 0;
- for (int state_index = 0; state_index < num_states; state_index++) {
- if (is_active_op(state_index))
- indices[write_index++] = state_index;
- }
- *num_indices = write_index;
- return;
-}
-# endif /* WITH_ONEAPI_SYCL_HOST_ENABLED */
template<typename IsActiveOp>
void gpu_parallel_active_index_array_impl(const uint num_states,
@@ -182,18 +166,11 @@ __device__
num_simd_groups, \
simdgroup_offset)
#elif defined(__KERNEL_ONEAPI__)
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-# define gpu_parallel_active_index_array( \
- blocksize, num_states, indices, num_indices, is_active_op) \
- if (ccl_gpu_global_size_x() == 1) \
- cpu_serial_active_index_array_impl(num_states, indices, num_indices, is_active_op); \
- else \
- gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op);
-# else
-# define gpu_parallel_active_index_array( \
- blocksize, num_states, indices, num_indices, is_active_op) \
- gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
-# endif
+
+# define gpu_parallel_active_index_array( \
+ blocksize, num_states, indices, num_indices, is_active_op) \
+ gpu_parallel_active_index_array_impl(num_states, indices, num_indices, is_active_op)
+
#else
# define gpu_parallel_active_index_array( \
diff --git a/intern/cycles/kernel/device/metal/bvh.h b/intern/cycles/kernel/device/metal/bvh.h
new file mode 100644
index 00000000000..03faa3f020f
--- /dev/null
+++ b/intern/cycles/kernel/device/metal/bvh.h
@@ -0,0 +1,360 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* MetalRT implementation of ray-scene intersection. */
+
+#pragma once
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Payload types. */
+
+struct MetalRTIntersectionPayload {
+ RaySelfPrimitives self;
+ uint visibility;
+ float u, v;
+ int prim;
+ int type;
+#if defined(__METALRT_MOTION__)
+ float time;
+#endif
+};
+
+struct MetalRTIntersectionLocalPayload {
+ RaySelfPrimitives self;
+ uint local_object;
+ uint lcg_state;
+ short max_hits;
+ bool has_lcg_state;
+ bool result;
+ LocalIntersection local_isect;
+};
+
+struct MetalRTIntersectionShadowPayload {
+ RaySelfPrimitives self;
+ uint visibility;
+#if defined(__METALRT_MOTION__)
+ float time;
+#endif
+ int state;
+ float throughput;
+ short max_hits;
+ short num_hits;
+ short num_recorded_hits;
+ bool result;
+};
+
+/* Scene intersection. */
+
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ const uint visibility,
+ ccl_private Intersection *isect)
+{
+ if (!intersection_ray_valid(ray)) {
+ isect->t = ray->tmax;
+ isect->type = PRIMITIVE_NONE;
+ return false;
+ }
+
+#if defined(__KERNEL_DEBUG__)
+ if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ isect->t = ray->tmax;
+ isect->type = PRIMITIVE_NONE;
+ kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ return false;
+ }
+
+ if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
+ isect->t = ray->tmax;
+ isect->type = PRIMITIVE_NONE;
+ kernel_assert(!"Invalid ift_default");
+ return false;
+ }
+#endif
+
+ metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+ metalrt_intersector_type metalrt_intersect;
+
+ if (!kernel_data.bvh.have_curves) {
+ metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+ }
+
+ MetalRTIntersectionPayload payload;
+ payload.self = ray->self;
+ payload.u = 0.0f;
+ payload.v = 0.0f;
+ payload.visibility = visibility;
+
+ typename metalrt_intersector_type::result_type intersection;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ /* No further intersector setup required: Default MetalRT behavior is any-hit. */
+ }
+ else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ /* No further intersector setup required: Shadow ray early termination is controlled by the
+ * intersection handler */
+ }
+
+#if defined(__METALRT_MOTION__)
+ payload.time = ray->time;
+ intersection = metalrt_intersect.intersect(r,
+ metal_ancillaries->accel_struct,
+ ray_mask,
+ ray->time,
+ metal_ancillaries->ift_default,
+ payload);
+#else
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
+#endif
+
+ if (intersection.type == intersection_type::none) {
+ isect->t = ray->tmax;
+ isect->type = PRIMITIVE_NONE;
+
+ return false;
+ }
+
+ isect->t = intersection.distance;
+
+ isect->prim = payload.prim;
+ isect->type = payload.type;
+ isect->object = intersection.user_instance_id;
+
+ isect->t = intersection.distance;
+ if (intersection.type == intersection_type::triangle) {
+ isect->u = intersection.triangle_barycentric_coord.x;
+ isect->v = intersection.triangle_barycentric_coord.y;
+ }
+ else {
+ isect->u = payload.u;
+ isect->v = payload.v;
+ }
+
+ return isect->type != PRIMITIVE_NONE;
+}
+
+#ifdef __BVH_LOCAL__
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private LocalIntersection *local_isect,
+ int local_object,
+ ccl_private uint *lcg_state,
+ int max_hits)
+{
+ if (!intersection_ray_valid(ray)) {
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ return false;
+ }
+
+# if defined(__KERNEL_DEBUG__)
+ if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ return false;
+ }
+
+ if (is_null_intersection_function_table(metal_ancillaries->ift_local)) {
+ if (local_isect) {
+ local_isect->num_hits = 0;
+ }
+ kernel_assert(!"Invalid ift_local");
+ return false;
+ }
+# endif
+
+ metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+ metalrt_intersector_type metalrt_intersect;
+
+ metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+ if (!kernel_data.bvh.have_curves) {
+ metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+ }
+
+ MetalRTIntersectionLocalPayload payload;
+ payload.self = ray->self;
+ payload.local_object = local_object;
+ payload.max_hits = max_hits;
+ payload.local_isect.num_hits = 0;
+ if (lcg_state) {
+ payload.has_lcg_state = true;
+ payload.lcg_state = *lcg_state;
+ }
+ payload.result = false;
+
+ typename metalrt_intersector_type::result_type intersection;
+
+# if defined(__METALRT_MOTION__)
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload);
+# else
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload);
+# endif
+
+ if (lcg_state) {
+ *lcg_state = payload.lcg_state;
+ }
+ *local_isect = payload.local_isect;
+
+ return payload.result;
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
+ IntegratorShadowState state,
+ ccl_private const Ray *ray,
+ uint visibility,
+ uint max_hits,
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
+{
+ if (!intersection_ray_valid(ray)) {
+ return false;
+ }
+
+# if defined(__KERNEL_DEBUG__)
+ if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ return false;
+ }
+
+ if (is_null_intersection_function_table(metal_ancillaries->ift_shadow)) {
+ kernel_assert(!"Invalid ift_shadow");
+ return false;
+ }
+# endif
+
+ metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+ metalrt_intersector_type metalrt_intersect;
+
+ metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+ if (!kernel_data.bvh.have_curves) {
+ metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+ }
+
+ MetalRTIntersectionShadowPayload payload;
+ payload.self = ray->self;
+ payload.visibility = visibility;
+ payload.max_hits = max_hits;
+ payload.num_hits = 0;
+ payload.num_recorded_hits = 0;
+ payload.throughput = 1.0f;
+ payload.result = false;
+ payload.state = state;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+
+ typename metalrt_intersector_type::result_type intersection;
+
+# if defined(__METALRT_MOTION__)
+ payload.time = ray->time;
+ intersection = metalrt_intersect.intersect(r,
+ metal_ancillaries->accel_struct,
+ ray_mask,
+ ray->time,
+ metal_ancillaries->ift_shadow,
+ payload);
+# else
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_shadow, payload);
+# endif
+
+ *num_recorded_hits = payload.num_recorded_hits;
+ *throughput = payload.throughput;
+
+ return payload.result;
+}
+#endif
+
+#ifdef __VOLUME__
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint visibility)
+{
+ if (!intersection_ray_valid(ray)) {
+ return false;
+ }
+
+# if defined(__KERNEL_DEBUG__)
+ if (is_null_instance_acceleration_structure(metal_ancillaries->accel_struct)) {
+ kernel_assert(!"Invalid metal_ancillaries->accel_struct pointer");
+ return false;
+ }
+
+ if (is_null_intersection_function_table(metal_ancillaries->ift_default)) {
+ kernel_assert(!"Invalid ift_default");
+ return false;
+ }
+# endif
+
+ metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax);
+ metalrt_intersector_type metalrt_intersect;
+
+ metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque);
+ if (!kernel_data.bvh.have_curves) {
+ metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle);
+ }
+
+ MetalRTIntersectionPayload payload;
+ payload.self = ray->self;
+ payload.visibility = visibility;
+
+ typename metalrt_intersector_type::result_type intersection;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+
+# if defined(__METALRT_MOTION__)
+ payload.time = ray->time;
+ intersection = metalrt_intersect.intersect(r,
+ metal_ancillaries->accel_struct,
+ ray_mask,
+ ray->time,
+ metal_ancillaries->ift_default,
+ payload);
+# else
+ intersection = metalrt_intersect.intersect(
+ r, metal_ancillaries->accel_struct, ray_mask, metal_ancillaries->ift_default, payload);
+# endif
+
+ if (intersection.type == intersection_type::none) {
+ return false;
+ }
+
+ isect->prim = payload.prim;
+ isect->type = payload.type;
+ isect->object = intersection.user_instance_id;
+
+ isect->t = intersection.distance;
+ if (intersection.type == intersection_type::triangle) {
+ isect->u = intersection.triangle_barycentric_coord.x;
+ isect->v = intersection.triangle_barycentric_coord.y;
+ }
+ else {
+ isect->u = payload.u;
+ isect->v = payload.v;
+ }
+
+ return isect->type != PRIMITIVE_NONE;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 0ed52074a90..f689e93e5a2 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -29,24 +29,13 @@ using namespace metal::raytracing;
/* Qualifiers */
+#define ccl_device
+#define ccl_device_inline ccl_device __attribute__((always_inline))
+#define ccl_device_forceinline ccl_device __attribute__((always_inline))
#if defined(__KERNEL_METAL_APPLE__)
-
-/* Inline everything for Apple GPUs.
- * This gives ~1.1x speedup and 10% spill reduction for integator_shade_surface
- * at the cost of longer compile times (~4.5 minutes on M1 Max). */
-
-# define ccl_device __attribute__((always_inline))
-# define ccl_device_inline __attribute__((always_inline))
-# define ccl_device_forceinline __attribute__((always_inline))
-# define ccl_device_noinline __attribute__((always_inline))
-
+# define ccl_device_noinline ccl_device
#else
-
-# define ccl_device
-# define ccl_device_inline ccl_device
-# define ccl_device_forceinline ccl_device
# define ccl_device_noinline ccl_device __attribute__((noinline))
-
#endif
#define ccl_device_noinline_cpu ccl_device
@@ -189,35 +178,46 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
} volume_write_lambda_pass{kg, this, state};
/* make_type definitions with Metal style element initializers */
-#ifdef make_float2
-# undef make_float2
-#endif
-#ifdef make_float3
-# undef make_float3
-#endif
-#ifdef make_float4
-# undef make_float4
-#endif
-#ifdef make_int2
-# undef make_int2
-#endif
-#ifdef make_int3
-# undef make_int3
-#endif
-#ifdef make_int4
-# undef make_int4
-#endif
-#ifdef make_uchar4
-# undef make_uchar4
-#endif
-
-#define make_float2(x, y) float2(x, y)
-#define make_float3(x, y, z) float3(x, y, z)
-#define make_float4(x, y, z, w) float4(x, y, z, w)
-#define make_int2(x, y) int2(x, y)
-#define make_int3(x, y, z) int3(x, y, z)
-#define make_int4(x, y, z, w) int4(x, y, z, w)
-#define make_uchar4(x, y, z, w) uchar4(x, y, z, w)
+ccl_device_forceinline float2 make_float2(const float x, const float y)
+{
+ return float2(x, y);
+}
+
+ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
+{
+ return float3(x, y, z);
+}
+
+ccl_device_forceinline float4 make_float4(const float x,
+ const float y,
+ const float z,
+ const float w)
+{
+ return float4(x, y, z, w);
+}
+
+ccl_device_forceinline int2 make_int2(const int x, const int y)
+{
+ return int2(x, y);
+}
+
+ccl_device_forceinline int3 make_int3(const int x, const int y, const int z)
+{
+ return int3(x, y, z);
+}
+
+ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
+{
+ return int4(x, y, z, w);
+}
+
+ccl_device_forceinline uchar4 make_uchar4(const uchar x,
+ const uchar y,
+ const uchar z,
+ const uchar w)
+{
+ return uchar4(x, y, z, w);
+}
/* Math functions */
@@ -260,8 +260,6 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
#ifdef __METALRT__
-# define __KERNEL_GPU_RAYTRACING__
-
# if defined(__METALRT_MOTION__)
# define METALRT_TAGS instancing, instance_motion, primitive_motion
# else
diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h
index 99cb1e3826e..e75ec9cadec 100644
--- a/intern/cycles/kernel/device/metal/context_begin.h
+++ b/intern/cycles/kernel/device/metal/context_begin.h
@@ -34,21 +34,48 @@ class MetalKernelContext {
kernel_assert(0);
return 0;
}
-
+
+#ifdef __KERNEL_METAL_INTEL__
+ template<typename TextureType, typename CoordsType>
+ inline __attribute__((__always_inline__))
+ auto ccl_gpu_tex_object_read_intel_workaround(TextureType texture_array,
+ const uint tid, const uint sid,
+ CoordsType coords) const
+ {
+ switch(sid) {
+ default:
+ case 0: return texture_array[tid].tex.sample(sampler(address::repeat, filter::nearest), coords);
+ case 1: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::nearest), coords);
+ case 2: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::nearest), coords);
+ case 3: return texture_array[tid].tex.sample(sampler(address::repeat, filter::linear), coords);
+ case 4: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::linear), coords);
+ case 5: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::linear), coords);
+ }
+ }
+#endif
+
// texture2d
template<>
inline __attribute__((__always_inline__))
float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y));
+#endif
}
template<>
inline __attribute__((__always_inline__))
float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y)).x;
+#endif
}
// texture3d
@@ -57,14 +84,22 @@ class MetalKernelContext {
float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z));
+#endif
}
template<>
inline __attribute__((__always_inline__))
float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z)).x;
+#endif
}
# include "kernel/device/gpu/image.h"
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index 764c26dbe8f..8b69ee025cd 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -1,41 +1,44 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Blender Foundation */
-/* Metal kernel entry points */
+/* Metal kernel entry points. */
#include "kernel/device/metal/compat.h"
#include "kernel/device/metal/globals.h"
#include "kernel/device/metal/function_constants.h"
#include "kernel/device/gpu/kernel.h"
-/* MetalRT intersection handlers */
+/* MetalRT intersection handlers. */
+
#ifdef __METALRT__
-/* Return type for a bounding box intersection function. */
-struct BoundingBoxIntersectionResult
-{
+/* Intersection return types. */
+
+/* For a bounding box intersection function. */
+struct BoundingBoxIntersectionResult {
bool accept [[accept_intersection]];
bool continue_search [[continue_search]];
float distance [[distance]];
};
-/* Return type for a triangle intersection function. */
-struct TriangleIntersectionResult
-{
+/* For a triangle intersection function. */
+struct TriangleIntersectionResult {
bool accept [[accept_intersection]];
- bool continue_search [[continue_search]];
+ bool continue_search [[continue_search]];
};
enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX };
-ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives& self,
+/* Utilities. */
+
+ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self,
const int object,
const int prim)
{
return (self.prim == prim) && (self.object == object);
}
-ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives& self,
+ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self,
const int object,
const int prim)
{
@@ -43,12 +46,14 @@ ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimi
((self.light_prim == prim) && (self.light_object == object));
}
-ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives& self,
+ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self,
const int prim)
{
return (self.prim == prim);
}
+/* Hit functions. */
+
template<typename TReturn, uint intersection_type>
TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
@@ -58,7 +63,7 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
const float ray_tmax)
{
TReturn result;
-
+
#ifdef __BVH_LOCAL__
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
@@ -101,7 +106,8 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
}
else {
if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
- /* Record closest intersection only. Do not terminate ray here, since there is no guarantee about distance ordering in any-hit */
+ /* Record closest intersection only. Do not terminate ray here, since there is no guarantee
+ * about distance ordering in any-hit */
result.accept = false;
result.continue_search = true;
return result;
@@ -116,8 +122,8 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
isect->object = object;
isect->type = kernel_data_fetch(objects, object).primitive_type;
- isect->u = 1.0f - barycentrics.y - barycentrics.x;
- isect->v = barycentrics.x;
+ isect->u = barycentrics.x;
+ isect->v = barycentrics.y;
/* Record geometric normal */
const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
@@ -133,21 +139,20 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
#endif
}
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_local_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
- uint instance_id [[user_instance_id]],
- uint primitive_id [[primitive_id]],
- float2 barycentrics [[barycentric_coord]],
- float ray_tmax [[distance]])
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_local_hit_tri(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
+ uint instance_id [[user_instance_id]],
+ uint primitive_id [[primitive_id]],
+ float2 barycentrics [[barycentric_coord]],
+ float ray_tmax [[distance]])
{
return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
+ launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
{
/* unused function */
@@ -175,23 +180,14 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
}
# endif
- if (intersection_skip_self_shadow(payload.self, object, prim)) {
- /* continue search */
- return true;
- }
-
- float u = 0.0f, v = 0.0f;
+ const float u = barycentrics.x;
+ const float v = barycentrics.y;
int type = 0;
if (intersection_type == METALRT_HIT_TRIANGLE) {
- u = 1.0f - barycentrics.y - barycentrics.x;
- v = barycentrics.x;
type = kernel_data_fetch(objects, object).primitive_type;
}
# ifdef __HAIR__
else {
- u = barycentrics.x;
- v = barycentrics.y;
-
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
type = segment.type;
prim = segment.prim;
@@ -204,6 +200,11 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
}
# endif
+ if (intersection_skip_self_shadow(payload.self, object, prim)) {
+ /* continue search */
+ return true;
+ }
+
# ifndef __TRANSPARENT_SHADOWS__
/* No transparent shadows support compiled in, make opaque. */
payload.result = true;
@@ -215,7 +216,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
short num_recorded_hits = payload.num_recorded_hits;
MetalKernelContext context(launch_params_metal);
-
+
/* If no transparent shadows, all light is blocked and we can stop immediately. */
if (num_hits >= max_hits ||
!(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
@@ -223,11 +224,11 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
/* terminate ray */
return false;
}
-
+
/* Always use baked shadow transparency for curves. */
if (type & PRIMITIVE_CURVE) {
float throughput = payload.throughput;
- throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u);
+ throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, type, u);
payload.throughput = throughput;
payload.num_hits += 1;
@@ -240,10 +241,10 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
return true;
}
}
-
+
payload.num_hits += 1;
payload.num_recorded_hits += 1;
-
+
uint record_index = num_recorded_hits;
const IntegratorShadowState state = payload.state;
@@ -278,7 +279,7 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
-
+
/* Continue tracing. */
# endif /* __TRANSPARENT_SHADOWS__ */
#endif /* __SHADOW_RECORD_ALL__ */
@@ -286,26 +287,25 @@ bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
return true;
}
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_shadow_all_hit_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- unsigned int object [[user_instance_id]],
- unsigned int primitive_id [[primitive_id]],
- float2 barycentrics [[barycentric_coord]],
- float ray_tmax [[distance]])
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_shadow_all_hit_tri(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ unsigned int object [[user_instance_id]],
+ unsigned int primitive_id [[primitive_id]],
+ float2 barycentrics [[barycentric_coord]],
+ float ray_tmax [[distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
TriangleIntersectionResult result;
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
+ launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
result.accept = !result.continue_search;
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
{
/* unused function */
@@ -317,15 +317,16 @@ __anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]
}
template<typename TReturnType, uint intersection_type>
-inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const float u)
+inline TReturnType metalrt_visibility_test(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+ const uint object,
+ uint prim,
+ const float u)
{
TReturnType result;
-
-# ifdef __HAIR__
+
+#ifdef __HAIR__
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
/* Filter out curve endcaps. */
if (u == 0.0f || u == 1.0f) {
@@ -334,15 +335,23 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa
return result;
}
}
-# endif
+#endif
uint visibility = payload.visibility;
-# ifdef __VISIBILITY_FLAG__
+#ifdef __VISIBILITY_FLAG__
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
result.accept = false;
result.continue_search = true;
return result;
}
+#endif
+
+ if (intersection_type == METALRT_HIT_TRIANGLE) {
+ }
+# ifdef __HAIR__
+ else {
+ prim = kernel_data_fetch(curve_segments, prim).prim;
+ }
# endif
/* Shadow ray early termination. */
@@ -371,16 +380,17 @@ inline TReturnType metalrt_visibility_test(constant KernelParamsMetal &launch_pa
return result;
}
-[[intersection(triangle, triangle_data, METALRT_TAGS)]]
-TriangleIntersectionResult
-__anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
- unsigned int object [[user_instance_id]],
- unsigned int primitive_id [[primitive_id]])
+[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
+__anyhit__cycles_metalrt_visibility_test_tri(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ unsigned int object [[user_instance_id]],
+ unsigned int primitive_id [[primitive_id]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
- TriangleIntersectionResult result = metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
- launch_params_metal, payload, object, prim, 0.0f);
+ TriangleIntersectionResult result =
+ metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
+ launch_params_metal, payload, object, prim, 0.0f);
if (result.accept) {
payload.prim = prim;
payload.type = kernel_data_fetch(objects, object).primitive_type;
@@ -388,8 +398,7 @@ __anyhit__cycles_metalrt_visibility_test_tri(constant KernelParamsMetal &launch_
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
{
/* Unused function */
@@ -400,19 +409,21 @@ __anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance
return result;
}
+/* Primitive intersection functions. */
+
#ifdef __HAIR__
-ccl_device_inline
-void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmin,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
+ccl_device_inline void metalrt_intersection_curve(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_P,
+ const float3 ray_D,
+ float time,
+ const float ray_tmin,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
@@ -421,25 +432,16 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
}
# endif
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
Intersection isect;
isect.t = ray_tmax;
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
MetalKernelContext context(launch_params_metal);
- if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
+ if (context.curve_intersect(
+ NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, isect.u);
+ launch_params_metal, payload, object, prim, isect.u);
if (result.accept) {
- result.distance = isect.t / len;
+ result.distance = isect.t;
payload.u = isect.u;
payload.v = isect.v;
payload.prim = prim;
@@ -448,54 +450,41 @@ void metalrt_intersection_curve(constant KernelParamsMetal &launch_params_metal,
}
}
-ccl_device_inline
-void metalrt_intersection_curve_shadow(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmin,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
+ccl_device_inline void metalrt_intersection_curve_shadow(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_P,
+ const float3 ray_D,
+ float time,
+ const float ray_tmin,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
{
const uint visibility = payload.visibility;
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
Intersection isect;
isect.t = ray_tmax;
- /* Transform maximum distance into object space */
- if (isect.t != FLT_MAX)
- isect.t *= len;
MetalKernelContext context(launch_params_metal);
- if (context.curve_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
+ if (context.curve_intersect(
+ NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
+ launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
result.accept = !result.continue_search;
-
- if (result.accept) {
- result.distance = isect.t / len;
- }
}
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
+ [[payload]],
const uint object [[user_instance_id]],
const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
+ const float3 ray_P [[origin]],
+ const float3 ray_D [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
@@ -508,28 +497,36 @@ __intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[b
result.distance = ray_tmax;
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
- metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+ metalrt_intersection_curve(launch_params_metal,
+ payload,
+ object,
+ segment.prim,
+ segment.type,
+ ray_P,
+ ray_D,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
}
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmin [[min_distance]],
- const float ray_tmax [[max_distance]])
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__curve_ribbon_shadow(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_P [[origin]],
+ const float3 ray_D [[direction]],
+ const float ray_tmin [[min_distance]],
+ const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
@@ -540,57 +537,73 @@ __intersection__curve_ribbon_shadow(constant KernelParamsMetal &launch_params_me
result.distance = ray_tmax;
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
- metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+ metalrt_intersection_curve_shadow(launch_params_metal,
+ payload,
+ object,
+ segment.prim,
+ segment.type,
+ ray_P,
+ ray_D,
# if defined(__METALRT_MOTION__)
- payload.time,
+ payload.time,
# else
- 0.0f,
+ 0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
}
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
+ [[payload]],
const uint object [[user_instance_id]],
const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
+ const float3 ray_P [[origin]],
+ const float3 ray_D [[direction]],
const float ray_tmin [[min_distance]],
const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
-
+
BoundingBoxIntersectionResult result;
result.accept = false;
result.continue_search = true;
result.distance = ray_tmax;
- metalrt_intersection_curve(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+ metalrt_intersection_curve(launch_params_metal,
+ payload,
+ object,
+ segment.prim,
+ segment.type,
+ ray_P,
+ ray_D,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
-__intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmin [[min_distance]],
- const float ray_tmax [[max_distance]])
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
+__intersection__curve_all_shadow(
+ constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_P [[origin]],
+ const float3 ray_D [[direction]],
+ const float ray_tmin [[min_distance]],
+ const float ray_tmax [[max_distance]])
{
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
@@ -600,31 +613,39 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
result.continue_search = true;
result.distance = ray_tmax;
- metalrt_intersection_curve_shadow(launch_params_metal, payload, object, segment.prim, segment.type, ray_origin, ray_direction,
+ metalrt_intersection_curve_shadow(launch_params_metal,
+ payload,
+ object,
+ segment.prim,
+ segment.type,
+ ray_P,
+ ray_D,
# if defined(__METALRT_MOTION__)
- payload.time,
+ payload.time,
# else
- 0.0f,
+ 0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
return result;
}
#endif /* __HAIR__ */
#ifdef __POINTCLOUD__
-ccl_device_inline
-void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmin,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
+ccl_device_inline void metalrt_intersection_point(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_P,
+ const float3 ray_D,
+ float time,
+ const float ray_tmin,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
{
# ifdef __VISIBILITY_FLAG__
const uint visibility = payload.visibility;
@@ -633,25 +654,16 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
}
# endif
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the point intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
Intersection isect;
isect.t = ray_tmax;
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
MetalKernelContext context(launch_params_metal);
- if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
+ if (context.point_intersect(
+ NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, isect.u);
+ launch_params_metal, payload, object, prim, isect.u);
if (result.accept) {
- result.distance = isect.t / len;
+ result.distance = isect.t;
payload.u = isect.u;
payload.v = isect.v;
payload.prim = prim;
@@ -660,56 +672,46 @@ void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
}
}
-ccl_device_inline
-void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params_metal,
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
- const uint object,
- const uint prim,
- const uint type,
- const float3 ray_origin,
- const float3 ray_direction,
- float time,
- const float ray_tmin,
- const float ray_tmax,
- thread BoundingBoxIntersectionResult &result)
+ccl_device_inline void metalrt_intersection_point_shadow(
+ constant KernelParamsMetal &launch_params_metal,
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+ const uint object,
+ const uint prim,
+ const uint type,
+ const float3 ray_P,
+ const float3 ray_D,
+ float time,
+ const float ray_tmin,
+ const float ray_tmax,
+ thread BoundingBoxIntersectionResult &result)
{
const uint visibility = payload.visibility;
- float3 P = ray_origin;
- float3 dir = ray_direction;
-
- /* The direction is not normalized by default, but the point intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
Intersection isect;
isect.t = ray_tmax;
- /* Transform maximum distance into object space */
- if (isect.t != FLT_MAX)
- isect.t *= len;
MetalKernelContext context(launch_params_metal);
- if (context.point_intersect(NULL, &isect, P, dir, ray_tmin, isect.t, object, prim, time, type)) {
+ if (context.point_intersect(
+ NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
- launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
+ launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
result.accept = !result.continue_search;
if (result.accept) {
- result.distance = isect.t / len;
+ result.distance = isect.t;
}
}
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmin [[min_distance]],
- const float ray_tmax [[max_distance]])
+ ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_origin [[origin]],
+ const float3 ray_direction [[direction]],
+ const float ray_tmin [[min_distance]],
+ const float ray_tmax [[max_distance]])
{
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const int type = kernel_data_fetch(objects, object).primitive_type;
@@ -719,27 +721,35 @@ __intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1
result.continue_search = true;
result.distance = ray_tmax;
- metalrt_intersection_point(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
+ metalrt_intersection_point(launch_params_metal,
+ payload,
+ object,
+ prim,
+ type,
+ ray_origin,
+ ray_direction,
# if defined(__METALRT_MOTION__)
payload.time,
# else
0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
return result;
}
-[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
-BoundingBoxIntersectionResult
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
- ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
- const uint object [[user_instance_id]],
- const uint primitive_id [[primitive_id]],
- const float3 ray_origin [[origin]],
- const float3 ray_direction [[direction]],
- const float ray_tmin [[min_distance]],
- const float ray_tmax [[max_distance]])
+ ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload
+ [[payload]],
+ const uint object [[user_instance_id]],
+ const uint primitive_id [[primitive_id]],
+ const float3 ray_origin [[origin]],
+ const float3 ray_direction [[direction]],
+ const float ray_tmin [[min_distance]],
+ const float ray_tmax [[max_distance]])
{
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
const int type = kernel_data_fetch(objects, object).primitive_type;
@@ -749,13 +759,21 @@ __intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[b
result.continue_search = true;
result.distance = ray_tmax;
- metalrt_intersection_point_shadow(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
+ metalrt_intersection_point_shadow(launch_params_metal,
+ payload,
+ object,
+ prim,
+ type,
+ ray_origin,
+ ray_direction,
# if defined(__METALRT_MOTION__)
- payload.time,
+ payload.time,
# else
- 0.0f,
+ 0.0f,
# endif
- ray_tmin, ray_tmax, result);
+ ray_tmin,
+ ray_tmax,
+ result);
return result;
}
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index 1b25259bcf5..dfaec65130c 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -10,6 +10,7 @@
#define CCL_NAMESPACE_END
#include <cstdint>
+#include <math.h>
#ifndef __NODES_MAX_GROUP__
# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
@@ -30,7 +31,7 @@
#define ccl_global
#define ccl_always_inline __attribute__((always_inline))
#define ccl_device_inline inline
-#define ccl_noinline
+#define ccl_noinline __attribute__((noinline))
#define ccl_inline_constant const constexpr
#define ccl_static_constant const
#define ccl_device_forceinline __attribute__((always_inline))
@@ -54,18 +55,6 @@
#define ccl_gpu_kernel(block_num_threads, thread_num_registers)
#define ccl_gpu_kernel_threads(block_num_threads)
-#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
-# define KG_ND_ITEMS \
- kg->nd_item_local_id_0 = item.get_local_id(0); \
- kg->nd_item_local_range_0 = item.get_local_range(0); \
- kg->nd_item_group_0 = item.get_group(0); \
- kg->nd_item_group_range_0 = item.get_group_range(0); \
- kg->nd_item_global_id_0 = item.get_global_id(0); \
- kg->nd_item_global_range_0 = item.get_global_range(0);
-#else
-# define KG_ND_ITEMS
-#endif
-
#define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
@@ -75,8 +64,7 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
(kg); \
cgh.parallel_for<class kernel_##name>( \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
- [=](sycl::nd_item<1> item) { \
- KG_ND_ITEMS
+ [=](sycl::nd_item<1> item) {
#define ccl_gpu_kernel_postfix \
}); \
@@ -94,31 +82,17 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
} ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg)
/* GPU thread, block, grid size and index */
-#ifndef WITH_ONEAPI_SYCL_HOST_ENABLED
-# define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
-# define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
-# define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
-# define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
-# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
-# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-# define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
-# define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
-#else
-# define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0)
-# define ccl_gpu_block_dim_x (kg->nd_item_local_range_0)
-# define ccl_gpu_block_idx_x (kg->nd_item_group_0)
-# define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0)
-# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
-# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
-
-# define ccl_gpu_global_id_x() (kg->nd_item_global_id_0)
-# define ccl_gpu_global_size_x() (kg->nd_item_global_range_0)
-#endif
+#define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
+#define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
+#define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
+#define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
+#define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
+#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
+#define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
+#define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
/* GPU warp synchronization */
-
#define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier()
#define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space)
#ifdef __SYCL_DEVICE_ONLY__
@@ -149,25 +123,13 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
/* clang-format on */
/* Types */
+
/* It's not possible to use sycl types like sycl::float3, sycl::int3, etc
- * because these types have different interfaces from blender version */
+ * because these types have different interfaces from blender version. */
using uchar = unsigned char;
using sycl::half;
-struct float3 {
- float x, y, z;
-};
-
-ccl_always_inline float3 make_float3(float x, float y, float z)
-{
- return {x, y, z};
-}
-ccl_always_inline float3 make_float3(float x)
-{
- return {x, x, x};
-}
-
/* math functions */
#define fabsf(x) sycl::fabs((x))
#define copysignf(x, y) sycl::copysign((x), (y))
@@ -186,21 +148,15 @@ ccl_always_inline float3 make_float3(float x)
#define fmodf(x, y) sycl::fmod((x), (y))
#define lgammaf(x) sycl::lgamma((x))
-#define __forceinline __attribute__((always_inline))
-
-/* Types */
-#include "util/half.h"
-#include "util/types.h"
-
-/* NOTE(@nsirgien): Declaring these functions after types headers is very important because they
- * include oneAPI headers, which transitively include math.h headers which will cause redefinitions
- * of the math defines because math.h also uses them and having them defined before math.h include
- * is actually UB. */
-/* Use fast math functions - get them from sycl::native namespace for native math function
- * implementations */
#define cosf(x) sycl::native::cos(((float)(x)))
#define sinf(x) sycl::native::sin(((float)(x)))
#define powf(x, y) sycl::native::powr(((float)(x)), ((float)(y)))
#define tanf(x) sycl::native::tan(((float)(x)))
#define logf(x) sycl::native::log(((float)(x)))
#define expf(x) sycl::native::exp(((float)(x)))
+
+#define __forceinline __attribute__((always_inline))
+
+/* Types */
+#include "util/half.h"
+#include "util/types.h"
diff --git a/intern/cycles/kernel/device/oneapi/dll_interface_template.h b/intern/cycles/kernel/device/oneapi/dll_interface_template.h
deleted file mode 100644
index 662068c0fed..00000000000
--- a/intern/cycles/kernel/device/oneapi/dll_interface_template.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2022 Intel Corporation */
-
-/* device_capabilities() returns a C string that must be free'd with oneapi_free(). */
-DLL_INTERFACE_CALL(oneapi_device_capabilities, char *)
-DLL_INTERFACE_CALL(oneapi_free, void, void *)
-DLL_INTERFACE_CALL(oneapi_get_memcapacity, size_t, SyclQueue *queue)
-
-DLL_INTERFACE_CALL(oneapi_get_compute_units_amount, size_t, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_iterate_devices, void, OneAPIDeviceIteratorCallback cb, void *user_ptr)
-DLL_INTERFACE_CALL(oneapi_set_error_cb, void, OneAPIErrorCallback, void *user_ptr)
-
-DLL_INTERFACE_CALL(oneapi_create_queue, bool, SyclQueue *&external_queue, int device_index)
-DLL_INTERFACE_CALL(oneapi_free_queue, void, SyclQueue *queue)
-DLL_INTERFACE_CALL(
- oneapi_usm_aligned_alloc_host, void *, SyclQueue *queue, size_t memory_size, size_t alignment)
-DLL_INTERFACE_CALL(oneapi_usm_alloc_device, void *, SyclQueue *queue, size_t memory_size)
-DLL_INTERFACE_CALL(oneapi_usm_free, void, SyclQueue *queue, void *usm_ptr)
-
-DLL_INTERFACE_CALL(
- oneapi_usm_memcpy, bool, SyclQueue *queue, void *dest, void *src, size_t num_bytes)
-DLL_INTERFACE_CALL(oneapi_queue_synchronize, bool, SyclQueue *queue)
-DLL_INTERFACE_CALL(oneapi_usm_memset,
- bool,
- SyclQueue *queue,
- void *usm_ptr,
- unsigned char value,
- size_t num_bytes)
-
-DLL_INTERFACE_CALL(oneapi_run_test_kernel, bool, SyclQueue *queue)
-
-/* Operation with Kernel globals structure - map of global/constant allocation - filled before
- * render/kernel execution As we don't know in cycles `sizeof` this - Cycles will manage just as
- * pointer. */
-DLL_INTERFACE_CALL(oneapi_kernel_globals_size, bool, SyclQueue *queue, size_t &kernel_global_size)
-DLL_INTERFACE_CALL(oneapi_set_global_memory,
- void,
- SyclQueue *queue,
- void *kernel_globals,
- const char *memory_name,
- void *memory_device_pointer)
-
-DLL_INTERFACE_CALL(oneapi_kernel_preferred_local_size,
- size_t,
- SyclQueue *queue,
- const DeviceKernel kernel,
- const size_t kernel_global_size)
-DLL_INTERFACE_CALL(oneapi_enqueue_kernel,
- bool,
- KernelContext *context,
- int kernel,
- size_t global_size,
- void **args)
diff --git a/intern/cycles/kernel/device/oneapi/globals.h b/intern/cycles/kernel/device/oneapi/globals.h
index d60f4f135ba..116620eb725 100644
--- a/intern/cycles/kernel/device/oneapi/globals.h
+++ b/intern/cycles/kernel/device/oneapi/globals.h
@@ -23,15 +23,6 @@ typedef struct KernelGlobalsGPU {
#undef KERNEL_DATA_ARRAY
IntegratorStateGPU *integrator_state;
const KernelData *__data;
-#ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
- size_t nd_item_local_id_0;
- size_t nd_item_local_range_0;
- size_t nd_item_group_0;
- size_t nd_item_group_range_0;
-
- size_t nd_item_global_id_0;
- size_t nd_item_global_range_0;
-#endif
} KernelGlobalsGPU;
typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
diff --git a/intern/cycles/kernel/device/oneapi/image.h b/intern/cycles/kernel/device/oneapi/image.h
index 6681977a675..2417b8eac3b 100644
--- a/intern/cycles/kernel/device/oneapi/image.h
+++ b/intern/cycles/kernel/device/oneapi/image.h
@@ -81,10 +81,15 @@ ccl_device_inline float4 svm_image_texture_read_2d(int id, int x, int y)
x = svm_image_texture_wrap_periodic(x, info.width);
y = svm_image_texture_wrap_periodic(y, info.height);
}
- else {
+ else if (info.extension == EXTENSION_EXTEND) {
x = svm_image_texture_wrap_clamp(x, info.width);
y = svm_image_texture_wrap_clamp(y, info.height);
}
+ else {
+ if (x < 0 || x >= info.width || y < 0 || y >= info.height) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
return svm_image_texture_read(info, x, y, 0);
}
@@ -99,11 +104,16 @@ ccl_device_inline float4 svm_image_texture_read_3d(int id, int x, int y, int z)
y = svm_image_texture_wrap_periodic(y, info.height);
z = svm_image_texture_wrap_periodic(z, info.depth);
}
- else {
+ else if (info.extension == EXTENSION_EXTEND) {
x = svm_image_texture_wrap_clamp(x, info.width);
y = svm_image_texture_wrap_clamp(y, info.height);
z = svm_image_texture_wrap_clamp(z, info.depth);
}
+ else {
+ if (x < 0 || x >= info.width || y < 0 || y >= info.height || z < 0 || z >= info.depth) {
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+ }
return svm_image_texture_read(info, x, y, z);
}
@@ -128,12 +138,6 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals, int id, float x, float
{
const TextureInfo &info = kernel_data_fetch(texture_info, id);
- if (info.extension == EXTENSION_CLIP) {
- if (x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
if (info.interpolation == INTERPOLATION_CLOSEST) {
/* Closest interpolation. */
int ix, iy;
@@ -315,12 +319,6 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals, int id, float3 P, in
}
#endif
else {
- if (info.extension == EXTENSION_CLIP) {
- if (x < 0.0f || y < 0.0f || z < 0.0f || x > 1.0f || y > 1.0f || z > 1.0f) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
- }
-
x *= info.width;
y *= info.height;
z *= info.depth;
diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp
index 300e201600c..525ae288f0c 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -3,208 +3,79 @@
#ifdef WITH_ONEAPI
-/* clang-format off */
# include "kernel.h"
# include <iostream>
# include <map>
# include <set>
-# include <CL/sycl.hpp>
+# include <sycl/sycl.hpp>
# include "kernel/device/oneapi/compat.h"
# include "kernel/device/oneapi/globals.h"
# include "kernel/device/oneapi/kernel_templates.h"
# include "kernel/device/gpu/kernel.h"
-/* clang-format on */
static OneAPIErrorCallback s_error_cb = nullptr;
static void *s_error_user_ptr = nullptr;
-static std::vector<sycl::device> oneapi_available_devices();
-
void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr)
{
s_error_cb = cb;
s_error_user_ptr = user_ptr;
}
-void oneapi_check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_host = false)
-{
-# ifdef _DEBUG
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- sycl::info::device_type device_type =
- queue->get_device().get_info<sycl::info::device::device_type>();
- sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
- (void)usm_type;
- assert(usm_type == sycl::usm::alloc::device ||
- ((device_type == sycl::info::device_type::host ||
- device_type == sycl::info::device_type::is_cpu || allow_host) &&
- usm_type == sycl::usm::alloc::host));
-# endif
-}
-
-bool oneapi_create_queue(SyclQueue *&external_queue, int device_index)
-{
- bool finished_correct = true;
- try {
- std::vector<sycl::device> devices = oneapi_available_devices();
- if (device_index < 0 || device_index >= devices.size()) {
- return false;
- }
- sycl::queue *created_queue = new sycl::queue(devices[device_index],
- sycl::property::queue::in_order());
- external_queue = reinterpret_cast<SyclQueue *>(created_queue);
- }
- catch (sycl::exception const &e) {
- finished_correct = false;
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- }
- return finished_correct;
-}
-
-void oneapi_free_queue(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- delete queue;
-}
-
-void *oneapi_usm_aligned_alloc_host(SyclQueue *queue_, size_t memory_size, size_t alignment)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- return sycl::aligned_alloc_host(alignment, memory_size, *queue);
-}
-
-void *oneapi_usm_alloc_device(SyclQueue *queue_, size_t memory_size)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- return sycl::malloc_device(memory_size, *queue);
-}
-
-void oneapi_usm_free(SyclQueue *queue_, void *usm_ptr)
+/* NOTE(@nsirgien): Execution of this simple kernel will check basic functionality like
+ * memory allocations, memory transfers and execution of kernel with USM memory. */
+bool oneapi_run_test_kernel(SyclQueue *queue_)
{
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, usm_ptr, true);
- sycl::free(usm_ptr, *queue);
-}
+ const size_t N = 8;
+ const size_t memory_byte_size = sizeof(int) * N;
-bool oneapi_usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, dest, true);
- oneapi_check_usm(queue_, src, true);
- sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
-# ifdef WITH_CYCLES_DEBUG
+ bool is_computation_correct = true;
try {
- /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
- * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
- */
- mem_event.wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-# else
- sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
- sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
- bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
- src_type == sycl::usm::alloc::device;
- bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
- src_type == sycl::usm::alloc::unknown;
- /* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
- * may not wait until the end of the transfer before using the memory.
- */
- if (from_device_to_host || host_or_device_memop_with_offset)
- mem_event.wait();
- return true;
-# endif
-}
+ int *A_host = (int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
-bool oneapi_usm_memset(SyclQueue *queue_, void *usm_ptr, unsigned char value, size_t num_bytes)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- oneapi_check_usm(queue_, usm_ptr, true);
- sycl::event mem_event = queue->memset(usm_ptr, value, num_bytes);
-# ifdef WITH_CYCLES_DEBUG
- try {
- /* NOTE(@nsirgien) Waiting on memory operation may give more precise error
- * messages. Due to impact on occupancy, it makes sense to enable it only during Cycles debug.
- */
- mem_event.wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
+ for (size_t i = (size_t)0; i < N; i++) {
+ A_host[i] = rand() % 32;
}
- return false;
- }
-# else
- (void)mem_event;
- return true;
-# endif
-}
-bool oneapi_queue_synchronize(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- try {
- queue->wait_and_throw();
- return true;
- }
- catch (sycl::exception const &e) {
- if (s_error_cb) {
- s_error_cb(e.what(), s_error_user_ptr);
- }
- return false;
- }
-}
+ int *A_device = (int *)sycl::malloc_device(memory_byte_size, *queue);
+ int *B_device = (int *)sycl::malloc_device(memory_byte_size, *queue);
-/* NOTE(@nsirgien): Execution of this simple kernel will check basic functionality and
- * also trigger runtime compilation of all existing oneAPI kernels */
-bool oneapi_run_test_kernel(SyclQueue *queue_)
-{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
- size_t N = 8;
- sycl::buffer<float, 1> A(N);
- sycl::buffer<float, 1> B(N);
-
- {
- sycl::host_accessor A_host_acc(A, sycl::write_only);
- for (size_t i = (size_t)0; i < N; i++)
- A_host_acc[i] = rand() % 32;
- }
+ queue->memcpy(A_device, A_host, memory_byte_size);
+ queue->wait_and_throw();
- try {
queue->submit([&](sycl::handler &cgh) {
- sycl::accessor A_acc(A, cgh, sycl::read_only);
- sycl::accessor B_acc(B, cgh, sycl::write_only, sycl::no_init);
-
- cgh.parallel_for(N, [=](sycl::id<1> idx) { B_acc[idx] = A_acc[idx] + idx.get(0); });
+ cgh.parallel_for(N, [=](sycl::id<1> idx) { B_device[idx] = A_device[idx] + idx.get(0); });
});
queue->wait_and_throw();
- sycl::host_accessor A_host_acc(A, sycl::read_only);
- sycl::host_accessor B_host_acc(B, sycl::read_only);
+ int *B_host = (int *)sycl::aligned_alloc_host(16, memory_byte_size, *queue);
+
+ queue->memcpy(B_host, B_device, memory_byte_size);
+ queue->wait_and_throw();
for (size_t i = (size_t)0; i < N; i++) {
- float result = A_host_acc[i] + B_host_acc[i];
- (void)result;
+ const int expected_result = i + A_host[i];
+ if (B_host[i] != expected_result) {
+ is_computation_correct = false;
+ if (s_error_cb) {
+ s_error_cb(("Incorrect result in test kernel execution - expected " +
+ std::to_string(expected_result) + ", got " + std::to_string(B_host[i]))
+ .c_str(),
+ s_error_user_ptr);
+ }
+ }
}
+
+ sycl::free(A_host, *queue);
+ sycl::free(B_host, *queue);
+ sycl::free(A_device, *queue);
+ sycl::free(B_device, *queue);
+ queue->wait_and_throw();
}
catch (sycl::exception const &e) {
if (s_error_cb) {
@@ -213,63 +84,16 @@ bool oneapi_run_test_kernel(SyclQueue *queue_)
return false;
}
- return true;
-}
-
-bool oneapi_kernel_globals_size(SyclQueue *queue_, size_t &kernel_global_size)
-{
- kernel_global_size = sizeof(KernelGlobalsGPU);
-
- return true;
-}
-
-void oneapi_set_global_memory(SyclQueue *queue_,
- void *kernel_globals,
- const char *memory_name,
- void *memory_device_pointer)
-{
- assert(queue_);
- assert(kernel_globals);
- assert(memory_name);
- assert(memory_device_pointer);
- KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals;
- oneapi_check_usm(queue_, memory_device_pointer);
- oneapi_check_usm(queue_, kernel_globals, true);
-
- std::string matched_name(memory_name);
-
-/* This macro will change global ptr of KernelGlobals via name matching. */
-# define KERNEL_DATA_ARRAY(type, name) \
- else if (#name == matched_name) \
- { \
- globals->__##name = (type *)memory_device_pointer; \
- return; \
- }
- if (false) {
- }
- else if ("integrator_state" == matched_name) {
- globals->integrator_state = (IntegratorStateGPU *)memory_device_pointer;
- return;
- }
- KERNEL_DATA_ARRAY(KernelData, data)
-# include "kernel/data_arrays.h"
- else
- {
- std::cerr << "Can't found global/constant memory with name \"" << matched_name << "\"!"
- << std::endl;
- assert(false);
- }
-# undef KERNEL_DATA_ARRAY
+ return is_computation_correct;
}
/* TODO: Move device information to OneapiDevice initialized on creation and use it. */
/* TODO: Move below function to oneapi/queue.cpp. */
-size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_,
+size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
const DeviceKernel kernel,
const size_t kernel_global_size)
{
- assert(queue_);
- sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+ assert(queue);
(void)kernel_global_size;
const static size_t preferred_work_group_size_intersect_shading = 32;
const static size_t preferred_work_group_size_technical = 1024;
@@ -311,11 +135,63 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue_,
preferred_work_group_size = 512;
}
- const size_t limit_work_group_size =
- queue->get_device().get_info<sycl::info::device::max_work_group_size>();
+ const size_t limit_work_group_size = reinterpret_cast<sycl::queue *>(queue)
+ ->get_device()
+ .get_info<sycl::info::device::max_work_group_size>();
+
return std::min(limit_work_group_size, preferred_work_group_size);
}
+bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
+{
+# ifdef SYCL_SKIP_KERNELS_PRELOAD
+ (void)queue_;
+ (void)requested_features;
+# else
+ assert(queue_);
+ sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
+
+ try {
+ sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
+ sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
+ {queue->get_device()});
+
+ for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
+ const std::string &kernel_name = kernel_id.get_name();
+
+ /* NOTE(@nsirgien): Names in this conditions below should match names from
+ * oneapi_call macro in oneapi_enqueue_kernel below */
+ if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
+ kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
+ continue;
+ }
+
+ if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
+ kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
+ continue;
+ }
+
+ if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
+ kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
+ std::string::npos) {
+ continue;
+ }
+
+ sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
+ sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
+ sycl::build(one_kernel_bundle);
+ }
+ }
+ catch (sycl::exception const &e) {
+ if (s_error_cb) {
+ s_error_cb(e.what(), s_error_user_ptr);
+ }
+ return false;
+ }
+# endif
+ return true;
+}
+
bool oneapi_enqueue_kernel(KernelContext *kernel_context,
int kernel,
size_t global_size,
@@ -354,13 +230,6 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
/* NOTE(@nsirgien): As for now non-uniform work-groups don't work on most oneAPI devices,
* we extend work size to fit uniformity requirements. */
global_size = groups_count * local_size;
-
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
- if (queue->get_device().is_host()) {
- global_size = 1;
- local_size = 1;
- }
-# endif
}
/* Let the compiler throw an error if there are any kernels missing in this implementation. */
@@ -645,13 +514,9 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
/* Unsupported kernels */
case DEVICE_KERNEL_NUM:
case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
- assert(0);
- return false;
+ kernel_assert(0);
+ break;
}
-
- /* Unknown kernel. */
- assert(0);
- return false;
});
}
catch (sycl::exception const &e) {
@@ -668,247 +533,4 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
# endif
return success;
}
-
-static const int lowest_supported_driver_version_win = 1011660;
-static const int lowest_supported_driver_version_neo = 23570;
-
-static int parse_driver_build_version(const sycl::device &device)
-{
- const std::string &driver_version = device.get_info<sycl::info::device::driver_version>();
- int driver_build_version = 0;
-
- size_t second_dot_position = driver_version.find('.', driver_version.find('.') + 1);
- if (second_dot_position == std::string::npos) {
- std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
- << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
- << " xx.xx.xxx.xxxx (Windows) for device \""
- << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
- }
- else {
- try {
- size_t third_dot_position = driver_version.find('.', second_dot_position + 1);
- if (third_dot_position != std::string::npos) {
- const std::string &third_number_substr = driver_version.substr(
- second_dot_position + 1, third_dot_position - second_dot_position - 1);
- const std::string &forth_number_substr = driver_version.substr(third_dot_position + 1);
- if (third_number_substr.length() == 3 && forth_number_substr.length() == 4)
- driver_build_version = std::stoi(third_number_substr) * 10000 +
- std::stoi(forth_number_substr);
- }
- else {
- const std::string &third_number_substr = driver_version.substr(second_dot_position + 1);
- driver_build_version = std::stoi(third_number_substr);
- }
- }
- catch (std::invalid_argument &e) {
- std::cerr << "Unable to parse unknown Intel GPU driver version \"" << driver_version
- << "\" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0),"
- << " xx.xx.xxx.xxxx (Windows) for device \""
- << device.get_info<sycl::info::device::name>() << "\"." << std::endl;
- }
- }
-
- return driver_build_version;
-}
-
-static std::vector<sycl::device> oneapi_available_devices()
-{
- bool allow_all_devices = false;
- if (getenv("CYCLES_ONEAPI_ALL_DEVICES") != nullptr)
- allow_all_devices = true;
-
- /* Host device is useful only for debugging at the moment
- * so we hide this device with default build settings. */
-# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
- bool allow_host = true;
-# else
- bool allow_host = false;
-# endif
-
- const std::vector<sycl::platform> &oneapi_platforms = sycl::platform::get_platforms();
-
- std::vector<sycl::device> available_devices;
- for (const sycl::platform &platform : oneapi_platforms) {
- /* ignore OpenCL platforms to avoid using the same devices through both Level-Zero and OpenCL.
- */
- if (platform.get_backend() == sycl::backend::opencl) {
- continue;
- }
-
- const std::vector<sycl::device> &oneapi_devices =
- (allow_all_devices || allow_host) ? platform.get_devices(sycl::info::device_type::all) :
- platform.get_devices(sycl::info::device_type::gpu);
-
- for (const sycl::device &device : oneapi_devices) {
- if (allow_all_devices) {
- /* still filter out host device if build doesn't support it. */
- if (allow_host || !device.is_host()) {
- available_devices.push_back(device);
- }
- }
- else {
- bool filter_out = false;
-
- /* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
- * assuming they have either more than 96 Execution Units or not 7 threads per EU.
- * Official support can be broaden to older and smaller GPUs once ready. */
- if (device.is_gpu() && platform.get_backend() == sycl::backend::ext_oneapi_level_zero) {
- /* Filtered-out defaults in-case these values aren't available through too old L0
- * runtime. */
- int number_of_eus = 96;
- int threads_per_eu = 7;
- if (device.has(sycl::aspect::ext_intel_gpu_eu_count)) {
- number_of_eus = device.get_info<sycl::info::device::ext_intel_gpu_eu_count>();
- }
- if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
- threads_per_eu =
- device.get_info<sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
- }
- /* This filters out all Level-Zero supported GPUs from older generation than Arc. */
- if (number_of_eus <= 96 && threads_per_eu == 7) {
- filter_out = true;
- }
- /* if not already filtered out, check driver version. */
- if (!filter_out) {
- int driver_build_version = parse_driver_build_version(device);
- if ((driver_build_version > 100000 &&
- driver_build_version < lowest_supported_driver_version_win) ||
- (driver_build_version > 0 &&
- driver_build_version < lowest_supported_driver_version_neo)) {
- filter_out = true;
- }
- }
- }
- else if (!allow_host && device.is_host()) {
- filter_out = true;
- }
- else if (!allow_all_devices) {
- filter_out = true;
- }
-
- if (!filter_out) {
- available_devices.push_back(device);
- }
- }
- }
- }
-
- return available_devices;
-}
-
-char *oneapi_device_capabilities()
-{
- std::stringstream capabilities;
-
- const std::vector<sycl::device> &oneapi_devices = oneapi_available_devices();
- for (const sycl::device &device : oneapi_devices) {
- const std::string &name = device.get_info<sycl::info::device::name>();
-
- capabilities << std::string("\t") << name << "\n";
-# define WRITE_ATTR(attribute_name, attribute_variable) \
- capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \
- << "\n";
-# define GET_NUM_ATTR(attribute) \
- { \
- size_t attribute = (size_t)device.get_info<sycl::info::device ::attribute>(); \
- capabilities << "\t\tsycl::info::device::" #attribute "\t\t\t" << attribute << "\n"; \
- }
-
- GET_NUM_ATTR(vendor_id)
- GET_NUM_ATTR(max_compute_units)
- GET_NUM_ATTR(max_work_item_dimensions)
-
- sycl::id<3> max_work_item_sizes = device.get_info<sycl::info::device::max_work_item_sizes>();
- WRITE_ATTR("max_work_item_sizes_dim0", ((size_t)max_work_item_sizes.get(0)))
- WRITE_ATTR("max_work_item_sizes_dim1", ((size_t)max_work_item_sizes.get(1)))
- WRITE_ATTR("max_work_item_sizes_dim2", ((size_t)max_work_item_sizes.get(2)))
-
- GET_NUM_ATTR(max_work_group_size)
- GET_NUM_ATTR(max_num_sub_groups)
- GET_NUM_ATTR(sub_group_independent_forward_progress)
-
- GET_NUM_ATTR(preferred_vector_width_char)
- GET_NUM_ATTR(preferred_vector_width_short)
- GET_NUM_ATTR(preferred_vector_width_int)
- GET_NUM_ATTR(preferred_vector_width_long)
- GET_NUM_ATTR(preferred_vector_width_float)
- GET_NUM_ATTR(preferred_vector_width_double)
- GET_NUM_ATTR(preferred_vector_width_half)
-
- GET_NUM_ATTR(native_vector_width_char)
- GET_NUM_ATTR(native_vector_width_short)
- GET_NUM_ATTR(native_vector_width_int)
- GET_NUM_ATTR(native_vector_width_long)
- GET_NUM_ATTR(native_vector_width_float)
- GET_NUM_ATTR(native_vector_width_double)
- GET_NUM_ATTR(native_vector_width_half)
-
- size_t max_clock_frequency =
- (size_t)(device.is_host() ? (size_t)0 :
- device.get_info<sycl::info::device::max_clock_frequency>());
- WRITE_ATTR("max_clock_frequency", max_clock_frequency)
-
- GET_NUM_ATTR(address_bits)
- GET_NUM_ATTR(max_mem_alloc_size)
-
- /* NOTE(@nsirgien): Implementation doesn't use image support as bindless images aren't
- * supported so we always return false, even if device supports HW texture usage acceleration.
- */
- bool image_support = false;
- WRITE_ATTR("image_support", (size_t)image_support)
-
- GET_NUM_ATTR(max_parameter_size)
- GET_NUM_ATTR(mem_base_addr_align)
- GET_NUM_ATTR(global_mem_size)
- GET_NUM_ATTR(local_mem_size)
- GET_NUM_ATTR(error_correction_support)
- GET_NUM_ATTR(profiling_timer_resolution)
- GET_NUM_ATTR(is_available)
-
-# undef GET_NUM_ATTR
-# undef WRITE_ATTR
- capabilities << "\n";
- }
-
- return ::strdup(capabilities.str().c_str());
-}
-
-void oneapi_free(void *p)
-{
- if (p) {
- ::free(p);
- }
-}
-
-void oneapi_iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr)
-{
- int num = 0;
- std::vector<sycl::device> devices = oneapi_available_devices();
- for (sycl::device &device : devices) {
- const std::string &platform_name =
- device.get_platform().get_info<sycl::info::platform::name>();
- std::string name = device.get_info<sycl::info::device::name>();
- std::string id = "ONEAPI_" + platform_name + "_" + name;
- if (device.has(sycl::aspect::ext_intel_pci_address)) {
- id.append("_" + device.get_info<sycl::info::device::ext_intel_pci_address>());
- }
- (cb)(id.c_str(), name.c_str(), num, user_ptr);
- num++;
- }
-}
-
-size_t oneapi_get_memcapacity(SyclQueue *queue)
-{
- return reinterpret_cast<sycl::queue *>(queue)
- ->get_device()
- .get_info<sycl::info::device::global_mem_size>();
-}
-
-size_t oneapi_get_compute_units_amount(SyclQueue *queue)
-{
- return reinterpret_cast<sycl::queue *>(queue)
- ->get_device()
- .get_info<sycl::info::device::max_compute_units>();
-}
-
#endif /* WITH_ONEAPI */
diff --git a/intern/cycles/kernel/device/oneapi/kernel.h b/intern/cycles/kernel/device/oneapi/kernel.h
index c5f853742ed..2bfc0b89c87 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.h
+++ b/intern/cycles/kernel/device/oneapi/kernel.h
@@ -25,11 +25,6 @@ enum DeviceKernel : int;
class SyclQueue;
-typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
- const char *name,
- int num,
- void *user_ptr);
-
typedef void (*OneAPIErrorCallback)(const char *error, void *user_ptr);
struct KernelContext {
@@ -45,13 +40,17 @@ struct KernelContext {
extern "C" {
# endif
-# define DLL_INTERFACE_CALL(function, return_type, ...) \
- CYCLES_KERNEL_ONEAPI_EXPORT return_type function(__VA_ARGS__);
-# include "kernel/device/oneapi/dll_interface_template.h"
-# undef DLL_INTERFACE_CALL
-
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_run_test_kernel(SyclQueue *queue_);
+CYCLES_KERNEL_ONEAPI_EXPORT void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr);
+CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_kernel_preferred_local_size(
+ SyclQueue *queue, const DeviceKernel kernel, const size_t kernel_global_size);
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
+ int kernel,
+ size_t global_size,
+ void **args);
+CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_load_kernels(SyclQueue *queue,
+ const unsigned int requested_features);
# ifdef __cplusplus
}
# endif
-
#endif /* WITH_ONEAPI */
diff --git a/intern/cycles/kernel/device/oneapi/kernel_templates.h b/intern/cycles/kernel/device/oneapi/kernel_templates.h
index d8964d9b672..0ae925cf748 100644
--- a/intern/cycles/kernel/device/oneapi/kernel_templates.h
+++ b/intern/cycles/kernel/device/oneapi/kernel_templates.h
@@ -80,7 +80,7 @@ void oneapi_call(
(x, ##__VA_ARGS__)
/* This template automatically casts entries in the void **args array to the types requested by the kernel func.
- Since kernel parameters are passed as void ** to the device, this is the closest that we have to type safety. */
+ * Since kernel parameters are passed as void ** to the device, this is the closest that we have to type safety. */
#define oneapi_template(...) \
template<ONEAPI_CALL_FOR(ONEAPI_TYP, __VA_ARGS__)> \
void oneapi_call( \
diff --git a/intern/cycles/kernel/device/optix/bvh.h b/intern/cycles/kernel/device/optix/bvh.h
new file mode 100644
index 00000000000..6d81b44660c
--- /dev/null
+++ b/intern/cycles/kernel/device/optix/bvh.h
@@ -0,0 +1,659 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2021-2022 Blender Foundation */
+
+/* OptiX implementation of ray-scene intersection. */
+
+#pragma once
+
+#include "kernel/bvh/types.h"
+#include "kernel/bvh/util.h"
+
+#define OPTIX_DEFINE_ABI_VERSION_ONLY
+#include <optix_function_table.h>
+
+CCL_NAMESPACE_BEGIN
+
+/* Utilities. */
+
+template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
+{
+ return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1());
+}
+template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
+{
+ return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3());
+}
+
+template<typename T> ccl_device_forceinline T *get_payload_ptr_6()
+{
+ return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6());
+}
+
+ccl_device_forceinline int get_object_id()
+{
+#ifdef __OBJECT_MOTION__
+ /* Always get the instance ID from the TLAS
+ * There might be a motion transform node between TLAS and BLAS which does not have one. */
+ return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
+#else
+ return optixGetInstanceId();
+#endif
+}
+
+/* Hit/miss functions. */
+
+extern "C" __global__ void __miss__kernel_optix_miss()
+{
+ /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
+ optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
+ optixSetPayload_5(PRIMITIVE_NONE);
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_local_hit()
+{
+#if defined(__HAIR__) || defined(__POINTCLOUD__)
+ if (!optixIsTriangleHit()) {
+ /* Ignore curves and points. */
+ return optixIgnoreIntersection();
+ }
+#endif
+
+#ifdef __BVH_LOCAL__
+ const int object = get_object_id();
+ if (object != optixGetPayload_4() /* local_object */) {
+ /* Only intersect with matching object. */
+ return optixIgnoreIntersection();
+ }
+
+ const int prim = optixGetPrimitiveIndex();
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+ if (intersection_skip_self_local(ray->self, prim)) {
+ return optixIgnoreIntersection();
+ }
+
+ const uint max_hits = optixGetPayload_5();
+ if (max_hits == 0) {
+ /* Special case for when no hit information is requested, just report that something was hit */
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+
+ int hit = 0;
+ uint *const lcg_state = get_payload_ptr_0<uint>();
+ LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
+
+ if (lcg_state) {
+ for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+ if (optixGetRayTmax() == local_isect->hits[i].t) {
+ return optixIgnoreIntersection();
+ }
+ }
+
+ hit = local_isect->num_hits++;
+
+ if (local_isect->num_hits > max_hits) {
+ hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
+ if (hit >= max_hits) {
+ return optixIgnoreIntersection();
+ }
+ }
+ }
+ else {
+ if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
+ /* Record closest intersection only.
+ * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
+ */
+ return optixIgnoreIntersection();
+ }
+
+ local_isect->num_hits = 1;
+ }
+
+ Intersection *isect = &local_isect->hits[hit];
+ isect->t = optixGetRayTmax();
+ isect->prim = prim;
+ isect->object = get_object_id();
+ isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
+
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ isect->u = barycentrics.x;
+ isect->v = barycentrics.y;
+
+ /* Record geometric normal. */
+ const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
+ const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
+ const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
+ const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
+ local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
+
+ /* Continue tracing (without this the trace call would return after the first hit). */
+ optixIgnoreIntersection();
+#endif
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
+{
+#ifdef __SHADOW_RECORD_ALL__
+ int prim = optixGetPrimitiveIndex();
+ const uint object = get_object_id();
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+# endif
+
+ float u = 0.0f, v = 0.0f;
+ int type = 0;
+ if (optixIsTriangleHit()) {
+ /* Triangle. */
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ u = barycentrics.x;
+ v = barycentrics.y;
+ type = kernel_data_fetch(objects, object).primitive_type;
+ }
+# ifdef __HAIR__
+ else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
+ /* Curve. */
+ u = __uint_as_float(optixGetAttribute_0());
+ v = __uint_as_float(optixGetAttribute_1());
+
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+ type = segment.type;
+ prim = segment.prim;
+
+# if OPTIX_ABI_VERSION < 55
+ /* Filter out curve end-caps. */
+ if (u == 0.0f || u == 1.0f) {
+ return optixIgnoreIntersection();
+ }
+# endif
+ }
+# endif
+ else {
+ /* Point. */
+ type = kernel_data_fetch(objects, object).primitive_type;
+ u = 0.0f;
+ v = 0.0f;
+ }
+
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+ if (intersection_skip_self_shadow(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+
+# ifndef __TRANSPARENT_SHADOWS__
+ /* No transparent shadows support compiled in, make opaque. */
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+# else
+ const uint max_hits = optixGetPayload_3();
+ const uint num_hits_packed = optixGetPayload_2();
+ const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
+ const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
+
+ /* If no transparent shadows, all light is blocked and we can stop immediately. */
+ if (num_hits >= max_hits ||
+ !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+
+ /* Always use baked shadow transparency for curves. */
+ if (type & PRIMITIVE_CURVE) {
+ float throughput = __uint_as_float(optixGetPayload_1());
+ throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, type, u);
+ optixSetPayload_1(__float_as_uint(throughput));
+ optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
+
+ if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+ optixSetPayload_5(true);
+ return optixTerminateRay();
+ }
+ else {
+ /* Continue tracing. */
+ optixIgnoreIntersection();
+ return;
+ }
+ }
+
+ /* Record transparent intersection. */
+ optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
+
+ uint record_index = num_recorded_hits;
+
+ const IntegratorShadowState state = optixGetPayload_0();
+
+ const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+ if (record_index >= max_record_hits) {
+ /* If maximum number of hits reached, find a hit to replace. */
+ float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
+ uint max_recorded_hit = 0;
+
+ for (int i = 1; i < max_record_hits; i++) {
+ const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
+ if (isect_t > max_recorded_t) {
+ max_recorded_t = isect_t;
+ max_recorded_hit = i;
+ }
+ }
+
+ if (optixGetRayTmax() >= max_recorded_t) {
+ /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
+ * current hit anymore. */
+ return;
+ }
+
+ record_index = max_recorded_hit;
+ }
+
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax();
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
+ INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
+
+ /* Continue tracing. */
+ optixIgnoreIntersection();
+# endif /* __TRANSPARENT_SHADOWS__ */
+#endif /* __SHADOW_RECORD_ALL__ */
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_volume_test()
+{
+#if defined(__HAIR__) || defined(__POINTCLOUD__)
+ if (!optixIsTriangleHit()) {
+ /* Ignore curves. */
+ return optixIgnoreIntersection();
+ }
+#endif
+
+ const uint object = get_object_id();
+#ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+#endif
+
+ if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
+ return optixIgnoreIntersection();
+ }
+
+ const int prim = optixGetPrimitiveIndex();
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+ if (intersection_skip_self(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+}
+
+extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
+{
+#ifdef __HAIR__
+# if OPTIX_ABI_VERSION < 55
+ if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) {
+ /* Filter out curve end-caps. */
+ const float u = __uint_as_float(optixGetAttribute_0());
+ if (u == 0.0f || u == 1.0f) {
+ return optixIgnoreIntersection();
+ }
+ }
+# endif
+#endif
+
+ const uint object = get_object_id();
+ const uint visibility = optixGetPayload_4();
+#ifdef __VISIBILITY_FLAG__
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return optixIgnoreIntersection();
+ }
+#endif
+
+ int prim = optixGetPrimitiveIndex();
+ if (optixIsTriangleHit()) {
+ /* Triangle. */
+ }
+#ifdef __HAIR__
+ else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
+ /* Curve. */
+ prim = kernel_data_fetch(curve_segments, prim).prim;
+ }
+#endif
+
+ ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
+
+ if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ if (intersection_skip_self_shadow(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+ else {
+ /* Shadow ray early termination. */
+ return optixTerminateRay();
+ }
+ }
+ else {
+ if (intersection_skip_self(ray->self, object, prim)) {
+ return optixIgnoreIntersection();
+ }
+ }
+}
+
+extern "C" __global__ void __closesthit__kernel_optix_hit()
+{
+ const int object = get_object_id();
+ const int prim = optixGetPrimitiveIndex();
+
+ optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
+ optixSetPayload_4(object);
+
+ if (optixIsTriangleHit()) {
+ const float2 barycentrics = optixGetTriangleBarycentrics();
+ optixSetPayload_1(__float_as_uint(barycentrics.x));
+ optixSetPayload_2(__float_as_uint(barycentrics.y));
+ optixSetPayload_3(prim);
+ optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
+ }
+ else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
+ optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
+ optixSetPayload_2(optixGetAttribute_1());
+ optixSetPayload_3(segment.prim);
+ optixSetPayload_5(segment.type);
+ }
+ else {
+ optixSetPayload_1(0);
+ optixSetPayload_2(0);
+ optixSetPayload_3(prim);
+ optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
+ }
+}
+
+/* Custom primitive intersection functions. */
+
+#ifdef __HAIR__
+ccl_device_inline void optix_intersection_curve(const int prim, const int type)
+{
+ const int object = get_object_id();
+
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return;
+ }
+# endif
+
+ const float3 ray_P = optixGetObjectRayOrigin();
+ const float3 ray_D = optixGetObjectRayDirection();
+ const float ray_tmin = optixGetRayTmin();
+
+# ifdef __OBJECT_MOTION__
+ const float time = optixGetRayTime();
+# else
+ const float time = 0.0f;
+# endif
+
+ Intersection isect;
+ isect.t = optixGetRayTmax();
+
+ if (curve_intersect(NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
+ static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
+ optixReportIntersection(isect.t,
+ type & PRIMITIVE_ALL,
+ __float_as_int(isect.u), /* Attribute_0 */
+ __float_as_int(isect.v)); /* Attribute_1 */
+ }
+}
+
+extern "C" __global__ void __intersection__curve_ribbon()
+{
+ const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
+ const int prim = segment.prim;
+ const int type = segment.type;
+ if (type & PRIMITIVE_CURVE_RIBBON) {
+ optix_intersection_curve(prim, type);
+ }
+}
+
+#endif
+
+#ifdef __POINTCLOUD__
+extern "C" __global__ void __intersection__point()
+{
+ const int prim = optixGetPrimitiveIndex();
+ const int object = get_object_id();
+ const int type = kernel_data_fetch(objects, object).primitive_type;
+
+# ifdef __VISIBILITY_FLAG__
+ const uint visibility = optixGetPayload_4();
+ if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
+ return;
+ }
+# endif
+
+ const float3 ray_P = optixGetObjectRayOrigin();
+ const float3 ray_D = optixGetObjectRayDirection();
+ const float ray_tmin = optixGetRayTmin();
+
+# ifdef __OBJECT_MOTION__
+ const float time = optixGetRayTime();
+# else
+ const float time = 0.0f;
+# endif
+
+ Intersection isect;
+ isect.t = optixGetRayTmax();
+
+ if (point_intersect(NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
+ static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
+ optixReportIntersection(isect.t, type & PRIMITIVE_ALL);
+ }
+}
+#endif
+
+/* Scene intersection. */
+
+ccl_device_intersect bool scene_intersect(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ const uint visibility,
+ ccl_private Intersection *isect)
+{
+ uint p0 = 0;
+ uint p1 = 0;
+ uint p2 = 0;
+ uint p3 = 0;
+ uint p4 = visibility;
+ uint p5 = PRIMITIVE_NONE;
+ uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
+ uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
+
+ uint ray_mask = visibility & 0xFF;
+ uint ray_flags = OPTIX_RAY_FLAG_ENFORCE_ANYHIT;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+ else if (visibility & PATH_RAY_SHADOW_OPAQUE) {
+ ray_flags |= OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT;
+ }
+
+ optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+ ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ ray->time,
+ ray_mask,
+ ray_flags,
+ 0, /* SBT offset for PG_HITD */
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5,
+ p6,
+ p7);
+
+ isect->t = __uint_as_float(p0);
+ isect->u = __uint_as_float(p1);
+ isect->v = __uint_as_float(p2);
+ isect->prim = p3;
+ isect->object = p4;
+ isect->type = p5;
+
+ return p5 != PRIMITIVE_NONE;
+}
+
+#ifdef __BVH_LOCAL__
+ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private LocalIntersection *local_isect,
+ int local_object,
+ ccl_private uint *lcg_state,
+ int max_hits)
+{
+ uint p0 = pointer_pack_to_uint_0(lcg_state);
+ uint p1 = pointer_pack_to_uint_1(lcg_state);
+ uint p2 = pointer_pack_to_uint_0(local_isect);
+ uint p3 = pointer_pack_to_uint_1(local_isect);
+ uint p4 = local_object;
+ uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
+ uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
+
+ /* Is set to zero on miss or if ray is aborted, so can be used as return value. */
+ uint p5 = max_hits;
+
+ if (local_isect) {
+ local_isect->num_hits = 0; /* Initialize hit count to zero. */
+ }
+ optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+ ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ ray->time,
+ 0xFF,
+ /* Need to always call into __anyhit__kernel_optix_local_hit. */
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 2, /* SBT offset for PG_HITL */
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5,
+ p6,
+ p7);
+
+ return p5;
+}
+#endif
+
+#ifdef __SHADOW_RECORD_ALL__
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
+ IntegratorShadowState state,
+ ccl_private const Ray *ray,
+ uint visibility,
+ uint max_hits,
+ ccl_private uint *num_recorded_hits,
+ ccl_private float *throughput)
+{
+ uint p0 = state;
+ uint p1 = __float_as_uint(1.0f); /* Throughput. */
+ uint p2 = 0; /* Number of hits. */
+ uint p3 = max_hits;
+ uint p4 = visibility;
+ uint p5 = false;
+ uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
+ uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+
+ optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+ ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ ray->time,
+ ray_mask,
+ /* Need to always call into __anyhit__kernel_optix_shadow_all_hit. */
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 1, /* SBT offset for PG_HITS */
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5,
+ p6,
+ p7);
+
+ *num_recorded_hits = uint16_unpack_from_uint_0(p2);
+ *throughput = __uint_as_float(p1);
+
+ return p5;
+}
+#endif
+
+#ifdef __VOLUME__
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
+ ccl_private const Ray *ray,
+ ccl_private Intersection *isect,
+ const uint visibility)
+{
+ uint p0 = 0;
+ uint p1 = 0;
+ uint p2 = 0;
+ uint p3 = 0;
+ uint p4 = visibility;
+ uint p5 = PRIMITIVE_NONE;
+ uint p6 = ((uint64_t)ray) & 0xFFFFFFFF;
+ uint p7 = (((uint64_t)ray) >> 32) & 0xFFFFFFFF;
+
+ uint ray_mask = visibility & 0xFF;
+ if (0 == ray_mask && (visibility & ~0xFF) != 0) {
+ ray_mask = 0xFF;
+ }
+
+ optixTrace(intersection_ray_valid(ray) ? kernel_data.device_bvh : 0,
+ ray->P,
+ ray->D,
+ ray->tmin,
+ ray->tmax,
+ ray->time,
+ ray_mask,
+ /* Need to always call into __anyhit__kernel_optix_volume_test. */
+ OPTIX_RAY_FLAG_ENFORCE_ANYHIT,
+ 3, /* SBT offset for PG_HITV */
+ 0,
+ 0,
+ p0,
+ p1,
+ p2,
+ p3,
+ p4,
+ p5,
+ p6,
+ p7);
+
+ isect->t = __uint_as_float(p0);
+ isect->u = __uint_as_float(p1);
+ isect->v = __uint_as_float(p2);
+ isect->prim = p3;
+ isect->object = p4;
+ isect->type = p5;
+
+ return p5 != PRIMITIVE_NONE;
+}
+#endif
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index aa4a6321a8b..1a11a533b7e 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -8,7 +8,6 @@
#include <optix.h>
#define __KERNEL_GPU__
-#define __KERNEL_GPU_RAYTRACING__
#define __KERNEL_CUDA__ /* OptiX kernels are implicitly CUDA kernels too */
#define __KERNEL_OPTIX__
#define CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu
index 510f7cca5d6..6abb5aeacb9 100644
--- a/intern/cycles/kernel/device/optix/kernel.cu
+++ b/intern/cycles/kernel/device/optix/kernel.cu
@@ -20,34 +20,6 @@
#include "kernel/integrator/intersect_volume_stack.h"
// clang-format on
-#define OPTIX_DEFINE_ABI_VERSION_ONLY
-#include <optix_function_table.h>
-
-template<typename T> ccl_device_forceinline T *get_payload_ptr_0()
-{
- return pointer_unpack_from_uint<T>(optixGetPayload_0(), optixGetPayload_1());
-}
-template<typename T> ccl_device_forceinline T *get_payload_ptr_2()
-{
- return pointer_unpack_from_uint<T>(optixGetPayload_2(), optixGetPayload_3());
-}
-
-template<typename T> ccl_device_forceinline T *get_payload_ptr_6()
-{
- return (T *)(((uint64_t)optixGetPayload_7() << 32) | optixGetPayload_6());
-}
-
-ccl_device_forceinline int get_object_id()
-{
-#ifdef __OBJECT_MOTION__
- /* Always get the instance ID from the TLAS
- * There might be a motion transform node between TLAS and BLAS which does not have one. */
- return optixGetInstanceIdFromHandle(optixGetTransformListHandle(0));
-#else
- return optixGetInstanceId();
-#endif
-}
-
extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_closest()
{
const int global_index = optixGetLaunchIndex().x;
@@ -84,411 +56,3 @@ extern "C" __global__ void __raygen__kernel_optix_integrator_intersect_volume_st
integrator_intersect_volume_stack(nullptr, path_index);
}
-extern "C" __global__ void __miss__kernel_optix_miss()
-{
- /* 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss. */
- optixSetPayload_0(__float_as_uint(optixGetRayTmax()));
- optixSetPayload_5(PRIMITIVE_NONE);
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_local_hit()
-{
-#if defined(__HAIR__) || defined(__POINTCLOUD__)
- if (!optixIsTriangleHit()) {
- /* Ignore curves and points. */
- return optixIgnoreIntersection();
- }
-#endif
-
-#ifdef __BVH_LOCAL__
- const int object = get_object_id();
- if (object != optixGetPayload_4() /* local_object */) {
- /* Only intersect with matching object. */
- return optixIgnoreIntersection();
- }
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self_local(ray->self, prim)) {
- return optixIgnoreIntersection();
- }
-
- const uint max_hits = optixGetPayload_5();
- if (max_hits == 0) {
- /* Special case for when no hit information is requested, just report that something was hit */
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
-
- int hit = 0;
- uint *const lcg_state = get_payload_ptr_0<uint>();
- LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>();
-
- if (lcg_state) {
- for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
- if (optixGetRayTmax() == local_isect->hits[i].t) {
- return optixIgnoreIntersection();
- }
- }
-
- hit = local_isect->num_hits++;
-
- if (local_isect->num_hits > max_hits) {
- hit = lcg_step_uint(lcg_state) % local_isect->num_hits;
- if (hit >= max_hits) {
- return optixIgnoreIntersection();
- }
- }
- }
- else {
- if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) {
- /* Record closest intersection only.
- * Do not terminate ray here, since there is no guarantee about distance ordering in any-hit.
- */
- return optixIgnoreIntersection();
- }
-
- local_isect->num_hits = 1;
- }
-
- Intersection *isect = &local_isect->hits[hit];
- isect->t = optixGetRayTmax();
- isect->prim = prim;
- isect->object = get_object_id();
- isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
-
- const float2 barycentrics = optixGetTriangleBarycentrics();
- isect->u = 1.0f - barycentrics.y - barycentrics.x;
- isect->v = barycentrics.x;
-
- /* Record geometric normal. */
- const uint tri_vindex = kernel_data_fetch(tri_vindex, prim).w;
- const float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0);
- const float3 tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1);
- const float3 tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
- local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
-
- /* Continue tracing (without this the trace call would return after the first hit). */
- optixIgnoreIntersection();
-#endif
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit()
-{
-#ifdef __SHADOW_RECORD_ALL__
- int prim = optixGetPrimitiveIndex();
- const uint object = get_object_id();
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-# endif
-
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self_shadow(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
-
- float u = 0.0f, v = 0.0f;
- int type = 0;
- if (optixIsTriangleHit()) {
- const float2 barycentrics = optixGetTriangleBarycentrics();
- u = 1.0f - barycentrics.y - barycentrics.x;
- v = barycentrics.x;
- type = kernel_data_fetch(objects, object).primitive_type;
- }
-# ifdef __HAIR__
- else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
- u = __uint_as_float(optixGetAttribute_0());
- v = __uint_as_float(optixGetAttribute_1());
-
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
- type = segment.type;
- prim = segment.prim;
-
-# if OPTIX_ABI_VERSION < 55
- /* Filter out curve endcaps. */
- if (u == 0.0f || u == 1.0f) {
- return optixIgnoreIntersection();
- }
-# endif
- }
-# endif
- else {
- type = kernel_data_fetch(objects, object).primitive_type;
- u = 0.0f;
- v = 0.0f;
- }
-
-# ifndef __TRANSPARENT_SHADOWS__
- /* No transparent shadows support compiled in, make opaque. */
- optixSetPayload_5(true);
- return optixTerminateRay();
-# else
- const uint max_hits = optixGetPayload_3();
- const uint num_hits_packed = optixGetPayload_2();
- const uint num_recorded_hits = uint16_unpack_from_uint_0(num_hits_packed);
- const uint num_hits = uint16_unpack_from_uint_1(num_hits_packed);
-
- /* If no transparent shadows, all light is blocked and we can stop immediately. */
- if (num_hits >= max_hits ||
- !(intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
-
- /* Always use baked shadow transparency for curves. */
- if (type & PRIMITIVE_CURVE) {
- float throughput = __uint_as_float(optixGetPayload_1());
- throughput *= intersection_curve_shadow_transparency(nullptr, object, prim, u);
- optixSetPayload_1(__float_as_uint(throughput));
- optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits, num_hits + 1));
-
- if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
- optixSetPayload_5(true);
- return optixTerminateRay();
- }
- else {
- /* Continue tracing. */
- optixIgnoreIntersection();
- return;
- }
- }
-
- /* Record transparent intersection. */
- optixSetPayload_2(uint16_pack_to_uint(num_recorded_hits + 1, num_hits + 1));
-
- uint record_index = num_recorded_hits;
-
- const IntegratorShadowState state = optixGetPayload_0();
-
- const uint max_record_hits = min(max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
- if (record_index >= max_record_hits) {
- /* If maximum number of hits reached, find a hit to replace. */
- float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
- uint max_recorded_hit = 0;
-
- for (int i = 1; i < max_record_hits; i++) {
- const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
- if (isect_t > max_recorded_t) {
- max_recorded_t = isect_t;
- max_recorded_hit = i;
- }
- }
-
- if (optixGetRayTmax() >= max_recorded_t) {
- /* Accept hit, so that OptiX won't consider any more hits beyond the distance of the
- * current hit anymore. */
- return;
- }
-
- record_index = max_recorded_hit;
- }
-
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = optixGetRayTmax();
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
- INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
-
- /* Continue tracing. */
- optixIgnoreIntersection();
-# endif /* __TRANSPARENT_SHADOWS__ */
-#endif /* __SHADOW_RECORD_ALL__ */
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_volume_test()
-{
-#if defined(__HAIR__) || defined(__POINTCLOUD__)
- if (!optixIsTriangleHit()) {
- /* Ignore curves. */
- return optixIgnoreIntersection();
- }
-#endif
-
- const uint object = get_object_id();
-#ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-#endif
-
- if ((kernel_data_fetch(object_flag, object) & SD_OBJECT_HAS_VOLUME) == 0) {
- return optixIgnoreIntersection();
- }
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
- if (intersection_skip_self(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
-}
-
-extern "C" __global__ void __anyhit__kernel_optix_visibility_test()
-{
-#ifdef __HAIR__
-# if OPTIX_ABI_VERSION < 55
- if (optixGetPrimitiveType() == OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE) {
- /* Filter out curve endcaps. */
- const float u = __uint_as_float(optixGetAttribute_0());
- if (u == 0.0f || u == 1.0f) {
- return optixIgnoreIntersection();
- }
- }
-# endif
-#endif
-
- const uint object = get_object_id();
- const uint visibility = optixGetPayload_4();
-#ifdef __VISIBILITY_FLAG__
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return optixIgnoreIntersection();
- }
-#endif
-
- const int prim = optixGetPrimitiveIndex();
- ccl_private Ray *const ray = get_payload_ptr_6<Ray>();
-
- if (visibility & PATH_RAY_SHADOW_OPAQUE) {
- if (intersection_skip_self_shadow(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
- else {
- /* Shadow ray early termination. */
- return optixTerminateRay();
- }
- }
- else {
- if (intersection_skip_self(ray->self, object, prim)) {
- return optixIgnoreIntersection();
- }
- }
-}
-
-extern "C" __global__ void __closesthit__kernel_optix_hit()
-{
- const int object = get_object_id();
- const int prim = optixGetPrimitiveIndex();
-
- optixSetPayload_0(__float_as_uint(optixGetRayTmax())); /* Intersection distance */
- optixSetPayload_4(object);
-
- if (optixIsTriangleHit()) {
- const float2 barycentrics = optixGetTriangleBarycentrics();
- optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x));
- optixSetPayload_2(__float_as_uint(barycentrics.x));
- optixSetPayload_3(prim);
- optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
- }
- else if ((optixGetHitKind() & (~PRIMITIVE_MOTION)) != PRIMITIVE_POINT) {
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
- optixSetPayload_1(optixGetAttribute_0()); /* Same as 'optixGetCurveParameter()' */
- optixSetPayload_2(optixGetAttribute_1());
- optixSetPayload_3(segment.prim);
- optixSetPayload_5(segment.type);
- }
- else {
- optixSetPayload_1(0);
- optixSetPayload_2(0);
- optixSetPayload_3(prim);
- optixSetPayload_5(kernel_data_fetch(objects, object).primitive_type);
- }
-}
-
-#ifdef __HAIR__
-ccl_device_inline void optix_intersection_curve(const int prim, const int type)
-{
- const int object = get_object_id();
-
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return;
- }
-# endif
-
- float3 P = optixGetObjectRayOrigin();
- float3 dir = optixGetObjectRayDirection();
- float tmin = optixGetRayTmin();
-
- /* The direction is not normalized by default, but the curve intersection routine expects that */
- float len;
- dir = normalize_len(dir, &len);
-
-# ifdef __OBJECT_MOTION__
- const float time = optixGetRayTime();
-# else
- const float time = 0.0f;
-# endif
-
- Intersection isect;
- isect.t = optixGetRayTmax();
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX)
- isect.t *= len;
-
- if (curve_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
- static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
- optixReportIntersection(isect.t / len,
- type & PRIMITIVE_ALL,
- __float_as_int(isect.u), /* Attribute_0 */
- __float_as_int(isect.v)); /* Attribute_1 */
- }
-}
-
-extern "C" __global__ void __intersection__curve_ribbon()
-{
- const KernelCurveSegment segment = kernel_data_fetch(curve_segments, optixGetPrimitiveIndex());
- const int prim = segment.prim;
- const int type = segment.type;
- if (type & PRIMITIVE_CURVE_RIBBON) {
- optix_intersection_curve(prim, type);
- }
-}
-
-#endif
-
-#ifdef __POINTCLOUD__
-extern "C" __global__ void __intersection__point()
-{
- const int prim = optixGetPrimitiveIndex();
- const int object = get_object_id();
- const int type = kernel_data_fetch(objects, object).primitive_type;
-
-# ifdef __VISIBILITY_FLAG__
- const uint visibility = optixGetPayload_4();
- if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
- return;
- }
-# endif
-
- float3 P = optixGetObjectRayOrigin();
- float3 dir = optixGetObjectRayDirection();
- float tmin = optixGetRayTmin();
-
- /* The direction is not normalized by default, the point intersection routine expects that. */
- float len;
- dir = normalize_len(dir, &len);
-
-# ifdef __OBJECT_MOTION__
- const float time = optixGetRayTime();
-# else
- const float time = 0.0f;
-# endif
-
- Intersection isect;
- isect.t = optixGetRayTmax();
- /* Transform maximum distance into object space. */
- if (isect.t != FLT_MAX) {
- isect.t *= len;
- }
-
- if (point_intersect(NULL, &isect, P, dir, tmin, isect.t, object, prim, time, type)) {
- static_assert(PRIMITIVE_ALL < 128, "Values >= 128 are reserved for OptiX internal use");
- optixReportIntersection(isect.t / len, type & PRIMITIVE_ALL);
- }
-}
-#endif
diff --git a/intern/cycles/kernel/film/adaptive_sampling.h b/intern/cycles/kernel/film/adaptive_sampling.h
index 16867c39d99..d28c87747c3 100644
--- a/intern/cycles/kernel/film/adaptive_sampling.h
+++ b/intern/cycles/kernel/film/adaptive_sampling.h
@@ -3,15 +3,15 @@
#pragma once
-#include "kernel/film/write_passes.h"
+#include "kernel/film/write.h"
CCL_NAMESPACE_BEGIN
/* Check whether the pixel has converged and should not be sampled anymore. */
-ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg,
- ConstIntegratorState state,
- ccl_global float *render_buffer)
+ccl_device_forceinline bool film_need_sample_pixel(KernelGlobals kg,
+ ConstIntegratorState state,
+ ccl_global float *render_buffer)
{
if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) {
return true;
@@ -28,14 +28,14 @@ ccl_device_forceinline bool kernel_need_sample_pixel(KernelGlobals kg,
/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
-ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg,
- ccl_global float *render_buffer,
- int x,
- int y,
- float threshold,
- bool reset,
- int offset,
- int stride)
+ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg,
+ ccl_global float *render_buffer,
+ int x,
+ int y,
+ float threshold,
+ bool reset,
+ int offset,
+ int stride)
{
kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
kernel_assert(kernel_data.film.pass_sample_count != PASS_UNUSED);
@@ -78,13 +78,13 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(KernelGlobals kg,
/* This is a simple box filter in two passes.
* When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
-ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg,
- ccl_global float *render_buffer,
- int y,
- int start_x,
- int width,
- int offset,
- int stride)
+ccl_device void film_adaptive_sampling_filter_x(KernelGlobals kg,
+ ccl_global float *render_buffer,
+ int y,
+ int start_x,
+ int width,
+ int offset,
+ int stride)
{
kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
@@ -111,13 +111,13 @@ ccl_device void kernel_adaptive_sampling_filter_x(KernelGlobals kg,
}
}
-ccl_device void kernel_adaptive_sampling_filter_y(KernelGlobals kg,
- ccl_global float *render_buffer,
- int x,
- int start_y,
- int height,
- int offset,
- int stride)
+ccl_device void film_adaptive_sampling_filter_y(KernelGlobals kg,
+ ccl_global float *render_buffer,
+ int x,
+ int start_y,
+ int height,
+ int offset,
+ int stride)
{
kernel_assert(kernel_data.film.pass_adaptive_aux_buffer != PASS_UNUSED);
diff --git a/intern/cycles/kernel/film/aov_passes.h b/intern/cycles/kernel/film/aov_passes.h
new file mode 100644
index 00000000000..3fbb250340f
--- /dev/null
+++ b/intern/cycles/kernel/film/aov_passes.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#include "kernel/geom/geom.h"
+
+#include "kernel/film/write.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void film_write_aov_pass_value(KernelGlobals kg,
+ ConstIntegratorState state,
+ ccl_global float *ccl_restrict render_buffer,
+ const int aov_id,
+ const float value)
+{
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
+ film_write_pass_float(buffer + kernel_data.film.pass_aov_value + aov_id, value);
+}
+
+ccl_device_inline void film_write_aov_pass_color(KernelGlobals kg,
+ ConstIntegratorState state,
+ ccl_global float *ccl_restrict render_buffer,
+ const int aov_id,
+ const float3 color)
+{
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
+ film_write_pass_float4(buffer + kernel_data.film.pass_aov_color + aov_id,
+ make_float4(color.x, color.y, color.z, 1.0f));
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/id_passes.h b/intern/cycles/kernel/film/cryptomatte_passes.h
index c8317512bb2..4765777e7e2 100644
--- a/intern/cycles/kernel/film/id_passes.h
+++ b/intern/cycles/kernel/film/cryptomatte_passes.h
@@ -8,15 +8,15 @@ CCL_NAMESPACE_BEGIN
/* Element of ID pass stored in the render buffers.
* It is `float2` semantically, but it must be unaligned since the offset of ID passes in the
* render buffers might not meet expected by compiler alignment. */
-typedef struct IDPassBufferElement {
+typedef struct CryptoPassBufferElement {
float x;
float y;
-} IDPassBufferElement;
+} CryptoPassBufferElement;
-ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
- int num_slots,
- float id,
- float weight)
+ccl_device_inline void film_write_cryptomatte_slots(ccl_global float *buffer,
+ int num_slots,
+ float id,
+ float weight)
{
kernel_assert(id != ID_NONE);
if (weight == 0.0f) {
@@ -24,7 +24,7 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
}
for (int slot = 0; slot < num_slots; slot++) {
- ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
+ ccl_global CryptoPassBufferElement *id_buffer = (ccl_global CryptoPassBufferElement *)buffer;
#ifdef __ATOMIC_PASS_WRITE__
/* If the loop reaches an empty slot, the ID isn't in any slot yet - so add it! */
if (id_buffer[slot].x == ID_NONE) {
@@ -60,9 +60,9 @@ ccl_device_inline void kernel_write_id_slots(ccl_global float *buffer,
}
}
-ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_slots)
+ccl_device_inline void film_sort_cryptomatte_slots(ccl_global float *buffer, int num_slots)
{
- ccl_global IDPassBufferElement *id_buffer = (ccl_global IDPassBufferElement *)buffer;
+ ccl_global CryptoPassBufferElement *id_buffer = (ccl_global CryptoPassBufferElement *)buffer;
for (int slot = 1; slot < num_slots; ++slot) {
if (id_buffer[slot].x == ID_NONE) {
return;
@@ -70,7 +70,7 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl
/* Since we're dealing with a tiny number of elements, insertion sort should be fine. */
int i = slot;
while (i > 0 && id_buffer[i].y > id_buffer[i - 1].y) {
- const IDPassBufferElement swap = id_buffer[i];
+ const CryptoPassBufferElement swap = id_buffer[i];
id_buffer[i] = id_buffer[i - 1];
id_buffer[i - 1] = swap;
--i;
@@ -79,15 +79,15 @@ ccl_device_inline void kernel_sort_id_slots(ccl_global float *buffer, int num_sl
}
/* post-sorting for Cryptomatte */
-ccl_device_inline void kernel_cryptomatte_post(KernelGlobals kg,
- ccl_global float *render_buffer,
- int pixel_index)
+ccl_device_inline void film_cryptomatte_post(KernelGlobals kg,
+ ccl_global float *render_buffer,
+ int pixel_index)
{
const int pass_stride = kernel_data.film.pass_stride;
const uint64_t render_buffer_offset = (uint64_t)pixel_index * pass_stride;
ccl_global float *cryptomatte_buffer = render_buffer + render_buffer_offset +
kernel_data.film.pass_cryptomatte;
- kernel_sort_id_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
+ film_sort_cryptomatte_slots(cryptomatte_buffer, 2 * kernel_data.film.cryptomatte_depth);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/data_passes.h b/intern/cycles/kernel/film/data_passes.h
new file mode 100644
index 00000000000..efdf616749f
--- /dev/null
+++ b/intern/cycles/kernel/film/data_passes.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#include "kernel/geom/geom.h"
+
+#include "kernel/camera/camera.h"
+
+#include "kernel/film/cryptomatte_passes.h"
+#include "kernel/film/write.h"
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline size_t film_write_cryptomatte_pass(ccl_global float *ccl_restrict buffer,
+ size_t depth,
+ float id,
+ float matte_weight)
+{
+ film_write_cryptomatte_slots(buffer, depth * 2, id, matte_weight);
+ return depth * 4;
+}
+
+ccl_device_inline void film_write_data_passes(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ ccl_global float *ccl_restrict render_buffer)
+{
+#ifdef __PASSES__
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
+
+ if (!(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
+ return;
+ }
+
+ const int flag = kernel_data.film.pass_flag;
+
+ if (!(flag & PASS_ANY)) {
+ return;
+ }
+
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
+
+ if (!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
+ if (!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f ||
+ average(surface_shader_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
+ if (INTEGRATOR_STATE(state, path, sample) == 0) {
+ if (flag & PASSMASK(DEPTH)) {
+ const float depth = camera_z_depth(kg, sd->P);
+ film_write_pass_float(buffer + kernel_data.film.pass_depth, depth);
+ }
+ if (flag & PASSMASK(OBJECT_ID)) {
+ const float id = object_pass_id(kg, sd->object);
+ film_write_pass_float(buffer + kernel_data.film.pass_object_id, id);
+ }
+ if (flag & PASSMASK(MATERIAL_ID)) {
+ const float id = shader_pass_id(kg, sd);
+ film_write_pass_float(buffer + kernel_data.film.pass_material_id, id);
+ }
+ if (flag & PASSMASK(POSITION)) {
+ const float3 position = sd->P;
+ film_write_pass_float3(buffer + kernel_data.film.pass_position, position);
+ }
+ }
+
+ if (flag & PASSMASK(NORMAL)) {
+ const float3 normal = surface_shader_average_normal(kg, sd);
+ film_write_pass_float3(buffer + kernel_data.film.pass_normal, normal);
+ }
+ if (flag & PASSMASK(ROUGHNESS)) {
+ const float roughness = surface_shader_average_roughness(sd);
+ film_write_pass_float(buffer + kernel_data.film.pass_roughness, roughness);
+ }
+ if (flag & PASSMASK(UV)) {
+ const float3 uv = primitive_uv(kg, sd);
+ film_write_pass_float3(buffer + kernel_data.film.pass_uv, uv);
+ }
+ if (flag & PASSMASK(MOTION)) {
+ const float4 speed = primitive_motion_vector(kg, sd);
+ film_write_pass_float4(buffer + kernel_data.film.pass_motion, speed);
+ film_write_pass_float(buffer + kernel_data.film.pass_motion_weight, 1.0f);
+ }
+
+ INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_SINGLE_PASS_DONE;
+ }
+ }
+
+ if (kernel_data.film.cryptomatte_passes) {
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ const float matte_weight = average(throughput) *
+ (1.0f - average(surface_shader_transparency(kg, sd)));
+ if (matte_weight > 0.0f) {
+ ccl_global float *cryptomatte_buffer = buffer + kernel_data.film.pass_cryptomatte;
+ if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) {
+ const float id = object_cryptomatte_id(kg, sd->object);
+ cryptomatte_buffer += film_write_cryptomatte_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) {
+ const float id = kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).cryptomatte_id;
+ cryptomatte_buffer += film_write_cryptomatte_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) {
+ const float id = object_cryptomatte_asset_id(kg, sd->object);
+ cryptomatte_buffer += film_write_cryptomatte_pass(
+ cryptomatte_buffer, kernel_data.film.cryptomatte_depth, id, matte_weight);
+ }
+ }
+ }
+
+ if (flag & PASSMASK(DIFFUSE_COLOR)) {
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_diffuse_color,
+ surface_shader_diffuse(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(GLOSSY_COLOR)) {
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_glossy_color,
+ surface_shader_glossy(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(TRANSMISSION_COLOR)) {
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_transmission_color,
+ surface_shader_transmission(kg, sd) * throughput);
+ }
+ if (flag & PASSMASK(MIST)) {
+ /* Bring depth into 0..1 range. */
+ const float mist_start = kernel_data.film.mist_start;
+ const float mist_inv_depth = kernel_data.film.mist_inv_depth;
+
+ const float depth = camera_distance(kg, sd->P);
+ float mist = saturatef((depth - mist_start) * mist_inv_depth);
+
+ /* Falloff */
+ const float mist_falloff = kernel_data.film.mist_falloff;
+
+ if (mist_falloff == 1.0f)
+ ;
+ else if (mist_falloff == 2.0f)
+ mist = mist * mist;
+ else if (mist_falloff == 0.5f)
+ mist = sqrtf(mist);
+ else
+ mist = powf(mist, mist_falloff);
+
+ /* Modulate by transparency */
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ const Spectrum alpha = surface_shader_alpha(kg, sd);
+ const float mist_output = (1.0f - mist) * average(throughput * alpha);
+
+ /* Note that the final value in the render buffer we want is 1 - mist_output,
+ * to avoid having to tracking this in the Integrator state we do the negation
+ * after rendering. */
+ film_write_pass_float(buffer + kernel_data.film.pass_mist, mist_output);
+ }
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/denoising_passes.h b/intern/cycles/kernel/film/denoising_passes.h
new file mode 100644
index 00000000000..11672235b06
--- /dev/null
+++ b/intern/cycles/kernel/film/denoising_passes.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#include "kernel/geom/geom.h"
+
+#include "kernel/film/write.h"
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __DENOISING_FEATURES__
+ccl_device_forceinline void film_write_denoising_features_surface(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ ccl_global float *ccl_restrict
+ render_buffer)
+{
+ if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_DENOISING_FEATURES)) {
+ return;
+ }
+
+ /* Skip implicitly transparent surfaces. */
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ return;
+ }
+
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
+
+ if (kernel_data.film.pass_denoising_depth != PASS_UNUSED) {
+ const Spectrum denoising_feature_throughput = INTEGRATOR_STATE(
+ state, path, denoising_feature_throughput);
+ const float depth = sd->ray_length - INTEGRATOR_STATE(state, ray, tmin);
+ const float denoising_depth = ensure_finite(average(denoising_feature_throughput) * depth);
+ film_write_pass_float(buffer + kernel_data.film.pass_denoising_depth, denoising_depth);
+ }
+
+ float3 normal = zero_float3();
+ Spectrum diffuse_albedo = zero_spectrum();
+ Spectrum specular_albedo = zero_spectrum();
+ float sum_weight = 0.0f, sum_nonspecular_weight = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ continue;
+ }
+
+ /* All closures contribute to the normal feature, but only diffuse-like ones to the albedo. */
+ normal += sc->N * sc->sample_weight;
+ sum_weight += sc->sample_weight;
+
+ Spectrum closure_albedo = sc->weight;
+ /* Closures that include a Fresnel term typically have weights close to 1 even though their
+ * actual contribution is significantly lower.
+ * To account for this, we scale their weight by the average fresnel factor (the same is also
+ * done for the sample weight in the BSDF setup, so we don't need to scale that here). */
+ if (CLOSURE_IS_BSDF_MICROFACET_FRESNEL(sc->type)) {
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)sc;
+ closure_albedo *= bsdf->extra->fresnel_color;
+ }
+ else if (CLOSURE_IS_BSDF_SHEEN(sc->type)) {
+ ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)sc;
+ closure_albedo *= bsdf->avg_value;
+ }
+ else if (sc->type == CLOSURE_BSDF_HAIR_PRINCIPLED_ID) {
+ closure_albedo *= bsdf_principled_hair_albedo(sc);
+ }
+ else if (sc->type == CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID) {
+ /* BSSRDF already accounts for weight, retro-reflection would double up. */
+ ccl_private const PrincipledDiffuseBsdf *bsdf = (ccl_private const PrincipledDiffuseBsdf *)
+ sc;
+ if (bsdf->components == PRINCIPLED_DIFFUSE_RETRO_REFLECTION) {
+ continue;
+ }
+ }
+
+ if (bsdf_get_specular_roughness_squared(sc) > sqr(0.075f)) {
+ diffuse_albedo += closure_albedo;
+ sum_nonspecular_weight += sc->sample_weight;
+ }
+ else {
+ specular_albedo += closure_albedo;
+ }
+ }
+
+ /* Wait for next bounce if 75% or more sample weight belongs to specular-like closures. */
+ if ((sum_weight == 0.0f) || (sum_nonspecular_weight * 4.0f > sum_weight)) {
+ if (sum_weight != 0.0f) {
+ normal /= sum_weight;
+ }
+
+ if (kernel_data.film.pass_denoising_normal != PASS_UNUSED) {
+ /* Transform normal into camera space. */
+ const Transform worldtocamera = kernel_data.cam.worldtocamera;
+ normal = transform_direction(&worldtocamera, normal);
+
+ const float3 denoising_normal = ensure_finite(normal);
+ film_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal);
+ }
+
+ if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
+ const Spectrum denoising_feature_throughput = INTEGRATOR_STATE(
+ state, path, denoising_feature_throughput);
+ const Spectrum denoising_albedo = ensure_finite(denoising_feature_throughput *
+ diffuse_albedo);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
+ }
+
+ INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES;
+ }
+ else {
+ INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) *= specular_albedo;
+ }
+}
+
+ccl_device_forceinline void film_write_denoising_features_volume(KernelGlobals kg,
+ IntegratorState state,
+ const Spectrum albedo,
+ const bool scatter,
+ ccl_global float *ccl_restrict
+ render_buffer)
+{
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
+ const Spectrum denoising_feature_throughput = INTEGRATOR_STATE(
+ state, path, denoising_feature_throughput);
+
+ if (scatter && kernel_data.film.pass_denoising_normal != PASS_UNUSED) {
+ /* Assume scatter is sufficiently diffuse to stop writing denoising features. */
+ INTEGRATOR_STATE_WRITE(state, path, flag) &= ~PATH_RAY_DENOISING_FEATURES;
+
+ /* Write view direction as normal. */
+ const float3 denoising_normal = make_float3(0.0f, 0.0f, -1.0f);
+ film_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal);
+ }
+
+ if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
+ /* Write albedo. */
+ const Spectrum denoising_albedo = ensure_finite(denoising_feature_throughput * albedo);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
+ }
+}
+#endif /* __DENOISING_FEATURES__ */
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/film/accumulate.h b/intern/cycles/kernel/film/light_passes.h
index 33c35a68ad0..b45b5305119 100644
--- a/intern/cycles/kernel/film/accumulate.h
+++ b/intern/cycles/kernel/film/light_passes.h
@@ -4,7 +4,7 @@
#pragma once
#include "kernel/film/adaptive_sampling.h"
-#include "kernel/film/write_passes.h"
+#include "kernel/film/write.h"
#include "kernel/integrator/shadow_catcher.h"
@@ -21,10 +21,10 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval,
const ClosureType closure_type,
- float3 value)
+ Spectrum value)
{
- eval->diffuse = zero_float3();
- eval->glossy = zero_float3();
+ eval->diffuse = zero_spectrum();
+ eval->glossy = zero_spectrum();
if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) {
eval->diffuse = value;
@@ -38,7 +38,7 @@ ccl_device_inline void bsdf_eval_init(ccl_private BsdfEval *eval,
ccl_device_inline void bsdf_eval_accum(ccl_private BsdfEval *eval,
const ClosureType closure_type,
- float3 value)
+ Spectrum value)
{
if (CLOSURE_IS_BSDF_DIFFUSE(closure_type)) {
eval->diffuse += value;
@@ -62,26 +62,26 @@ ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float value)
eval->sum *= value;
}
-ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, float3 value)
+ccl_device_inline void bsdf_eval_mul(ccl_private BsdfEval *eval, Spectrum value)
{
eval->diffuse *= value;
eval->glossy *= value;
eval->sum *= value;
}
-ccl_device_inline float3 bsdf_eval_sum(ccl_private const BsdfEval *eval)
+ccl_device_inline Spectrum bsdf_eval_sum(ccl_private const BsdfEval *eval)
{
return eval->sum;
}
-ccl_device_inline float3 bsdf_eval_pass_diffuse_weight(ccl_private const BsdfEval *eval)
+ccl_device_inline Spectrum bsdf_eval_pass_diffuse_weight(ccl_private const BsdfEval *eval)
{
/* Ratio of diffuse weight to recover proportions for writing to render pass.
* We assume reflection, transmission and volume scatter to be exclusive. */
return safe_divide(eval->diffuse, eval->sum);
}
-ccl_device_inline float3 bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval *eval)
+ccl_device_inline Spectrum bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval *eval)
{
/* Ratio of glossy weight to recover proportions for writing to render pass.
* We assume reflection, transmission and volume scatter to be exclusive. */
@@ -95,7 +95,7 @@ ccl_device_inline float3 bsdf_eval_pass_glossy_weight(ccl_private const BsdfEval
* to render buffers instead of using per-thread memory, and to avoid the
* impact of clamping on other contributions. */
-ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private float3 *L, int bounce)
+ccl_device_forceinline void film_clamp_light(KernelGlobals kg, ccl_private Spectrum *L, int bounce)
{
#ifdef __KERNEL_DEBUG_NAN__
if (!isfinite_safe(*L)) {
@@ -121,55 +121,49 @@ ccl_device_forceinline void kernel_accum_clamp(KernelGlobals kg, ccl_private flo
* Pass accumulation utilities.
*/
-/* Get pointer to pixel in render buffer. */
-ccl_device_forceinline ccl_global float *kernel_accum_pixel_render_buffer(
- KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
-{
- const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
- const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
- kernel_data.film.pass_stride;
- return render_buffer + render_buffer_offset;
-}
-
/* --------------------------------------------------------------------
* Adaptive sampling.
*/
-ccl_device_inline int kernel_accum_sample(KernelGlobals kg,
- ConstIntegratorState state,
- ccl_global float *ccl_restrict render_buffer,
- int sample,
- int sample_offset)
+ccl_device_inline int film_write_sample(KernelGlobals kg,
+ ConstIntegratorState state,
+ ccl_global float *ccl_restrict render_buffer,
+ int sample,
+ int sample_offset)
{
if (kernel_data.film.pass_sample_count == PASS_UNUSED) {
return sample;
}
- ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
return atomic_fetch_and_add_uint32(
(ccl_global uint *)(buffer) + kernel_data.film.pass_sample_count, 1) +
sample_offset;
}
-ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg,
- const int sample,
- const float3 contribution,
- ccl_global float *ccl_restrict buffer)
+ccl_device void film_write_adaptive_buffer(KernelGlobals kg,
+ const int sample,
+ const Spectrum contribution,
+ ccl_global float *ccl_restrict buffer)
{
- /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate our stopping
- * criteria. This is the heuristic from "A hierarchical automatic stopping condition for Monte
- * Carlo global illumination" except that here it is applied per pixel and not in hierarchical
- * tiles. */
+ /* Adaptive Sampling. Fill the additional buffer with only one half of the samples and
+ * calculate our stopping criteria. This is the heuristic from "A hierarchical automatic
+ * stopping condition for Monte Carlo global illumination" except that here it is applied
+ * per pixel and not in hierarchical tiles. */
if (kernel_data.film.pass_adaptive_aux_buffer == PASS_UNUSED) {
return;
}
- if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
- kernel_write_pass_float4(
- buffer + kernel_data.film.pass_adaptive_aux_buffer,
- make_float4(contribution.x * 2.0f, contribution.y * 2.0f, contribution.z * 2.0f, 0.0f));
+ if (sample_is_class_A(kernel_data.integrator.sampling_pattern, sample)) {
+ const float3 contribution_rgb = spectrum_to_rgb(contribution);
+
+ film_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer,
+ make_float4(contribution_rgb.x * 2.0f,
+ contribution_rgb.y * 2.0f,
+ contribution_rgb.z * 2.0f,
+ 0.0f));
}
}
@@ -184,10 +178,10 @@ ccl_device void kernel_accum_adaptive_buffer(KernelGlobals kg,
* Returns truth if the contribution is fully handled here and is not to be added to the other
* passes (like combined, adaptive sampling). */
-ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg,
- const uint32_t path_flag,
- const float3 contribution,
- ccl_global float *ccl_restrict buffer)
+ccl_device bool film_write_shadow_catcher(KernelGlobals kg,
+ const uint32_t path_flag,
+ const Spectrum contribution,
+ ccl_global float *ccl_restrict buffer)
{
if (!kernel_data.integrator.has_shadow_catcher) {
return false;
@@ -198,7 +192,7 @@ ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg,
/* Matte pass. */
if (kernel_shadow_catcher_is_matte_path(path_flag)) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher_matte, contribution);
/* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive
* sampling is based on how noisy the combined pass is as if there were no catchers in the
* scene. */
@@ -206,18 +200,18 @@ ccl_device bool kernel_accum_shadow_catcher(KernelGlobals kg,
/* Shadow catcher pass. */
if (kernel_shadow_catcher_is_object_pass(path_flag)) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher, contribution);
return true;
}
return false;
}
-ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg,
- const uint32_t path_flag,
- const float3 contribution,
- const float transparent,
- ccl_global float *ccl_restrict buffer)
+ccl_device bool film_write_shadow_catcher_transparent(KernelGlobals kg,
+ const uint32_t path_flag,
+ const Spectrum contribution,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
{
if (!kernel_data.integrator.has_shadow_catcher) {
return false;
@@ -232,9 +226,11 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg,
/* Matte pass. */
if (kernel_shadow_catcher_is_matte_path(path_flag)) {
- kernel_write_pass_float4(
+ const float3 contribution_rgb = spectrum_to_rgb(contribution);
+
+ film_write_pass_float4(
buffer + kernel_data.film.pass_shadow_catcher_matte,
- make_float4(contribution.x, contribution.y, contribution.z, transparent));
+ make_float4(contribution_rgb.x, contribution_rgb.y, contribution_rgb.z, transparent));
/* NOTE: Accumulate the combined pass and to the samples count pass, so that the adaptive
* sampling is based on how noisy the combined pass is as if there were no catchers in the
* scene. */
@@ -245,17 +241,17 @@ ccl_device bool kernel_accum_shadow_catcher_transparent(KernelGlobals kg,
/* NOTE: The transparency of the shadow catcher pass is ignored. It is not needed for the
* calculation and the alpha channel of the pass contains numbers of samples contributed to a
* pixel of the pass. */
- kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow_catcher, contribution);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow_catcher, contribution);
return true;
}
return false;
}
-ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg,
- const uint32_t path_flag,
- const float transparent,
- ccl_global float *ccl_restrict buffer)
+ccl_device void film_write_shadow_catcher_transparent_only(KernelGlobals kg,
+ const uint32_t path_flag,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
{
if (!kernel_data.integrator.has_shadow_catcher) {
return;
@@ -265,10 +261,29 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg,
/* Matte pass. */
if (kernel_shadow_catcher_is_matte_path(path_flag)) {
- kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent);
+ film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3, transparent);
}
}
+/* Write shadow catcher passes on a bounce from the shadow catcher object. */
+ccl_device_forceinline void film_write_shadow_catcher_bounce_data(
+ KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
+{
+ kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
+ kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
+
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
+
+ /* Count sample for the shadow catcher object. */
+ film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
+
+ /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
+ * transparency to the matte. */
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ film_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
+ average(throughput));
+}
+
#endif /* __SHADOW_CATCHER__ */
/* --------------------------------------------------------------------
@@ -276,54 +291,55 @@ ccl_device void kernel_accum_shadow_catcher_transparent_only(KernelGlobals kg,
*/
/* Write combined pass. */
-ccl_device_inline void kernel_accum_combined_pass(KernelGlobals kg,
- const uint32_t path_flag,
- const int sample,
- const float3 contribution,
- ccl_global float *ccl_restrict buffer)
+ccl_device_inline void film_write_combined_pass(KernelGlobals kg,
+ const uint32_t path_flag,
+ const int sample,
+ const Spectrum contribution,
+ ccl_global float *ccl_restrict buffer)
{
#ifdef __SHADOW_CATCHER__
- if (kernel_accum_shadow_catcher(kg, path_flag, contribution, buffer)) {
+ if (film_write_shadow_catcher(kg, path_flag, contribution, buffer)) {
return;
}
#endif
if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_combined, contribution);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_combined, contribution);
}
- kernel_accum_adaptive_buffer(kg, sample, contribution, buffer);
+ film_write_adaptive_buffer(kg, sample, contribution, buffer);
}
/* Write combined pass with transparency. */
-ccl_device_inline void kernel_accum_combined_transparent_pass(KernelGlobals kg,
- const uint32_t path_flag,
- const int sample,
- const float3 contribution,
- const float transparent,
- ccl_global float *ccl_restrict
- buffer)
+ccl_device_inline void film_write_combined_transparent_pass(KernelGlobals kg,
+ const uint32_t path_flag,
+ const int sample,
+ const Spectrum contribution,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
{
#ifdef __SHADOW_CATCHER__
- if (kernel_accum_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) {
+ if (film_write_shadow_catcher_transparent(kg, path_flag, contribution, transparent, buffer)) {
return;
}
#endif
if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
- kernel_write_pass_float4(
+ const float3 contribution_rgb = spectrum_to_rgb(contribution);
+
+ film_write_pass_float4(
buffer + kernel_data.film.pass_combined,
- make_float4(contribution.x, contribution.y, contribution.z, transparent));
+ make_float4(contribution_rgb.x, contribution_rgb.y, contribution_rgb.z, transparent));
}
- kernel_accum_adaptive_buffer(kg, sample, contribution, buffer);
+ film_write_adaptive_buffer(kg, sample, contribution, buffer);
}
/* Write background or emission to appropriate pass. */
-ccl_device_inline void kernel_accum_emission_or_background_pass(
+ccl_device_inline void film_write_emission_or_background_pass(
KernelGlobals kg,
ConstIntegratorState state,
- float3 contribution,
+ Spectrum contribution,
ccl_global float *ccl_restrict buffer,
const int pass,
const int lightgroup = LIGHTGROUP_NONE)
@@ -340,16 +356,16 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(
# ifdef __DENOISING_FEATURES__
if (path_flag & PATH_RAY_DENOISING_FEATURES) {
if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
- const float3 denoising_feature_throughput = INTEGRATOR_STATE(
+ const Spectrum denoising_feature_throughput = INTEGRATOR_STATE(
state, path, denoising_feature_throughput);
- const float3 denoising_albedo = denoising_feature_throughput * contribution;
- kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
+ const Spectrum denoising_albedo = denoising_feature_throughput * contribution;
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
}
}
# endif /* __DENOISING_FEATURES__ */
if (lightgroup != LIGHTGROUP_NONE && kernel_data.film.pass_lightgroup != PASS_UNUSED) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup,
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup,
contribution);
}
@@ -366,15 +382,15 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(
if (path_flag & PATH_RAY_SURFACE_PASS) {
/* Indirectly visible through reflection. */
- const float3 diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight);
- const float3 glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight);
+ const Spectrum diffuse_weight = INTEGRATOR_STATE(state, path, pass_diffuse_weight);
+ const Spectrum glossy_weight = INTEGRATOR_STATE(state, path, pass_glossy_weight);
/* Glossy */
const int glossy_pass_offset = ((INTEGRATOR_STATE(state, path, bounce) == 1) ?
kernel_data.film.pass_glossy_direct :
kernel_data.film.pass_glossy_indirect);
if (glossy_pass_offset != PASS_UNUSED) {
- kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution);
+ film_write_pass_spectrum(buffer + glossy_pass_offset, glossy_weight * contribution);
}
/* Transmission */
@@ -385,8 +401,8 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(
if (transmission_pass_offset != PASS_UNUSED) {
/* Transmission is what remains if not diffuse and glossy, not stored explicitly to save
* GPU memory. */
- const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight;
- kernel_write_pass_float3(buffer + transmission_pass_offset,
+ const Spectrum transmission_weight = one_spectrum() - diffuse_weight - glossy_weight;
+ film_write_pass_spectrum(buffer + transmission_pass_offset,
transmission_weight * contribution);
}
@@ -408,19 +424,19 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(
/* Single write call for GPU coherence. */
if (pass_offset != PASS_UNUSED) {
- kernel_write_pass_float3(buffer + pass_offset, contribution);
+ film_write_pass_spectrum(buffer + pass_offset, contribution);
}
#endif /* __PASSES__ */
}
/* Write light contribution to render buffer. */
-ccl_device_inline void kernel_accum_light(KernelGlobals kg,
- ConstIntegratorShadowState state,
- ccl_global float *ccl_restrict render_buffer)
+ccl_device_inline void film_write_direct_light(KernelGlobals kg,
+ ConstIntegratorShadowState state,
+ ccl_global float *ccl_restrict render_buffer)
{
/* The throughput for shadow paths already contains the light shader evaluation. */
- float3 contribution = INTEGRATOR_STATE(state, shadow_path, throughput);
- kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce));
+ Spectrum contribution = INTEGRATOR_STATE(state, shadow_path, throughput);
+ film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, shadow_path, bounce));
const uint32_t render_pixel_index = INTEGRATOR_STATE(state, shadow_path, render_pixel_index);
const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
@@ -433,17 +449,17 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
/* Ambient occlusion. */
if (path_flag & PATH_RAY_SHADOW_FOR_AO) {
if ((kernel_data.kernel_features & KERNEL_FEATURE_AO_PASS) && (path_flag & PATH_RAY_CAMERA)) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_ao, contribution);
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_ao, contribution);
}
if (kernel_data.kernel_features & KERNEL_FEATURE_AO_ADDITIVE) {
- const float3 ao_weight = INTEGRATOR_STATE(state, shadow_path, unshadowed_throughput);
- kernel_accum_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer);
+ const Spectrum ao_weight = INTEGRATOR_STATE(state, shadow_path, unshadowed_throughput);
+ film_write_combined_pass(kg, path_flag, sample, contribution * ao_weight, buffer);
}
return;
}
/* Direct light shadow. */
- kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer);
+ film_write_combined_pass(kg, path_flag, sample, contribution, buffer);
#ifdef __PASSES__
if (kernel_data.film.light_pass_flag & PASS_ANY) {
@@ -458,7 +474,7 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
/* Write lightgroup pass. LIGHTGROUP_NONE is ~0 so decode from unsigned to signed */
const int lightgroup = (int)(INTEGRATOR_STATE(state, shadow_path, lightgroup)) - 1;
if (lightgroup != LIGHTGROUP_NONE && kernel_data.film.pass_lightgroup != PASS_UNUSED) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup,
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_lightgroup + 3 * lightgroup,
contribution);
}
@@ -467,15 +483,15 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
if (path_flag & PATH_RAY_SURFACE_PASS) {
/* Indirectly visible through reflection. */
- const float3 diffuse_weight = INTEGRATOR_STATE(state, shadow_path, pass_diffuse_weight);
- const float3 glossy_weight = INTEGRATOR_STATE(state, shadow_path, pass_glossy_weight);
+ const Spectrum diffuse_weight = INTEGRATOR_STATE(state, shadow_path, pass_diffuse_weight);
+ const Spectrum glossy_weight = INTEGRATOR_STATE(state, shadow_path, pass_glossy_weight);
/* Glossy */
const int glossy_pass_offset = ((INTEGRATOR_STATE(state, shadow_path, bounce) == 0) ?
kernel_data.film.pass_glossy_direct :
kernel_data.film.pass_glossy_indirect);
if (glossy_pass_offset != PASS_UNUSED) {
- kernel_write_pass_float3(buffer + glossy_pass_offset, glossy_weight * contribution);
+ film_write_pass_spectrum(buffer + glossy_pass_offset, glossy_weight * contribution);
}
/* Transmission */
@@ -486,8 +502,8 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
if (transmission_pass_offset != PASS_UNUSED) {
/* Transmission is what remains if not diffuse and glossy, not stored explicitly to save
* GPU memory. */
- const float3 transmission_weight = one_float3() - diffuse_weight - glossy_weight;
- kernel_write_pass_float3(buffer + transmission_pass_offset,
+ const Spectrum transmission_weight = one_spectrum() - diffuse_weight - glossy_weight;
+ film_write_pass_spectrum(buffer + transmission_pass_offset,
transmission_weight * contribution);
}
@@ -508,19 +524,19 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
/* Single write call for GPU coherence. */
if (pass_offset != PASS_UNUSED) {
- kernel_write_pass_float3(buffer + pass_offset, contribution);
+ film_write_pass_spectrum(buffer + pass_offset, contribution);
}
}
/* Write shadow pass. */
if (kernel_data.film.pass_shadow != PASS_UNUSED && (path_flag & PATH_RAY_SHADOW_FOR_LIGHT) &&
(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
- const float3 unshadowed_throughput = INTEGRATOR_STATE(
+ const Spectrum unshadowed_throughput = INTEGRATOR_STATE(
state, shadow_path, unshadowed_throughput);
- const float3 shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
- const float3 shadow = safe_divide(shadowed_throughput, unshadowed_throughput) *
- kernel_data.film.pass_shadow_scale;
- kernel_write_pass_float3(buffer + kernel_data.film.pass_shadow, shadow);
+ const Spectrum shadowed_throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
+ const Spectrum shadow = safe_divide(shadowed_throughput, unshadowed_throughput) *
+ kernel_data.film.pass_shadow_scale;
+ film_write_pass_spectrum(buffer + kernel_data.film.pass_shadow, shadow);
}
}
#endif
@@ -531,78 +547,96 @@ ccl_device_inline void kernel_accum_light(KernelGlobals kg,
* Note that we accumulate transparency = 1 - alpha in the render buffer.
* Otherwise we'd have to write alpha on path termination, which happens
* in many places. */
-ccl_device_inline void kernel_accum_transparent(KernelGlobals kg,
- ConstIntegratorState state,
- const uint32_t path_flag,
- const float transparent,
- ccl_global float *ccl_restrict buffer)
+ccl_device_inline void film_write_transparent(KernelGlobals kg,
+ ConstIntegratorState state,
+ const uint32_t path_flag,
+ const float transparent,
+ ccl_global float *ccl_restrict buffer)
{
if (kernel_data.film.light_pass_flag & PASSMASK(COMBINED)) {
- kernel_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent);
+ film_write_pass_float(buffer + kernel_data.film.pass_combined + 3, transparent);
}
- kernel_accum_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer);
+ film_write_shadow_catcher_transparent_only(kg, path_flag, transparent, buffer);
}
/* Write holdout to render buffer. */
-ccl_device_inline void kernel_accum_holdout(KernelGlobals kg,
- ConstIntegratorState state,
- const uint32_t path_flag,
- const float transparent,
- ccl_global float *ccl_restrict render_buffer)
+ccl_device_inline void film_write_holdout(KernelGlobals kg,
+ ConstIntegratorState state,
+ const uint32_t path_flag,
+ const float transparent,
+ ccl_global float *ccl_restrict render_buffer)
{
- ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
- kernel_accum_transparent(kg, state, path_flag, transparent, buffer);
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
+ film_write_transparent(kg, state, path_flag, transparent, buffer);
}
/* Write background contribution to render buffer.
*
- * Includes transparency, matching kernel_accum_transparent. */
-ccl_device_inline void kernel_accum_background(KernelGlobals kg,
- ConstIntegratorState state,
- const float3 L,
- const float transparent,
- const bool is_transparent_background_ray,
- ccl_global float *ccl_restrict render_buffer)
+ * Includes transparency, matching film_write_transparent. */
+ccl_device_inline void film_write_background(KernelGlobals kg,
+ ConstIntegratorState state,
+ const Spectrum L,
+ const float transparent,
+ const bool is_transparent_background_ray,
+ ccl_global float *ccl_restrict render_buffer)
{
- float3 contribution = float3(INTEGRATOR_STATE(state, path, throughput)) * L;
- kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
+ Spectrum contribution = INTEGRATOR_STATE(state, path, throughput) * L;
+ film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
- ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
if (is_transparent_background_ray) {
- kernel_accum_transparent(kg, state, path_flag, transparent, buffer);
+ film_write_transparent(kg, state, path_flag, transparent, buffer);
}
else {
const int sample = INTEGRATOR_STATE(state, path, sample);
- kernel_accum_combined_transparent_pass(
- kg, path_flag, sample, contribution, transparent, buffer);
- }
- kernel_accum_emission_or_background_pass(kg,
- state,
- contribution,
- buffer,
- kernel_data.film.pass_background,
- kernel_data.background.lightgroup);
+ film_write_combined_transparent_pass(kg, path_flag, sample, contribution, transparent, buffer);
+ }
+ film_write_emission_or_background_pass(kg,
+ state,
+ contribution,
+ buffer,
+ kernel_data.film.pass_background,
+ kernel_data.background.lightgroup);
}
/* Write emission to render buffer. */
-ccl_device_inline void kernel_accum_emission(KernelGlobals kg,
- ConstIntegratorState state,
- const float3 L,
- ccl_global float *ccl_restrict render_buffer,
- const int lightgroup = LIGHTGROUP_NONE)
+ccl_device_inline void film_write_volume_emission(KernelGlobals kg,
+ ConstIntegratorState state,
+ const Spectrum L,
+ ccl_global float *ccl_restrict render_buffer,
+ const int lightgroup = LIGHTGROUP_NONE)
+{
+ Spectrum contribution = L;
+ film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
+
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
+ const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
+ const int sample = INTEGRATOR_STATE(state, path, sample);
+
+ film_write_combined_pass(kg, path_flag, sample, contribution, buffer);
+ film_write_emission_or_background_pass(
+ kg, state, contribution, buffer, kernel_data.film.pass_emission, lightgroup);
+}
+
+ccl_device_inline void film_write_surface_emission(KernelGlobals kg,
+ ConstIntegratorState state,
+ const Spectrum L,
+ const float mis_weight,
+ ccl_global float *ccl_restrict render_buffer,
+ const int lightgroup = LIGHTGROUP_NONE)
{
- float3 contribution = L;
- kernel_accum_clamp(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
+ Spectrum contribution = INTEGRATOR_STATE(state, path, throughput) * L * mis_weight;
+ film_clamp_light(kg, &contribution, INTEGRATOR_STATE(state, path, bounce) - 1);
- ccl_global float *buffer = kernel_accum_pixel_render_buffer(kg, state, render_buffer);
+ ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const int sample = INTEGRATOR_STATE(state, path, sample);
- kernel_accum_combined_pass(kg, path_flag, sample, contribution, buffer);
- kernel_accum_emission_or_background_pass(
+ film_write_combined_pass(kg, path_flag, sample, contribution, buffer);
+ film_write_emission_or_background_pass(
kg, state, contribution, buffer, kernel_data.film.pass_emission, lightgroup);
}
diff --git a/intern/cycles/kernel/film/read.h b/intern/cycles/kernel/film/read.h
index a0236909f4b..108f992e29d 100644
--- a/intern/cycles/kernel/film/read.h
+++ b/intern/cycles/kernel/film/read.h
@@ -1,6 +1,10 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
+/* Functions to retrieving render passes for display or output. Reading from
+ * the raw render buffer and normalizing based on the number of samples,
+ * computing alpha, compositing shadow catchers, etc. */
+
#pragma once
CCL_NAMESPACE_BEGIN
@@ -235,6 +239,21 @@ ccl_device_inline void film_get_pass_pixel_float3(ccl_global const KernelFilmCon
pixel[0] = f.x;
pixel[1] = f.y;
pixel[2] = f.z;
+
+ /* Optional alpha channel. */
+ if (kfilm_convert->num_components >= 4) {
+ if (kfilm_convert->pass_combined != PASS_UNUSED) {
+ float scale, scale_exposure;
+ film_get_scale_and_scale_exposure(kfilm_convert, buffer, &scale, &scale_exposure);
+
+ ccl_global const float *in_combined = buffer + kfilm_convert->pass_combined;
+ const float alpha = in_combined[3] * scale;
+ pixel[3] = film_transparency_to_alpha(alpha);
+ }
+ else {
+ pixel[3] = 1.0f;
+ }
+ }
}
/* --------------------------------------------------------------------
diff --git a/intern/cycles/kernel/film/write_passes.h b/intern/cycles/kernel/film/write.h
index 9148d73518f..c630a522ee3 100644
--- a/intern/cycles/kernel/film/write_passes.h
+++ b/intern/cycles/kernel/film/write.h
@@ -3,13 +3,26 @@
#pragma once
+#include "kernel/util/color.h"
+
#ifdef __KERNEL_GPU__
# define __ATOMIC_PASS_WRITE__
#endif
CCL_NAMESPACE_BEGIN
-ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
+/* Get pointer to pixel in render buffer. */
+ccl_device_forceinline ccl_global float *film_pass_pixel_render_buffer(
+ KernelGlobals kg, ConstIntegratorState state, ccl_global float *ccl_restrict render_buffer)
+{
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ return render_buffer + render_buffer_offset;
+}
+
+/* Write to pixel. */
+ccl_device_inline void film_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
{
#ifdef __ATOMIC_PASS_WRITE__
atomic_add_and_fetch_float(buffer, value);
@@ -18,8 +31,7 @@ ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict bu
#endif
}
-ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict buffer,
- float3 value)
+ccl_device_inline void film_write_pass_float3(ccl_global float *ccl_restrict buffer, float3 value)
{
#ifdef __ATOMIC_PASS_WRITE__
ccl_global float *buf_x = buffer + 0;
@@ -36,8 +48,13 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict b
#endif
}
-ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict buffer,
- float4 value)
+ccl_device_inline void film_write_pass_spectrum(ccl_global float *ccl_restrict buffer,
+ Spectrum value)
+{
+ film_write_pass_float3(buffer, spectrum_to_rgb(value));
+}
+
+ccl_device_inline void film_write_pass_float4(ccl_global float *ccl_restrict buffer, float4 value)
{
#ifdef __ATOMIC_PASS_WRITE__
ccl_global float *buf_x = buffer + 0;
diff --git a/intern/cycles/kernel/geom/attribute.h b/intern/cycles/kernel/geom/attribute.h
index 31a9e39d528..3a0ee1b09d1 100644
--- a/intern/cycles/kernel/geom/attribute.h
+++ b/intern/cycles/kernel/geom/attribute.h
@@ -16,14 +16,14 @@ CCL_NAMESPACE_BEGIN
/* Patch index for triangle, -1 if not subdivision triangle */
-ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, ccl_private const ShaderData *sd)
+ccl_device_inline uint subd_triangle_patch(KernelGlobals kg, int prim)
{
- return (sd->prim != PRIM_NONE) ? kernel_data_fetch(tri_patch, sd->prim) : ~0;
+ return (prim != PRIM_NONE) ? kernel_data_fetch(tri_patch, prim) : ~0;
}
-ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, ccl_private const ShaderData *sd)
+ccl_device_inline uint attribute_primitive_type(KernelGlobals kg, int prim, int type)
{
- if ((sd->type & PRIMITIVE_TRIANGLE) && subd_triangle_patch(kg, sd) != ~0) {
+ if ((type & PRIMITIVE_TRIANGLE) && subd_triangle_patch(kg, prim) != ~0) {
return ATTR_PRIM_SUBD;
}
else {
@@ -45,17 +45,16 @@ ccl_device_inline uint object_attribute_map_offset(KernelGlobals kg, int object)
return kernel_data_fetch(objects, object).attribute_map_offset;
}
-ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- uint id)
+ccl_device_inline AttributeDescriptor
+find_attribute(KernelGlobals kg, int object, int prim, int type, uint64_t id)
{
- if (sd->object == OBJECT_NONE) {
+ if (object == OBJECT_NONE) {
return attribute_not_found();
}
/* for SVM, find attribute by unique id */
- uint attr_offset = object_attribute_map_offset(kg, sd->object);
- attr_offset += attribute_primitive_type(kg, sd);
+ uint attr_offset = object_attribute_map_offset(kg, object);
+ attr_offset += attribute_primitive_type(kg, prim, type);
AttributeMap attr_map = kernel_data_fetch(attributes_map, attr_offset);
while (attr_map.id != id) {
@@ -77,7 +76,7 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg,
AttributeDescriptor desc;
desc.element = (AttributeElement)attr_map.element;
- if (sd->prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH &&
+ if (prim == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH &&
desc.element != ATTR_ELEMENT_VOXEL && desc.element != ATTR_ELEMENT_OBJECT) {
return attribute_not_found();
}
@@ -91,11 +90,16 @@ ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg,
return desc;
}
+ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ uint64_t id)
+{
+ return find_attribute(kg, sd->object, sd->prim, sd->type, id);
+}
+
/* Transform matrix attribute on meshes */
-ccl_device Transform primitive_attribute_matrix(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- const AttributeDescriptor desc)
+ccl_device Transform primitive_attribute_matrix(KernelGlobals kg, const AttributeDescriptor desc)
{
Transform tfm;
diff --git a/intern/cycles/kernel/geom/curve_intersect.h b/intern/cycles/kernel/geom/curve_intersect.h
index 9770105dd81..97644aacaa8 100644
--- a/intern/cycles/kernel/geom/curve_intersect.h
+++ b/intern/cycles/kernel/geom/curve_intersect.h
@@ -72,7 +72,7 @@ ccl_device_inline float sqr_point_to_line_distance(const float3 PmQ0, const floa
ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
const float3 cylinder_end,
const float cylinder_radius,
- const float3 ray_dir,
+ const float3 ray_D,
ccl_private float2 *t_o,
ccl_private float *u0_o,
ccl_private float3 *Ng0_o,
@@ -82,7 +82,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
/* Calculate quadratic equation to solve. */
const float rl = 1.0f / len(cylinder_end - cylinder_start);
const float3 P0 = cylinder_start, dP = (cylinder_end - cylinder_start) * rl;
- const float3 O = -P0, dO = ray_dir;
+ const float3 O = -P0, dO = ray_D;
const float dOdO = dot(dO, dO);
const float OdO = dot(dO, O);
@@ -123,7 +123,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
/* Calculates u and Ng for near hit. */
{
*u0_o = (t0 * dOz + Oz) * rl;
- const float3 Pr = t0 * ray_dir;
+ const float3 Pr = t0 * ray_D;
const float3 Pl = (*u0_o) * (cylinder_end - cylinder_start) + cylinder_start;
*Ng0_o = Pr - Pl;
}
@@ -131,7 +131,7 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
/* Calculates u and Ng for far hit. */
{
*u1_o = (t1 * dOz + Oz) * rl;
- const float3 Pr = t1 * ray_dir;
+ const float3 Pr = t1 * ray_D;
const float3 Pl = (*u1_o) * (cylinder_end - cylinder_start) + cylinder_start;
*Ng1_o = Pr - Pl;
}
@@ -141,10 +141,10 @@ ccl_device_inline bool cylinder_intersect(const float3 cylinder_start,
return true;
}
-ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_dir)
+ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, const float3 ray_D)
{
const float3 O = -P;
- const float3 D = ray_dir;
+ const float3 D = ray_D;
const float ON = dot(O, N);
const float DN = dot(D, N);
const float min_rcp_input = 1e-18f;
@@ -155,7 +155,7 @@ ccl_device_inline float2 half_plane_intersect(const float3 P, const float3 N, co
return make_float2(lower, upper);
}
-ccl_device bool curve_intersect_iterative(const float3 ray_dir,
+ccl_device bool curve_intersect_iterative(const float3 ray_D,
const float ray_tmin,
ccl_private float *ray_tmax,
const float dt,
@@ -165,7 +165,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
const bool use_backfacing,
ccl_private Intersection *isect)
{
- const float length_ray_dir = len(ray_dir);
+ const float length_ray_D = len(ray_D);
/* Error of curve evaluations is proportional to largest coordinate. */
const float4 box_min = min(min(curve[0], curve[1]), min(curve[2], curve[3]));
@@ -176,9 +176,9 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
const float radius_max = box_max.w;
for (int i = 0; i < CURVE_NUM_JACOBIAN_ITERATIONS; i++) {
- const float3 Q = ray_dir * t;
- const float3 dQdt = ray_dir;
- const float Q_err = 16.0f * FLT_EPSILON * length_ray_dir * t;
+ const float3 Q = ray_D * t;
+ const float3 dQdt = ray_D;
+ const float Q_err = 16.0f * FLT_EPSILON * length_ray_D * t;
const float4 P4 = catmull_rom_basis_eval(curve, u);
const float4 dPdu4 = catmull_rom_basis_derivative(curve, u);
@@ -233,7 +233,7 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
const float3 U = dradiusdu * R + dPdu;
const float3 V = cross(dPdu, R);
const float3 Ng = cross(V, U);
- if (!use_backfacing && dot(ray_dir, Ng) > 0.0f) {
+ if (!use_backfacing && dot(ray_D, Ng) > 0.0f) {
return false;
}
@@ -249,8 +249,8 @@ ccl_device bool curve_intersect_iterative(const float3 ray_dir,
return false;
}
-ccl_device bool curve_intersect_recursive(const float3 ray_orig,
- const float3 ray_dir,
+ccl_device bool curve_intersect_recursive(const float3 ray_P,
+ const float3 ray_D,
const float ray_tmin,
float ray_tmax,
float4 curve[4],
@@ -258,8 +258,8 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
{
/* Move ray closer to make intersection stable. */
const float3 center = float4_to_float3(0.25f * (curve[0] + curve[1] + curve[2] + curve[3]));
- const float dt = dot(center - ray_orig, ray_dir) / dot(ray_dir, ray_dir);
- const float3 ref = ray_orig + ray_dir * dt;
+ const float dt = dot(center - ray_P, ray_D) / dot(ray_D, ray_D);
+ const float3 ref = ray_P + ray_D * dt;
const float4 ref4 = make_float4(ref.x, ref.y, ref.z, 0.0f);
curve[0] -= ref4;
curve[1] -= ref4;
@@ -322,7 +322,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
valid = cylinder_intersect(float4_to_float3(P0),
float4_to_float3(P3),
r_outer,
- ray_dir,
+ ray_D,
&tc_outer,
&u_outer0,
&Ng_outer0,
@@ -335,11 +335,10 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
/* Intersect with cap-planes. */
float2 tp = make_float2(ray_tmin - dt, ray_tmax - dt);
tp = make_float2(max(tp.x, tc_outer.x), min(tp.y, tc_outer.y));
- const float2 h0 = half_plane_intersect(
- float4_to_float3(P0), float4_to_float3(dP0du), ray_dir);
+ const float2 h0 = half_plane_intersect(float4_to_float3(P0), float4_to_float3(dP0du), ray_D);
tp = make_float2(max(tp.x, h0.x), min(tp.y, h0.y));
const float2 h1 = half_plane_intersect(
- float4_to_float3(P3), -float4_to_float3(dP3du), ray_dir);
+ float4_to_float3(P3), -float4_to_float3(dP3du), ray_D);
tp = make_float2(max(tp.x, h1.x), min(tp.y, h1.y));
valid = tp.x <= tp.y;
if (!valid) {
@@ -359,7 +358,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
const bool valid_inner = cylinder_intersect(float4_to_float3(P0),
float4_to_float3(P3),
r_inner,
- ray_dir,
+ ray_D,
&tc_inner,
&u_inner0,
&Ng_inner0,
@@ -369,9 +368,9 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
/* At the unstable area we subdivide deeper. */
# if 0
const bool unstable0 = (!valid_inner) |
- (fabsf(dot(normalize(ray_dir), normalize(Ng_inner0))) < 0.3f);
+ (fabsf(dot(normalize(ray_D), normalize(Ng_inner0))) < 0.3f);
const bool unstable1 = (!valid_inner) |
- (fabsf(dot(normalize(ray_dir), normalize(Ng_inner1))) < 0.3f);
+ (fabsf(dot(normalize(ray_D), normalize(Ng_inner1))) < 0.3f);
# else
/* On the GPU appears to be a little faster if always enabled. */
(void)valid_inner;
@@ -396,7 +395,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
CURVE_NUM_BEZIER_SUBDIVISIONS;
if (depth >= termDepth) {
found |= curve_intersect_iterative(
- ray_dir, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
+ ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer0, tp0.x, use_backfacing, isect);
}
else {
recurse = true;
@@ -409,7 +408,7 @@ ccl_device bool curve_intersect_recursive(const float3 ray_orig,
CURVE_NUM_BEZIER_SUBDIVISIONS;
if (depth >= termDepth) {
found |= curve_intersect_iterative(
- ray_dir, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
+ ray_D, ray_tmin, &ray_tmax, dt, curve, u_outer1, tp1.y, use_backfacing, isect);
}
else {
recurse = true;
@@ -519,13 +518,16 @@ ccl_device_inline bool ribbon_intersect_quad(const float ray_tmin,
return true;
}
-ccl_device_inline void ribbon_ray_space(const float3 ray_dir, float3 ray_space[3])
+ccl_device_inline void ribbon_ray_space(const float3 ray_D,
+ const float ray_D_invlen,
+ float3 ray_space[3])
{
- const float3 dx0 = make_float3(0, ray_dir.z, -ray_dir.y);
- const float3 dx1 = make_float3(-ray_dir.z, 0, ray_dir.x);
+ const float3 D = ray_D * ray_D_invlen;
+ const float3 dx0 = make_float3(0, D.z, -D.y);
+ const float3 dx1 = make_float3(-D.z, 0, D.x);
ray_space[0] = normalize(dot(dx0, dx0) > dot(dx1, dx1) ? dx0 : dx1);
- ray_space[1] = normalize(cross(ray_dir, ray_space[0]));
- ray_space[2] = ray_dir;
+ ray_space[1] = normalize(cross(D, ray_space[0]));
+ ray_space[2] = D * ray_D_invlen;
}
ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
@@ -537,7 +539,7 @@ ccl_device_inline float4 ribbon_to_ray_space(const float3 ray_space[3],
}
ccl_device_inline bool ribbon_intersect(const float3 ray_org,
- const float3 ray_dir,
+ const float3 ray_D,
const float ray_tmin,
float ray_tmax,
const int N,
@@ -545,8 +547,9 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
ccl_private Intersection *isect)
{
/* Transform control points into ray space. */
+ const float ray_D_invlen = 1.0f / len(ray_D);
float3 ray_space[3];
- ribbon_ray_space(ray_dir, ray_space);
+ ribbon_ray_space(ray_D, ray_D_invlen, ray_space);
curve[0] = ribbon_to_ray_space(ray_space, ray_org, curve[0]);
curve[1] = ribbon_to_ray_space(ray_space, ray_org, curve[1]);
@@ -594,7 +597,7 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
const float avoidance_factor = 2.0f;
if (avoidance_factor != 0.0f) {
float r = mix(p0.w, p1.w, vu);
- valid0 = vt > avoidance_factor * r;
+ valid0 = vt > avoidance_factor * r * ray_D_invlen;
}
if (valid0) {
@@ -619,8 +622,8 @@ ccl_device_inline bool ribbon_intersect(const float3 ray_org,
ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
- const float3 P,
- const float3 dir,
+ const float3 ray_P,
+ const float3 ray_D,
const float tmin,
const float tmax,
int object,
@@ -651,7 +654,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
if (type & PRIMITIVE_CURVE_RIBBON) {
/* todo: adaptive number of subdivisions could help performance here. */
const int subdivisions = kernel_data.bvh.curve_subdivisions;
- if (ribbon_intersect(P, dir, tmin, tmax, subdivisions, curve, isect)) {
+ if (ribbon_intersect(ray_P, ray_D, tmin, tmax, subdivisions, curve, isect)) {
isect->prim = prim;
isect->object = object;
isect->type = type;
@@ -661,7 +664,7 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals kg,
return false;
}
else {
- if (curve_intersect_recursive(P, dir, tmin, tmax, curve, isect)) {
+ if (curve_intersect_recursive(ray_P, ray_D, tmin, tmax, curve, isect)) {
isect->prim = prim;
isect->object = object;
isect->type = type;
diff --git a/intern/cycles/kernel/geom/motion_triangle_intersect.h b/intern/cycles/kernel/geom/motion_triangle_intersect.h
index b59c5c43c20..b30ee7258dc 100644
--- a/intern/cycles/kernel/geom/motion_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/motion_triangle_intersect.h
@@ -27,8 +27,8 @@ ccl_device_inline float3 motion_triangle_point_from_uv(KernelGlobals kg,
const float v,
float3 verts[3])
{
- float w = 1.0f - u - v;
- float3 P = u * verts[0] + v * verts[1] + w * verts[2];
+ /* This appears to give slightly better precision than interpolating with w = (1 - u - v). */
+ float3 P = verts[0] + u * (verts[1] - verts[0]) + v * (verts[2] - verts[0]);
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
const Transform tfm = object_get_transform(kg, sd);
diff --git a/intern/cycles/kernel/geom/motion_triangle_shader.h b/intern/cycles/kernel/geom/motion_triangle_shader.h
index 236e737b785..413a61b380a 100644
--- a/intern/cycles/kernel/geom/motion_triangle_shader.h
+++ b/intern/cycles/kernel/geom/motion_triangle_shader.h
@@ -68,8 +68,8 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg,
sd->N = Ng;
/* Compute derivatives of P w.r.t. uv. */
#ifdef __DPDU__
- sd->dPdu = (verts[0] - verts[2]);
- sd->dPdv = (verts[1] - verts[2]);
+ sd->dPdu = (verts[1] - verts[0]);
+ sd->dPdv = (verts[2] - verts[0]);
#endif
/* Compute smooth normal. */
if (sd->shader & SHADER_SMOOTH_NORMAL) {
@@ -89,7 +89,7 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals kg,
float u = sd->u;
float v = sd->v;
float w = 1.0f - u - v;
- sd->N = (u * normals[0] + v * normals[1] + w * normals[2]);
+ sd->N = (w * normals[0] + u * normals[1] + v * normals[2]);
}
}
diff --git a/intern/cycles/kernel/geom/object.h b/intern/cycles/kernel/geom/object.h
index b15f6b5dda5..14ceb636e2e 100644
--- a/intern/cycles/kernel/geom/object.h
+++ b/intern/cycles/kernel/geom/object.h
@@ -86,7 +86,7 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg,
Transform tfm = object_fetch_transform_motion(kg, object, time);
if (itfm)
- *itfm = transform_quick_inverse(tfm);
+ *itfm = transform_inverse(tfm);
return tfm;
}
@@ -488,127 +488,54 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
/* Transform ray into object space to enter static object in BVH */
-ccl_device_inline float bvh_instance_push(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir)
-{
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-
- *P = transform_point(&tfm, ray->P);
-
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
-
- return len;
-}
-
-/* Transform ray to exit static object in BVH. */
-
-ccl_device_inline float bvh_instance_pop(KernelGlobals kg,
+ccl_device_inline void bvh_instance_push(KernelGlobals kg,
int object,
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
- ccl_private float3 *idir,
- float t)
-{
- if (t != FLT_MAX) {
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- t /= len(transform_direction(&tfm, ray->D));
- }
-
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
- *idir = bvh_inverse_direction(*dir);
-
- return t;
-}
-
-/* Same as above, but returns scale factor to apply to multiple intersection distances */
-
-ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir,
- ccl_private float *t_fac)
+ ccl_private float3 *idir)
{
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
- *t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
+ *P = transform_point(&tfm, ray->P);
+
+ *dir = bvh_clamp_direction(transform_direction(&tfm, ray->D));
*idir = bvh_inverse_direction(*dir);
}
#ifdef __OBJECT_MOTION__
/* Transform ray into object space to enter motion blurred object in BVH */
-ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir,
- ccl_private Transform *itfm)
-{
- object_fetch_transform_motion_test(kg, object, ray->time, itfm);
-
- *P = transform_point(itfm, ray->P);
-
- float len;
- *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
- *idir = bvh_inverse_direction(*dir);
-
- return len;
-}
-
-/* Transform ray to exit motion blurred object in BVH. */
-
-ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg,
+ccl_device_inline void bvh_instance_motion_push(KernelGlobals kg,
int object,
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
- ccl_private float3 *idir,
- float t,
- ccl_private Transform *itfm)
+ ccl_private float3 *idir)
{
- if (t != FLT_MAX) {
- t /= len(transform_direction(itfm, ray->D));
- }
+ Transform tfm;
+ object_fetch_transform_motion_test(kg, object, ray->time, &tfm);
- *P = ray->P;
- *dir = bvh_clamp_direction(ray->D);
- *idir = bvh_inverse_direction(*dir);
+ *P = transform_point(&tfm, ray->P);
- return t;
+ *dir = bvh_clamp_direction(transform_direction(&tfm, ray->D));
+ *idir = bvh_inverse_direction(*dir);
}
-/* Same as above, but returns scale factor to apply to multiple intersection distances */
+#endif
+
+/* Transform ray to exit static object in BVH. */
-ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg,
- int object,
- ccl_private const Ray *ray,
- ccl_private float3 *P,
- ccl_private float3 *dir,
- ccl_private float3 *idir,
- ccl_private float *t_fac,
- ccl_private Transform *itfm)
+ccl_device_inline void bvh_instance_pop(ccl_private const Ray *ray,
+ ccl_private float3 *P,
+ ccl_private float3 *dir,
+ ccl_private float3 *idir)
{
- *t_fac = 1.0f / len(transform_direction(itfm, ray->D));
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
}
-#endif
-
/* TODO: This can be removed when we know if no devices will require explicit
* address space qualifiers for this case. */
diff --git a/intern/cycles/kernel/geom/point_intersect.h b/intern/cycles/kernel/geom/point_intersect.h
index ee5a564947b..15fb814c58d 100644
--- a/intern/cycles/kernel/geom/point_intersect.h
+++ b/intern/cycles/kernel/geom/point_intersect.h
@@ -10,20 +10,20 @@ CCL_NAMESPACE_BEGIN
#ifdef __POINTCLOUD__
ccl_device_forceinline bool point_intersect_test(const float4 point,
- const float3 P,
- const float3 dir,
- const float tmin,
- const float tmax,
+ const float3 ray_P,
+ const float3 ray_D,
+ const float ray_tmin,
+ const float ray_tmax,
ccl_private float *t)
{
const float3 center = float4_to_float3(point);
const float radius = point.w;
- const float rd2 = 1.0f / dot(dir, dir);
+ const float rd2 = 1.0f / dot(ray_D, ray_D);
- const float3 c0 = center - P;
- const float projC0 = dot(c0, dir) * rd2;
- const float3 perp = c0 - projC0 * dir;
+ const float3 c0 = center - ray_P;
+ const float projC0 = dot(c0, ray_D) * rd2;
+ const float3 perp = c0 - projC0 * ray_D;
const float l2 = dot(perp, perp);
const float r2 = radius * radius;
if (!(l2 <= r2)) {
@@ -32,12 +32,12 @@ ccl_device_forceinline bool point_intersect_test(const float4 point,
const float td = sqrt((r2 - l2) * rd2);
const float t_front = projC0 - td;
- const bool valid_front = (tmin <= t_front) & (t_front <= tmax);
+ const bool valid_front = (ray_tmin <= t_front) & (t_front <= ray_tmax);
/* Always back-face culling for now. */
# if 0
const float t_back = projC0 + td;
- const bool valid_back = (tmin <= t_back) & (t_back <= tmax);
+ const bool valid_back = (ray_tmin <= t_back) & (t_back <= ray_tmax);
/* check if there is a first hit */
const bool valid_first = valid_front | valid_back;
@@ -58,10 +58,10 @@ ccl_device_forceinline bool point_intersect_test(const float4 point,
ccl_device_forceinline bool point_intersect(KernelGlobals kg,
ccl_private Intersection *isect,
- const float3 P,
- const float3 dir,
- const float tmin,
- const float tmax,
+ const float3 ray_P,
+ const float3 ray_D,
+ const float ray_tmin,
+ const float ray_tmax,
const int object,
const int prim,
const float time,
@@ -70,7 +70,7 @@ ccl_device_forceinline bool point_intersect(KernelGlobals kg,
const float4 point = (type & PRIMITIVE_MOTION) ? motion_point(kg, object, prim, time) :
kernel_data_fetch(points, prim);
- if (!point_intersect_test(point, P, dir, tmin, tmax, &isect->t)) {
+ if (!point_intersect_test(point, ray_P, ray_D, ray_tmin, ray_tmax, &isect->t)) {
return false;
}
diff --git a/intern/cycles/kernel/geom/primitive.h b/intern/cycles/kernel/geom/primitive.h
index 0f1a3fc11bc..04b04ff5985 100644
--- a/intern/cycles/kernel/geom/primitive.h
+++ b/intern/cycles/kernel/geom/primitive.h
@@ -25,7 +25,7 @@ ccl_device_forceinline float primitive_surface_attribute_float(KernelGlobals kg,
ccl_private float *dy)
{
if (sd->type & PRIMITIVE_TRIANGLE) {
- if (subd_triangle_patch(kg, sd) == ~0)
+ if (subd_triangle_patch(kg, sd->prim) == ~0)
return triangle_attribute_float(kg, sd, desc, dx, dy);
else
return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
@@ -56,7 +56,7 @@ ccl_device_forceinline float2 primitive_surface_attribute_float2(KernelGlobals k
ccl_private float2 *dy)
{
if (sd->type & PRIMITIVE_TRIANGLE) {
- if (subd_triangle_patch(kg, sd) == ~0)
+ if (subd_triangle_patch(kg, sd->prim) == ~0)
return triangle_attribute_float2(kg, sd, desc, dx, dy);
else
return subd_triangle_attribute_float2(kg, sd, desc, dx, dy);
@@ -87,7 +87,7 @@ ccl_device_forceinline float3 primitive_surface_attribute_float3(KernelGlobals k
ccl_private float3 *dy)
{
if (sd->type & PRIMITIVE_TRIANGLE) {
- if (subd_triangle_patch(kg, sd) == ~0)
+ if (subd_triangle_patch(kg, sd->prim) == ~0)
return triangle_attribute_float3(kg, sd, desc, dx, dy);
else
return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
@@ -118,7 +118,7 @@ ccl_device_forceinline float4 primitive_surface_attribute_float4(KernelGlobals k
ccl_private float4 *dy)
{
if (sd->type & PRIMITIVE_TRIANGLE) {
- if (subd_triangle_patch(kg, sd) == ~0)
+ if (subd_triangle_patch(kg, sd->prim) == ~0)
return triangle_attribute_float4(kg, sd, desc, dx, dy);
else
return subd_triangle_attribute_float4(kg, sd, desc, dx, dy);
@@ -320,7 +320,7 @@ ccl_device_forceinline float4 primitive_motion_vector(KernelGlobals kg,
#endif
if (sd->type & PRIMITIVE_TRIANGLE) {
/* Triangle */
- if (subd_triangle_patch(kg, sd) == ~0) {
+ if (subd_triangle_patch(kg, sd->prim) == ~0) {
motion_pre = triangle_attribute_float3(kg, sd, desc, NULL, NULL);
desc.offset += numverts;
motion_post = triangle_attribute_float3(kg, sd, desc, NULL, NULL);
diff --git a/intern/cycles/kernel/geom/shader_data.h b/intern/cycles/kernel/geom/shader_data.h
index 99b9289cb4a..b67d19365a3 100644
--- a/intern/cycles/kernel/geom/shader_data.h
+++ b/intern/cycles/kernel/geom/shader_data.h
@@ -7,6 +7,8 @@
#pragma once
+#include "kernel/util/differential.h"
+
CCL_NAMESPACE_BEGIN
/* ShaderData setup from incoming ray */
@@ -18,7 +20,7 @@ ccl_device void shader_setup_object_transforms(KernelGlobals kg,
{
if (sd->object_flag & SD_OBJECT_MOTION) {
sd->ob_tfm_motion = object_fetch_transform_motion(kg, sd->object, time);
- sd->ob_itfm_motion = transform_quick_inverse(sd->ob_tfm_motion);
+ sd->ob_itfm_motion = transform_inverse(sd->ob_tfm_motion);
}
}
#endif
@@ -123,9 +125,9 @@ ccl_device_inline void shader_setup_from_ray(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
/* differentials */
- differential_transfer_compact(&sd->dP, ray->dP, ray->D, ray->dD, sd->Ng, sd->ray_length);
- differential_incoming_compact(&sd->dI, ray->D, ray->dD);
- differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
+ sd->dP = differential_transfer_compact(ray->dP, ray->D, ray->dD, sd->ray_length);
+ sd->dI = differential_incoming_compact(ray->dD);
+ differential_dudv_compact(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
#endif
}
@@ -240,8 +242,8 @@ ccl_device_inline void shader_setup_from_sample(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
/* no ray differentials here yet */
- sd->dP = differential3_zero();
- sd->dI = differential3_zero();
+ sd->dP = differential_zero_compact();
+ sd->dI = differential_zero_compact();
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
@@ -348,8 +350,8 @@ ccl_device void shader_setup_from_curve(KernelGlobals kg,
/* No ray differentials currently. */
#ifdef __RAY_DIFFERENTIALS__
- sd->dP = differential3_zero();
- sd->dI = differential3_zero();
+ sd->dP = differential_zero_compact();
+ sd->dI = differential_zero_compact();
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
@@ -391,8 +393,8 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
/* differentials */
- sd->dP = differential3_zero(); /* TODO: ray->dP */
- differential_incoming(&sd->dI, sd->dP);
+ sd->dP = differential_zero_compact(); /* TODO: ray->dP */
+ sd->dI = differential_zero_compact();
sd->du = differential_zero();
sd->dv = differential_zero();
#endif
@@ -433,8 +435,8 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals kg,
# ifdef __RAY_DIFFERENTIALS__
/* differentials */
- sd->dP = differential3_zero(); /* TODO ray->dD */
- differential_incoming(&sd->dI, sd->dP);
+ sd->dP = differential_zero_compact(); /* TODO ray->dD */
+ sd->dI = differential_zero_compact();
sd->du = differential_zero();
sd->dv = differential_zero();
# endif
diff --git a/intern/cycles/kernel/geom/subd_triangle.h b/intern/cycles/kernel/geom/subd_triangle.h
index 8b73b342e16..784ba377318 100644
--- a/intern/cycles/kernel/geom/subd_triangle.h
+++ b/intern/cycles/kernel/geom/subd_triangle.h
@@ -87,18 +87,18 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg,
ccl_private float *dx,
ccl_private float *dy)
{
- int patch = subd_triangle_patch(kg, sd);
+ int patch = subd_triangle_patch(kg, sd->prim);
#ifdef __PATCH_EVAL__
if (desc.flags & ATTR_SUBDIVIDED) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float2 dpdu = uv[0] - uv[2];
- float2 dpdv = uv[1] - uv[2];
+ float2 dpdu = uv[1] - uv[0];
+ float2 dpdv = uv[2] - uv[0];
/* p is [s, t] */
- float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+ float2 p = dpdu * sd->u + dpdv * sd->v + uv[0];
float a, dads, dadt;
a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
@@ -165,12 +165,12 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
if (dy)
- *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+ *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
#endif
- return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
}
else if (desc.element == ATTR_ELEMENT_CORNER) {
float2 uv[3];
@@ -195,12 +195,12 @@ ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
if (dy)
- *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+ *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
#endif
- return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
}
else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
if (dx)
@@ -226,18 +226,18 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg,
ccl_private float2 *dx,
ccl_private float2 *dy)
{
- int patch = subd_triangle_patch(kg, sd);
+ int patch = subd_triangle_patch(kg, sd->prim);
#ifdef __PATCH_EVAL__
if (desc.flags & ATTR_SUBDIVIDED) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float2 dpdu = uv[0] - uv[2];
- float2 dpdv = uv[1] - uv[2];
+ float2 dpdu = uv[1] - uv[0];
+ float2 dpdv = uv[2] - uv[0];
/* p is [s, t] */
- float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+ float2 p = dpdu * sd->u + dpdv * sd->v + uv[0];
float2 a, dads, dadt;
@@ -305,12 +305,12 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
if (dy)
- *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+ *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
#endif
- return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
}
else if (desc.element == ATTR_ELEMENT_CORNER) {
float2 uv[3];
@@ -337,12 +337,12 @@ ccl_device_noinline float2 subd_triangle_attribute_float2(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
if (dy)
- *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+ *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
#endif
- return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
}
else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
if (dx)
@@ -368,18 +368,18 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
ccl_private float3 *dx,
ccl_private float3 *dy)
{
- int patch = subd_triangle_patch(kg, sd);
+ int patch = subd_triangle_patch(kg, sd->prim);
#ifdef __PATCH_EVAL__
if (desc.flags & ATTR_SUBDIVIDED) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float2 dpdu = uv[0] - uv[2];
- float2 dpdv = uv[1] - uv[2];
+ float2 dpdu = uv[1] - uv[0];
+ float2 dpdv = uv[2] - uv[0];
/* p is [s, t] */
- float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+ float2 p = dpdu * sd->u + dpdv * sd->v + uv[0];
float3 a, dads, dadt;
a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
@@ -446,12 +446,12 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
if (dy)
- *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+ *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
#endif
- return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
}
else if (desc.element == ATTR_ELEMENT_CORNER) {
float2 uv[3];
@@ -478,12 +478,12 @@ ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
if (dy)
- *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+ *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
#endif
- return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
}
else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
if (dx)
@@ -509,18 +509,18 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
ccl_private float4 *dx,
ccl_private float4 *dy)
{
- int patch = subd_triangle_patch(kg, sd);
+ int patch = subd_triangle_patch(kg, sd->prim);
#ifdef __PATCH_EVAL__
if (desc.flags & ATTR_SUBDIVIDED) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float2 dpdu = uv[0] - uv[2];
- float2 dpdv = uv[1] - uv[2];
+ float2 dpdu = uv[1] - uv[0];
+ float2 dpdv = uv[2] - uv[0];
/* p is [s, t] */
- float2 p = dpdu * sd->u + dpdv * sd->v + uv[2];
+ float2 p = dpdu * sd->u + dpdv * sd->v + uv[0];
float4 a, dads, dadt;
if (desc.type == NODE_ATTR_RGBA) {
@@ -592,12 +592,12 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
if (dy)
- *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+ *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
#endif
- return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
}
else if (desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
float2 uv[3];
@@ -636,12 +636,12 @@ ccl_device_noinline float4 subd_triangle_attribute_float4(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * a + sd->dv.dx * b - (sd->du.dx + sd->dv.dx) * c;
+ *dx = sd->du.dx * b + sd->dv.dx * c - (sd->du.dx + sd->dv.dx) * a;
if (dy)
- *dy = sd->du.dy * a + sd->dv.dy * b - (sd->du.dy + sd->dv.dy) * c;
+ *dy = sd->du.dy * b + sd->dv.dy * c - (sd->du.dy + sd->dv.dy) * a;
#endif
- return sd->u * a + sd->v * b + (1.0f - sd->u - sd->v) * c;
+ return sd->u * b + sd->v * c + (1.0f - sd->u - sd->v) * a;
}
else if (desc.element == ATTR_ELEMENT_OBJECT || desc.element == ATTR_ELEMENT_MESH) {
if (dx)
diff --git a/intern/cycles/kernel/geom/triangle.h b/intern/cycles/kernel/geom/triangle.h
index 788bfaca7cf..6b9450d59ef 100644
--- a/intern/cycles/kernel/geom/triangle.h
+++ b/intern/cycles/kernel/geom/triangle.h
@@ -45,8 +45,8 @@ ccl_device_inline void triangle_point_normal(KernelGlobals kg,
float3 v1 = kernel_data_fetch(tri_verts, tri_vindex.w + 1);
float3 v2 = kernel_data_fetch(tri_verts, tri_vindex.w + 2);
/* compute point */
- float t = 1.0f - u - v;
- *P = (u * v0 + v * v1 + t * v2);
+ float w = 1.0f - u - v;
+ *P = (w * v0 + u * v1 + v * v2);
/* get object flags */
int object_flag = kernel_data_fetch(object_flag, object);
/* compute normal */
@@ -97,7 +97,7 @@ triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v)
float3 n1 = kernel_data_fetch(tri_vnormal, tri_vindex.y);
float3 n2 = kernel_data_fetch(tri_vnormal, tri_vindex.z);
- float3 N = safe_normalize((1.0f - u - v) * n2 + u * n0 + v * n1);
+ float3 N = safe_normalize((1.0f - u - v) * n0 + u * n1 + v * n2);
return is_zero(N) ? Ng : N;
}
@@ -118,7 +118,7 @@ ccl_device_inline float3 triangle_smooth_normal_unnormalized(
object_inverse_normal_transform(kg, sd, &n2);
}
- float3 N = (1.0f - u - v) * n2 + u * n0 + v * n1;
+ float3 N = (1.0f - u - v) * n0 + u * n1 + v * n2;
return is_zero(N) ? Ng : N;
}
@@ -137,8 +137,8 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals kg,
const float3 p2 = kernel_data_fetch(tri_verts, tri_vindex.w + 2);
/* compute derivatives of P w.r.t. uv */
- *dPdu = (p0 - p2);
- *dPdv = (p1 - p2);
+ *dPdu = (p1 - p0);
+ *dPdv = (p2 - p0);
}
/* Reading attributes on various triangle elements */
@@ -167,12 +167,12 @@ ccl_device float triangle_attribute_float(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ *dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0;
if (dy)
- *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
+ *dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0;
#endif
- return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -217,12 +217,12 @@ ccl_device float2 triangle_attribute_float2(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ *dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0;
if (dy)
- *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
+ *dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0;
#endif
- return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -267,12 +267,12 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ *dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0;
if (dy)
- *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
+ *dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0;
#endif
- return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
}
else {
#ifdef __RAY_DIFFERENTIALS__
@@ -328,12 +328,12 @@ ccl_device float4 triangle_attribute_float4(KernelGlobals kg,
#ifdef __RAY_DIFFERENTIALS__
if (dx)
- *dx = sd->du.dx * f0 + sd->dv.dx * f1 - (sd->du.dx + sd->dv.dx) * f2;
+ *dx = sd->du.dx * f1 + sd->dv.dx * f2 - (sd->du.dx + sd->dv.dx) * f0;
if (dy)
- *dy = sd->du.dy * f0 + sd->dv.dy * f1 - (sd->du.dy + sd->dv.dy) * f2;
+ *dy = sd->du.dy * f1 + sd->dv.dy * f2 - (sd->du.dy + sd->dv.dy) * f0;
#endif
- return sd->u * f0 + sd->v * f1 + (1.0f - sd->u - sd->v) * f2;
+ return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
}
else {
#ifdef __RAY_DIFFERENTIALS__
diff --git a/intern/cycles/kernel/geom/triangle_intersect.h b/intern/cycles/kernel/geom/triangle_intersect.h
index f968e537cfa..847ed22fddd 100644
--- a/intern/cycles/kernel/geom/triangle_intersect.h
+++ b/intern/cycles/kernel/geom/triangle_intersect.h
@@ -145,9 +145,9 @@ ccl_device_inline float3 triangle_point_from_uv(KernelGlobals kg,
const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0),
tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
- float w = 1.0f - u - v;
- float3 P = u * tri_a + v * tri_b + w * tri_c;
+ /* This appears to give slightly better precision than interpolating with w = (1 - u - v). */
+ float3 P = tri_a + u * (tri_b - tri_a) + v * (tri_c - tri_a);
if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
const Transform tfm = object_get_transform(kg, sd);
diff --git a/intern/cycles/kernel/geom/volume.h b/intern/cycles/kernel/geom/volume.h
index 3510a905def..885a420c97f 100644
--- a/intern/cycles/kernel/geom/volume.h
+++ b/intern/cycles/kernel/geom/volume.h
@@ -29,7 +29,7 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals kg,
object_inverse_position_transform(kg, sd, &P);
if (desc.offset != ATTR_STD_NOT_FOUND) {
- Transform tfm = primitive_attribute_matrix(kg, sd, desc);
+ Transform tfm = primitive_attribute_matrix(kg, desc);
P = transform_point(&tfm, P);
}
diff --git a/intern/cycles/kernel/integrator/displacement_shader.h b/intern/cycles/kernel/integrator/displacement_shader.h
new file mode 100644
index 00000000000..839dfe244ac
--- /dev/null
+++ b/intern/cycles/kernel/integrator/displacement_shader.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/* Functions to evaluate displacement shader. */
+
+#pragma once
+
+#ifdef __SVM__
+# include "kernel/svm/svm.h"
+#endif
+#ifdef __OSL__
+# include "kernel/osl/osl.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+template<typename ConstIntegratorGenericState>
+ccl_device void displacement_shader_eval(KernelGlobals kg,
+ ConstIntegratorGenericState state,
+ ccl_private ShaderData *sd)
+{
+ sd->num_closure = 0;
+ sd->num_closure_left = 0;
+
+ /* this will modify sd->P */
+#ifdef __OSL__
+ if (kg->osl) {
+ OSLShader::eval_displacement(kg, state, sd);
+ }
+ else
+#endif
+ {
+#ifdef __SVM__
+ svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>(
+ kg, state, sd, NULL, 0);
+#endif
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/guiding.h b/intern/cycles/kernel/integrator/guiding.h
new file mode 100644
index 00000000000..634bba2a9b4
--- /dev/null
+++ b/intern/cycles/kernel/integrator/guiding.h
@@ -0,0 +1,547 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf.h"
+#include "kernel/film/write.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Utilities. */
+
+#if defined(__PATH_GUIDING__)
+static pgl_vec3f guiding_vec3f(const float3 v)
+{
+ return openpgl::cpp::Vector3(v.x, v.y, v.z);
+}
+
+static pgl_point3f guiding_point3f(const float3 v)
+{
+ return openpgl::cpp::Point3(v.x, v.y, v.z);
+}
+#endif
+
+/* Path recording for guiding. */
+
+/* Record Surface Interactions */
+
+/* Records/Adds a new path segment with the current path vertex on a surface.
+ * If the path is not terminated this call is usually followed by a call of
+ * guiding_record_surface_bounce. */
+ccl_device_forceinline void guiding_record_surface_segment(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+
+ const pgl_vec3f zero = guiding_vec3f(zero_float3());
+ const pgl_vec3f one = guiding_vec3f(one_float3());
+
+ state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment();
+ openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(sd->P));
+ openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(sd->I));
+ openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false);
+ openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero);
+ openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero);
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, one);
+ openpgl::cpp::SetEta(state->guiding.path_segment, 1.0);
+#endif
+}
+
+/* Records the surface scattering event at the current vertex position of the segment.*/
+ccl_device_forceinline void guiding_record_surface_bounce(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ const Spectrum weight,
+ const float pdf,
+ const float3 N,
+ const float3 omega_in,
+ const float2 roughness,
+ const float eta)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+ const float min_roughness = safe_sqrtf(fminf(roughness.x, roughness.y));
+ const bool is_delta = (min_roughness == 0.0f);
+ const float3 weight_rgb = spectrum_to_rgb(weight);
+ const float3 normal = clamp(N, -one_float3(), one_float3());
+
+ kernel_assert(state->guiding.path_segment != nullptr);
+
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(one_float3()));
+ openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false);
+ openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal));
+ openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in));
+ openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf);
+ openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb));
+ openpgl::cpp::SetIsDelta(state->guiding.path_segment, is_delta);
+ openpgl::cpp::SetEta(state->guiding.path_segment, eta);
+ openpgl::cpp::SetRoughness(state->guiding.path_segment, min_roughness);
+#endif
+}
+
+/* Records the emission at the current surface intersection (physical or virtual) */
+ccl_device_forceinline void guiding_record_surface_emission(KernelGlobals kg,
+ IntegratorState state,
+ const Spectrum Le,
+ const float mis_weight)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+ const float3 Le_rgb = spectrum_to_rgb(Le);
+
+ openpgl::cpp::SetDirectContribution(state->guiding.path_segment, guiding_vec3f(Le_rgb));
+ openpgl::cpp::SetMiWeight(state->guiding.path_segment, mis_weight);
+#endif
+}
+
+/* Record BSSRDF Interactions */
+
+/* Records/Adds a new path segment where the vertex position is the point of entry
+ * of the sub surface scattering boundary.
+ * If the path is not terminated this call is usually followed by a call of
+ * guiding_record_bssrdf_weight and guiding_record_bssrdf_bounce. */
+ccl_device_forceinline void guiding_record_bssrdf_segment(KernelGlobals kg,
+ IntegratorState state,
+ const float3 P,
+ const float3 I)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+ const pgl_vec3f zero = guiding_vec3f(zero_float3());
+ const pgl_vec3f one = guiding_vec3f(one_float3());
+
+ state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment();
+ openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(P));
+ openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(I));
+ openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, true);
+ openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero);
+ openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero);
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, one);
+ openpgl::cpp::SetEta(state->guiding.path_segment, 1.0);
+#endif
+}
+
+/* Records the transmission of the path at the point of entry while passing
+ * the surface boundary.*/
+ccl_device_forceinline void guiding_record_bssrdf_weight(KernelGlobals kg,
+ IntegratorState state,
+ const Spectrum weight,
+ const Spectrum albedo)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+
+ /* Note albedo left out here, will be included in guiding_record_bssrdf_bounce. */
+ const float3 weight_rgb = spectrum_to_rgb(safe_divide_color(weight, albedo));
+
+ kernel_assert(state->guiding.path_segment != nullptr);
+
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(zero_float3()));
+ openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb));
+ openpgl::cpp::SetIsDelta(state->guiding.path_segment, false);
+ openpgl::cpp::SetEta(state->guiding.path_segment, 1.0f);
+ openpgl::cpp::SetRoughness(state->guiding.path_segment, 1.0f);
+#endif
+}
+
+/* Records the direction at the point of entry the path takes when sampling the SSS contribution.
+ * If not terminated this function is usually followed by a call of
+ * guiding_record_volume_transmission to record the transmittance between the point of entry and
+ * the point of exit.*/
+ccl_device_forceinline void guiding_record_bssrdf_bounce(KernelGlobals kg,
+ IntegratorState state,
+ const float pdf,
+ const float3 N,
+ const float3 omega_in,
+ const Spectrum weight,
+ const Spectrum albedo)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+ const float3 normal = clamp(N, -one_float3(), one_float3());
+ const float3 weight_rgb = spectrum_to_rgb(weight * albedo);
+
+ kernel_assert(state->guiding.path_segment != nullptr);
+
+ openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false);
+ openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal));
+ openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in));
+ openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf);
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb));
+#endif
+}
+
+/* Record Volume Interactions */
+
+/* Records/Adds a new path segment with the current path vertex being inside a volume.
+ * If the path is not terminated this call is usually followed by a call of
+ * guiding_record_volume_bounce. */
+ccl_device_forceinline void guiding_record_volume_segment(KernelGlobals kg,
+ IntegratorState state,
+ const float3 P,
+ const float3 I)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+ const pgl_vec3f zero = guiding_vec3f(zero_float3());
+ const pgl_vec3f one = guiding_vec3f(one_float3());
+
+ state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment();
+
+ openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(P));
+ openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(I));
+ openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, true);
+ openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero);
+ openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero);
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, one);
+ openpgl::cpp::SetEta(state->guiding.path_segment, 1.0);
+#endif
+}
+
+/* Records the volume scattering event at the current vertex position of the segment.*/
+ccl_device_forceinline void guiding_record_volume_bounce(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ const Spectrum weight,
+ const float pdf,
+ const float3 omega_in,
+ const float roughness)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+ const float3 weight_rgb = spectrum_to_rgb(weight);
+ const float3 normal = make_float3(0.0f, 0.0f, 1.0f);
+
+ kernel_assert(state->guiding.path_segment != nullptr);
+
+ openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, true);
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, guiding_vec3f(one_float3()));
+ openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(normal));
+ openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(omega_in));
+ openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, pdf);
+ openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, guiding_vec3f(weight_rgb));
+ openpgl::cpp::SetIsDelta(state->guiding.path_segment, false);
+ openpgl::cpp::SetEta(state->guiding.path_segment, 1.f);
+ openpgl::cpp::SetRoughness(state->guiding.path_segment, roughness);
+#endif
+}
+
+/* Records the transmission (a.k.a. transmittance weight) between the current path segment
+ * and the next one, when the path is inside or passes a volume.*/
+ccl_device_forceinline void guiding_record_volume_transmission(KernelGlobals kg,
+ IntegratorState state,
+ const float3 transmittance_weight)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+
+ if (state->guiding.path_segment) {
+ // TODO (sherholz): need to find a better way to avoid this check
+ if ((transmittance_weight[0] < 0.f || !std::isfinite(transmittance_weight[0]) ||
+ std::isnan(transmittance_weight[0])) ||
+ (transmittance_weight[1] < 0.f || !std::isfinite(transmittance_weight[1]) ||
+ std::isnan(transmittance_weight[1])) ||
+ (transmittance_weight[2] < 0.f || !std::isfinite(transmittance_weight[2]) ||
+ std::isnan(transmittance_weight[2]))) {
+ }
+ else {
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment,
+ guiding_vec3f(transmittance_weight));
+ }
+ }
+#endif
+}
+
+/* Records the emission of a volume at the vertex of the current path segment. */
+ccl_device_forceinline void guiding_record_volume_emission(KernelGlobals kg,
+ IntegratorState state,
+ const Spectrum Le)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+
+ if (state->guiding.path_segment) {
+ const float3 Le_rgb = spectrum_to_rgb(Le);
+
+ openpgl::cpp::SetDirectContribution(state->guiding.path_segment, guiding_vec3f(Le_rgb));
+ openpgl::cpp::SetMiWeight(state->guiding.path_segment, 1.0f);
+ }
+#endif
+}
+
+/* Record Light Interactions */
+
+/* Adds a pseudo path vertex/segment when intersecting a virtual light source.
+ * (e.g., area, sphere, or disk light). This call is often followed
+ * a call of guiding_record_surface_emission, if the intersected light source
+ * emits light in the direction of the path. */
+ccl_device_forceinline void guiding_record_light_surface_segment(
+ KernelGlobals kg, IntegratorState state, ccl_private const Intersection *ccl_restrict isect)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+ const pgl_vec3f zero = guiding_vec3f(zero_float3());
+ const pgl_vec3f one = guiding_vec3f(one_float3());
+ const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+ const float3 P = ray_P + isect->t * ray_D;
+
+ state->guiding.path_segment = kg->opgl_path_segment_storage->NextSegment();
+ openpgl::cpp::SetPosition(state->guiding.path_segment, guiding_point3f(P));
+ openpgl::cpp::SetDirectionOut(state->guiding.path_segment, guiding_vec3f(-ray_D));
+ openpgl::cpp::SetNormal(state->guiding.path_segment, guiding_vec3f(-ray_D));
+ openpgl::cpp::SetDirectionIn(state->guiding.path_segment, guiding_vec3f(ray_D));
+ openpgl::cpp::SetPDFDirectionIn(state->guiding.path_segment, 1.0f);
+ openpgl::cpp::SetVolumeScatter(state->guiding.path_segment, false);
+ openpgl::cpp::SetScatteredContribution(state->guiding.path_segment, zero);
+ openpgl::cpp::SetDirectContribution(state->guiding.path_segment, zero);
+ openpgl::cpp::SetTransmittanceWeight(state->guiding.path_segment, one);
+ openpgl::cpp::SetScatteringWeight(state->guiding.path_segment, one);
+ openpgl::cpp::SetEta(state->guiding.path_segment, 1.0f);
+#endif
+}
+
+/* Records/Adds a final path segment when the path leaves the scene and
+ * intersects with a background light (e.g., background color,
+ * distant light, or env map). The vertex for this segment is placed along
+ * the current ray far out the scene.*/
+ccl_device_forceinline void guiding_record_background(KernelGlobals kg,
+ IntegratorState state,
+ const Spectrum L,
+ const float mis_weight)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+
+ const float3 L_rgb = spectrum_to_rgb(L);
+ const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+ const float3 P = ray_P + (1e6f) * ray_D;
+ const float3 normal = make_float3(0.0f, 0.0f, 1.0f);
+
+ openpgl::cpp::PathSegment background_segment;
+ openpgl::cpp::SetPosition(&background_segment, guiding_vec3f(P));
+ openpgl::cpp::SetNormal(&background_segment, guiding_vec3f(normal));
+ openpgl::cpp::SetDirectionOut(&background_segment, guiding_vec3f(-ray_D));
+ openpgl::cpp::SetDirectContribution(&background_segment, guiding_vec3f(L_rgb));
+ openpgl::cpp::SetMiWeight(&background_segment, mis_weight);
+ kg->opgl_path_segment_storage->AddSegment(background_segment);
+#endif
+}
+
+/* Records the scattered contribution of a next event estimation
+ * (i.e., a direct light estimate scattered at the current path vertex
+ * towards the previous vertex).*/
+ccl_device_forceinline void guiding_record_direct_light(KernelGlobals kg,
+ IntegratorShadowState state)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+ if (state->shadow_path.path_segment) {
+ const Spectrum Lo = safe_divide_color(INTEGRATOR_STATE(state, shadow_path, throughput),
+ INTEGRATOR_STATE(state, shadow_path, unlit_throughput));
+
+ const float3 Lo_rgb = spectrum_to_rgb(Lo);
+ openpgl::cpp::AddScatteredContribution(state->shadow_path.path_segment, guiding_vec3f(Lo_rgb));
+ }
+#endif
+}
+
+/* Record Russian Roulette */
+/* Records the probability of continuing the path at the current path segment. */
+ccl_device_forceinline void guiding_record_continuation_probability(
+ KernelGlobals kg, IntegratorState state, const float continuation_probability)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+
+ if (state->guiding.path_segment) {
+ openpgl::cpp::SetRussianRouletteProbability(state->guiding.path_segment,
+ continuation_probability);
+ }
+#endif
+}
+
+/* Path guiding debug render passes. */
+
+/* Write a set of path guiding related debug information (e.g., guiding probability at first
+ * bounce) into separate rendering passes.*/
+ccl_device_forceinline void guiding_write_debug_passes(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ ccl_global float *ccl_restrict
+ render_buffer)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+# ifdef WITH_CYCLES_DEBUG
+ if (!kernel_data.integrator.train_guiding) {
+ return;
+ }
+
+ if (INTEGRATOR_STATE(state, path, bounce) != 0) {
+ return;
+ }
+
+ const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
+ const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = render_buffer + render_buffer_offset;
+
+ if (kernel_data.film.pass_guiding_probability != PASS_UNUSED) {
+ float guiding_prob = state->guiding.surface_guiding_sampling_prob;
+ film_write_pass_float(buffer + kernel_data.film.pass_guiding_probability, guiding_prob);
+ }
+
+ if (kernel_data.film.pass_guiding_avg_roughness != PASS_UNUSED) {
+ float avg_roughness = 0.0f;
+ float sum_sample_weight = 0.0f;
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (!CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ continue;
+ }
+ avg_roughness += sc->sample_weight * bsdf_get_specular_roughness_squared(sc);
+ sum_sample_weight += sc->sample_weight;
+ }
+
+ avg_roughness = avg_roughness > 0.f ? avg_roughness / sum_sample_weight : 0.f;
+
+ film_write_pass_float(buffer + kernel_data.film.pass_guiding_avg_roughness, avg_roughness);
+ }
+# endif
+#endif
+}
+
+/* Guided BSDFs */
+
+ccl_device_forceinline bool guiding_bsdf_init(KernelGlobals kg,
+ IntegratorState state,
+ const float3 P,
+ const float3 N,
+ ccl_private float &rand)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ if (kg->opgl_surface_sampling_distribution->Init(
+ kg->opgl_guiding_field, guiding_point3f(P), rand, true)) {
+ kg->opgl_surface_sampling_distribution->ApplyCosineProduct(guiding_point3f(N));
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+ccl_device_forceinline float guiding_bsdf_sample(KernelGlobals kg,
+ IntegratorState state,
+ const float2 rand_bsdf,
+ ccl_private float3 *omega_in)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ pgl_vec3f wo;
+ const pgl_point2f rand = openpgl::cpp::Point2(rand_bsdf.x, rand_bsdf.y);
+ const float pdf = kg->opgl_surface_sampling_distribution->SamplePDF(rand, wo);
+ *omega_in = make_float3(wo.x, wo.y, wo.z);
+ return pdf;
+#else
+ return 0.0f;
+#endif
+}
+
+ccl_device_forceinline float guiding_bsdf_pdf(KernelGlobals kg,
+ IntegratorState state,
+ const float3 omega_in)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ return kg->opgl_surface_sampling_distribution->PDF(guiding_vec3f(omega_in));
+#else
+ return 0.0f;
+#endif
+}
+
+/* Guided Volume Phases */
+
+ccl_device_forceinline bool guiding_phase_init(KernelGlobals kg,
+ IntegratorState state,
+ const float3 P,
+ const float3 D,
+ const float g,
+ ccl_private float &rand)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ /* we do not need to guide almost delta phase functions */
+ if (fabsf(g) >= 0.99f) {
+ return false;
+ }
+
+ if (kg->opgl_volume_sampling_distribution->Init(
+ kg->opgl_guiding_field, guiding_point3f(P), rand, true)) {
+ kg->opgl_volume_sampling_distribution->ApplySingleLobeHenyeyGreensteinProduct(guiding_vec3f(D),
+ g);
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+ccl_device_forceinline float guiding_phase_sample(KernelGlobals kg,
+ IntegratorState state,
+ const float2 rand_phase,
+ ccl_private float3 *omega_in)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ pgl_vec3f wo;
+ const pgl_point2f rand = openpgl::cpp::Point2(rand_phase.x, rand_phase.y);
+ const float pdf = kg->opgl_volume_sampling_distribution->SamplePDF(rand, wo);
+ *omega_in = make_float3(wo.x, wo.y, wo.z);
+ return pdf;
+#else
+ return 0.0f;
+#endif
+}
+
+ccl_device_forceinline float guiding_phase_pdf(KernelGlobals kg,
+ IntegratorState state,
+ const float3 omega_in)
+{
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ return kg->opgl_volume_sampling_distribution->PDF(guiding_vec3f(omega_in));
+#else
+ return 0.0f;
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/init_from_bake.h b/intern/cycles/kernel/integrator/init_from_bake.h
index bf3f41b52b9..667ba949760 100644
--- a/intern/cycles/kernel/integrator/init_from_bake.h
+++ b/intern/cycles/kernel/integrator/init_from_bake.h
@@ -5,8 +5,8 @@
#include "kernel/camera/camera.h"
-#include "kernel/film/accumulate.h"
#include "kernel/film/adaptive_sampling.h"
+#include "kernel/film/light_passes.h"
#include "kernel/integrator/path_state.h"
@@ -92,12 +92,12 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
path_state_init(state, tile, x, y);
/* Check whether the pixel has converged and should not be sampled anymore. */
- if (!kernel_need_sample_pixel(kg, state, render_buffer)) {
+ if (!film_need_sample_pixel(kg, state, render_buffer)) {
return false;
}
/* Always count the sample, even if the camera sample will reject the ray. */
- const int sample = kernel_accum_sample(
+ const int sample = film_write_sample(
kg, state, render_buffer, scheduled_sample, tile->sample_offset);
/* Setup render buffers. */
@@ -112,8 +112,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
int prim = __float_as_uint(primitive[1]);
if (prim == -1) {
/* Accumulate transparency for empty pixels. */
- kernel_accum_transparent(kg, state, 0, 1.0f, buffer);
- return false;
+ film_write_transparent(kg, state, 0, 1.0f, buffer);
+ return true;
}
prim += kernel_data.bake.tri_offset;
@@ -121,13 +121,8 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Random number generator. */
const uint rng_hash = hash_uint(seed) ^ kernel_data.integrator.seed;
- float filter_x, filter_y;
- if (sample == 0) {
- filter_x = filter_y = 0.5f;
- }
- else {
- path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_x, &filter_y);
- }
+ const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :
+ path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);
/* Initialize path state for path integration. */
path_state_init_integrator(kg, state, sample, rng_hash);
@@ -150,11 +145,17 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Sub-pixel offset. */
if (sample > 0) {
- u = bake_clamp_mirror_repeat(u + dudx * (filter_x - 0.5f) + dudy * (filter_y - 0.5f), 1.0f);
- v = bake_clamp_mirror_repeat(v + dvdx * (filter_x - 0.5f) + dvdy * (filter_y - 0.5f),
+ u = bake_clamp_mirror_repeat(u + dudx * (rand_filter.x - 0.5f) + dudy * (rand_filter.y - 0.5f),
+ 1.0f);
+ v = bake_clamp_mirror_repeat(v + dvdx * (rand_filter.x - 0.5f) + dvdy * (rand_filter.y - 0.5f),
1.0f - u);
}
+ /* Convert from Blender to Cycles/Embree/OptiX barycentric convention. */
+ const float tmp = u;
+ u = v;
+ v = 1.0f - tmp - v;
+
/* Position and normal on triangle. */
const int object = kernel_data.bake.object_index;
float3 P, Ng;
@@ -199,18 +200,61 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
/* Fast path for position and normal passes not affected by shaders. */
if (kernel_data.film.pass_position != PASS_UNUSED) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_position, P);
+ film_write_pass_float3(buffer + kernel_data.film.pass_position, P);
return true;
}
else if (kernel_data.film.pass_normal != PASS_UNUSED && !(shader_flags & SD_HAS_BUMP)) {
- kernel_write_pass_float3(buffer + kernel_data.film.pass_normal, N);
+ film_write_pass_float3(buffer + kernel_data.film.pass_normal, N);
return true;
}
/* Setup ray. */
Ray ray ccl_optional_struct_init;
- ray.P = P + N;
- ray.D = -N;
+
+ if (kernel_data.bake.use_camera) {
+ float3 D = camera_direction_from_point(kg, P);
+
+ const float DN = dot(D, N);
+
+ /* Nudge camera direction, so that the faces facing away from the camera still have
+ * somewhat usable shading. (Otherwise, glossy faces would be simply black.)
+ *
+ * The surface normal offset affects smooth surfaces. Lower values will make
+ * smooth surfaces more faceted, but higher values may show up from the camera
+ * at grazing angles.
+ *
+ * This value can actually be pretty high before it's noticeably wrong. */
+ const float surface_normal_offset = 0.2f;
+
+ /* Keep the ray direction at least `surface_normal_offset` "above" the smooth normal. */
+ if (DN <= surface_normal_offset) {
+ D -= N * (DN - surface_normal_offset);
+ D = normalize(D);
+ }
+
+ /* On the backside, just lerp towards the surface normal for the ray direction,
+ * as DN goes from 0.0 to -1.0. */
+ if (DN <= 0.0f) {
+ D = normalize(mix(D, N, -DN));
+ }
+
+ /* We don't want to bake the back face, so make sure the ray direction never
+ * goes behind the geometry (flat) normal. This is a fail-safe, and should rarely happen. */
+ const float true_normal_epsilon = 0.00001f;
+
+ if (dot(D, Ng) <= true_normal_epsilon) {
+ D -= Ng * (dot(D, Ng) - true_normal_epsilon);
+ D = normalize(D);
+ }
+
+ ray.P = P + D;
+ ray.D = -D;
+ }
+ else {
+ ray.P = P + N;
+ ray.D = -N;
+ }
+
ray.tmin = 0.0f;
ray.tmax = FLT_MAX;
ray.time = 0.5f;
diff --git a/intern/cycles/kernel/integrator/init_from_camera.h b/intern/cycles/kernel/integrator/init_from_camera.h
index e89ab3991c7..8df3e1b9fb3 100644
--- a/intern/cycles/kernel/integrator/init_from_camera.h
+++ b/intern/cycles/kernel/integrator/init_from_camera.h
@@ -5,8 +5,8 @@
#include "kernel/camera/camera.h"
-#include "kernel/film/accumulate.h"
#include "kernel/film/adaptive_sampling.h"
+#include "kernel/film/light_passes.h"
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/shadow_catcher.h"
@@ -23,31 +23,21 @@ ccl_device_inline void integrate_camera_sample(KernelGlobals kg,
ccl_private Ray *ray)
{
/* Filter sampling. */
- float filter_u, filter_v;
-
- if (sample == 0) {
- filter_u = 0.5f;
- filter_v = 0.5f;
- }
- else {
- path_rng_2D(kg, rng_hash, sample, PRNG_FILTER_U, &filter_u, &filter_v);
- }
+ const float2 rand_filter = (sample == 0) ? make_float2(0.5f, 0.5f) :
+ path_rng_2D(kg, rng_hash, sample, PRNG_FILTER);
/* Depth of field sampling. */
- float lens_u = 0.0f, lens_v = 0.0f;
- if (kernel_data.cam.aperturesize > 0.0f) {
- path_rng_2D(kg, rng_hash, sample, PRNG_LENS_U, &lens_u, &lens_v);
- }
+ const float2 rand_lens = (kernel_data.cam.aperturesize > 0.0f) ?
+ path_rng_2D(kg, rng_hash, sample, PRNG_LENS) :
+ zero_float2();
/* Motion blur time sampling. */
- float time = 0.0f;
-#ifdef __CAMERA_MOTION__
- if (kernel_data.cam.shuttertime != -1.0f)
- time = path_rng_1D(kg, rng_hash, sample, PRNG_TIME);
-#endif
+ const float rand_time = (kernel_data.cam.shuttertime != -1.0f) ?
+ path_rng_1D(kg, rng_hash, sample, PRNG_TIME) :
+ 0.0f;
/* Generate camera ray. */
- camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+ camera_sample(kg, x, y, rand_filter.x, rand_filter.y, rand_lens.x, rand_lens.y, rand_time, ray);
}
/* Return false to indicate that this pixel is finished.
@@ -67,7 +57,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
path_state_init(state, tile, x, y);
/* Check whether the pixel has converged and should not be sampled anymore. */
- if (!kernel_need_sample_pixel(kg, state, render_buffer)) {
+ if (!film_need_sample_pixel(kg, state, render_buffer)) {
return false;
}
@@ -76,7 +66,7 @@ ccl_device bool integrator_init_from_camera(KernelGlobals kg,
* This logic allows to both count actual number of samples per pixel, and to add samples to this
* pixel after it was converged and samples were added somewhere else (in which case the
* `scheduled_sample` will be different from actual number of samples in this pixel). */
- const int sample = kernel_accum_sample(
+ const int sample = film_write_sample(
kg, state, render_buffer, scheduled_sample, tile->sample_offset);
/* Initialize random number seed for path. */
diff --git a/intern/cycles/kernel/integrator/intersect_closest.h b/intern/cycles/kernel/integrator/intersect_closest.h
index 60299f2cb2f..b9a81e25bcc 100644
--- a/intern/cycles/kernel/integrator/intersect_closest.h
+++ b/intern/cycles/kernel/integrator/intersect_closest.h
@@ -5,13 +5,14 @@
#include "kernel/camera/projection.h"
+#include "kernel/film/light_passes.h"
+
+#include "kernel/integrator/guiding.h"
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/shadow_catcher.h"
#include "kernel/light/light.h"
-#include "kernel/util/differential.h"
-
#include "kernel/geom/geom.h"
#include "kernel/bvh/bvh.h"
@@ -48,13 +49,15 @@ ccl_device_forceinline bool integrator_intersect_terminate(KernelGlobals kg,
* surfaces in front of emission do we need to evaluate the shader, since we
* perform MIS as part of indirect rays. */
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
- const float probability = path_state_continuation_probability(kg, state, path_flag);
- INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = probability;
+ const float continuation_probability = path_state_continuation_probability(kg, state, path_flag);
+ INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = continuation_probability;
+
+ guiding_record_continuation_probability(kg, state, continuation_probability);
- if (probability != 1.0f) {
+ if (continuation_probability != 1.0f) {
const float terminate = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE);
- if (probability == 0.0f || terminate >= probability) {
+ if (continuation_probability == 0.0f || terminate >= continuation_probability) {
if (shader_flags & SD_HAS_EMISSION) {
/* Mark path to be terminated right after shader evaluation on the surface. */
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_TERMINATE_ON_NEXT_SURFACE;
@@ -87,7 +90,7 @@ ccl_device_forceinline void integrator_split_shadow_catcher(
return;
}
- kernel_write_shadow_catcher_bounce_data(kg, state, render_buffer);
+ film_write_shadow_catcher_bounce_data(kg, state, render_buffer);
/* Mark state as having done a shadow catcher split so that it stops contributing to
* the shadow catcher matte pass, but keeps contributing to the combined pass. */
diff --git a/intern/cycles/kernel/integrator/intersect_shadow.h b/intern/cycles/kernel/integrator/intersect_shadow.h
index 1b48b360858..25ff3d5b23f 100644
--- a/intern/cycles/kernel/integrator/intersect_shadow.h
+++ b/intern/cycles/kernel/integrator/intersect_shadow.h
@@ -51,7 +51,7 @@ ccl_device_forceinline int integrate_shadow_max_transparent_hits(KernelGlobals k
}
#ifdef __TRANSPARENT_SHADOWS__
-# if defined(__KERNEL_CPU__)
+# ifndef __KERNEL_GPU__
ccl_device int shadow_intersections_compare(const void *a, const void *b)
{
const Intersection *isect_a = (const Intersection *)a;
diff --git a/intern/cycles/kernel/integrator/intersect_volume_stack.h b/intern/cycles/kernel/integrator/intersect_volume_stack.h
index 9ba4a0a3964..c2490581e4d 100644
--- a/intern/cycles/kernel/integrator/intersect_volume_stack.h
+++ b/intern/cycles/kernel/integrator/intersect_volume_stack.h
@@ -5,7 +5,6 @@
#include "kernel/bvh/bvh.h"
#include "kernel/geom/geom.h"
-#include "kernel/integrator/shader_eval.h"
#include "kernel/integrator/volume_stack.h"
CCL_NAMESPACE_BEGIN
@@ -38,8 +37,7 @@ ccl_device void integrator_volume_stack_update_for_subsurface(KernelGlobals kg,
#ifdef __VOLUME_RECORD_ALL__
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
- uint num_hits = scene_intersect_volume_all(
- kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
+ uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
if (num_hits > 0) {
Intersection *isect = hits;
@@ -108,8 +106,7 @@ ccl_device void integrator_volume_stack_init(KernelGlobals kg, IntegratorState s
#ifdef __VOLUME_RECORD_ALL__
Intersection hits[2 * MAX_VOLUME_STACK_SIZE + 1];
- uint num_hits = scene_intersect_volume_all(
- kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
+ uint num_hits = scene_intersect_volume(kg, &volume_ray, hits, 2 * volume_stack_size, visibility);
if (num_hits > 0) {
int enclosed_volumes[MAX_VOLUME_STACK_SIZE];
Intersection *isect = hits;
diff --git a/intern/cycles/kernel/integrator/mnee.h b/intern/cycles/kernel/integrator/mnee.h
index f5d2bcfe9f2..142977f1ac7 100644
--- a/intern/cycles/kernel/integrator/mnee.h
+++ b/intern/cycles/kernel/integrator/mnee.h
@@ -186,7 +186,7 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
triangle_vertices_and_normals(kg, sd_vtx->prim, verts, normals);
/* Compute refined position (same code as in triangle_point_from_uv). */
- sd_vtx->P = isect->u * verts[0] + isect->v * verts[1] + (1.f - isect->u - isect->v) * verts[2];
+ sd_vtx->P = (1.f - isect->u - isect->v) * verts[0] + isect->u * verts[1] + isect->v * verts[2];
if (!(sd_vtx->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
const Transform tfm = object_get_transform(kg, sd_vtx);
sd_vtx->P = transform_point(&tfm, sd_vtx->P);
@@ -213,8 +213,8 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
}
/* Tangent space (position derivatives) WRT barycentric (u, v). */
- float3 dp_du = verts[0] - verts[2];
- float3 dp_dv = verts[1] - verts[2];
+ float3 dp_du = verts[1] - verts[0];
+ float3 dp_dv = verts[2] - verts[0];
/* Geometric normal. */
vtx->ng = normalize(cross(dp_du, dp_dv));
@@ -223,16 +223,16 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
/* Shading normals: Interpolate normals between vertices. */
float n_len;
- vtx->n = normalize_len(normals[0] * sd_vtx->u + normals[1] * sd_vtx->v +
- normals[2] * (1.0f - sd_vtx->u - sd_vtx->v),
+ vtx->n = normalize_len(normals[0] * (1.0f - sd_vtx->u - sd_vtx->v) + normals[1] * sd_vtx->u +
+ normals[2] * sd_vtx->v,
&n_len);
/* Shading normal derivatives WRT barycentric (u, v)
* we calculate the derivative of n = |u*n0 + v*n1 + (1-u-v)*n2| using:
* d/du [f(u)/|f(u)|] = [d/du f(u)]/|f(u)| - f(u)/|f(u)|^3 <f(u), d/du f(u)>. */
const float inv_n_len = 1.f / n_len;
- float3 dn_du = inv_n_len * (normals[0] - normals[2]);
- float3 dn_dv = inv_n_len * (normals[1] - normals[2]);
+ float3 dn_du = inv_n_len * (normals[1] - normals[0]);
+ float3 dn_dv = inv_n_len * (normals[2] - normals[0]);
dn_du -= vtx->n * dot(vtx->n, dn_du);
dn_dv -= vtx->n * dot(vtx->n, dn_dv);
@@ -279,7 +279,15 @@ ccl_device_forceinline void mnee_setup_manifold_vertex(KernelGlobals kg,
}
/* Compute constraint derivatives. */
-ccl_device_forceinline bool mnee_compute_constraint_derivatives(
+
+# if defined(__KERNEL_METAL__)
+/* Temporary workaround for front-end compilation bug (incorrect MNEE rendering when this is
+ * inlined). */
+__attribute__((noinline))
+# else
+ccl_device_forceinline
+# endif
+bool mnee_compute_constraint_derivatives(
int vertex_count,
ccl_private ManifoldVertex *vertices,
ccl_private const float3 &surface_sample_pos,
@@ -392,7 +400,7 @@ ccl_device_forceinline bool mnee_compute_constraint_derivatives(
/* Invert (block) constraint derivative matrix and solve linear system so we can map dh back to dx:
* dh / dx = A
* dx = inverse(A) x dh
- * to use for specular specular manifold walk
+ * to use for specular manifold walk
* (See for example http://faculty.washington.edu/finlayso/ebook/algebraic/advanced/LUtri.htm
* for block tridiagonal matrix based linear system solve) */
ccl_device_forceinline bool mnee_solve_matrix_h_to_x(int vertex_count,
@@ -634,9 +642,9 @@ mnee_sample_bsdf_dh(ClosureType type, float alpha_x, float alpha_y, float sample
* We assume here that the pdf (in half-vector measure) is the same as
* the one calculation when sampling the microfacet normals from the
* specular chain above: this allows us to simplify the bsdf weight */
-ccl_device_forceinline float3 mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure,
- float3 wi,
- float3 wo)
+ccl_device_forceinline Spectrum mnee_eval_bsdf_contribution(ccl_private ShaderClosure *closure,
+ float3 wi,
+ float3 wo)
{
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)closure;
@@ -808,7 +816,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
float3 wo = normalize_len(vertices[0].p - sd->P, &wo_len);
/* Initialize throughput and evaluate receiver bsdf * |n.wo|. */
- shader_bsdf_eval(kg, sd, wo, false, throughput, ls->shader);
+ surface_shader_bsdf_eval(kg, state, sd, wo, throughput, ls->shader);
/* Update light sample with new position / direct.ion
* and keep pdf in vertex area measure */
@@ -836,7 +844,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
1;
INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + vertex_count;
- float3 light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time);
+ Spectrum light_eval = light_sample_shader_eval(kg, state, sd_mnee, ls, sd->time);
bsdf_eval_mul(throughput, light_eval / ls->pdf);
/* Generalized geometry term. */
@@ -914,7 +922,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, bounce) = bounce + 1 + vi;
/* Evaluate shader nodes at solution vi. */
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true);
/* Set light looking dir. */
@@ -925,7 +933,7 @@ ccl_device_forceinline bool mnee_path_contribution(KernelGlobals kg,
/* Evaluate product term inside eq.6 at solution interface. vi
* divided by corresponding sampled pdf:
* fr(vi)_do / pdf_dh(vi) x |do/dh| x |n.wo / n.h| */
- float3 bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo);
+ Spectrum bsdf_contribution = mnee_eval_bsdf_contribution(v.bsdf, wi, wo);
bsdf_eval_mul(throughput, bsdf_contribution);
}
@@ -1007,7 +1015,7 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
return 0;
/* Last bool argument is the MNEE flag (for TINY_MAX_CLOSURE cap in kernel_shader.h). */
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
kg, state, sd_mnee, NULL, PATH_RAY_DIFFUSE, true);
/* Get and sample refraction bsdf */
@@ -1034,10 +1042,12 @@ ccl_device_forceinline int kernel_path_mnee_sample(KernelGlobals kg,
float2 h = zero_float2();
if (microfacet_bsdf->alpha_x > 0.f && microfacet_bsdf->alpha_y > 0.f) {
/* Sample transmissive microfacet bsdf. */
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- h = mnee_sample_bsdf_dh(
- bsdf->type, microfacet_bsdf->alpha_x, microfacet_bsdf->alpha_y, bsdf_u, bsdf_v);
+ const float2 bsdf_uv = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
+ h = mnee_sample_bsdf_dh(bsdf->type,
+ microfacet_bsdf->alpha_x,
+ microfacet_bsdf->alpha_y,
+ bsdf_uv.x,
+ bsdf_uv.y);
}
/* Setup differential geometry on vertex. */
diff --git a/intern/cycles/kernel/integrator/path_state.h b/intern/cycles/kernel/integrator/path_state.h
index 912c380cdb6..7197f0f2f3a 100644
--- a/intern/cycles/kernel/integrator/path_state.h
+++ b/intern/cycles/kernel/integrator/path_state.h
@@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void path_state_init_queues(IntegratorState state)
{
INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
INTEGRATOR_STATE_WRITE(&state->shadow, shadow_path, queued_kernel) = 0;
INTEGRATOR_STATE_WRITE(&state->ao, shadow_path, queued_kernel) = 0;
#endif
@@ -48,13 +48,25 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, path, volume_bounce) = 0;
INTEGRATOR_STATE_WRITE(state, path, volume_bounds_bounce) = 0;
INTEGRATOR_STATE_WRITE(state, path, rng_hash) = rng_hash;
- INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BASE_NUM;
+ INTEGRATOR_STATE_WRITE(state, path, rng_offset) = PRNG_BOUNCE_NUM;
INTEGRATOR_STATE_WRITE(state, path, flag) = PATH_RAY_CAMERA | PATH_RAY_MIS_SKIP |
PATH_RAY_TRANSPARENT_BACKGROUND;
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = 0.0f;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = FLT_MAX;
INTEGRATOR_STATE_WRITE(state, path, continuation_probability) = 1.0f;
- INTEGRATOR_STATE_WRITE(state, path, throughput) = make_float3(1.0f, 1.0f, 1.0f);
+ INTEGRATOR_STATE_WRITE(state, path, throughput) = one_spectrum();
+
+#ifdef __PATH_GUIDING__
+ INTEGRATOR_STATE_WRITE(state, path, unguided_throughput) = 1.0f;
+ INTEGRATOR_STATE_WRITE(state, guiding, path_segment) = nullptr;
+ INTEGRATOR_STATE_WRITE(state, guiding, use_surface_guiding) = false;
+ INTEGRATOR_STATE_WRITE(state, guiding, sample_surface_guiding_rand) = 0.5f;
+ INTEGRATOR_STATE_WRITE(state, guiding, surface_guiding_sampling_prob) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, guiding, bssrdf_sampling_prob) = 0.0f;
+ INTEGRATOR_STATE_WRITE(state, guiding, use_volume_guiding) = false;
+ INTEGRATOR_STATE_WRITE(state, guiding, sample_volume_guiding_rand) = 0.5f;
+ INTEGRATOR_STATE_WRITE(state, guiding, volume_guiding_sampling_prob) = 0.0f;
+#endif
#ifdef __MNEE__
INTEGRATOR_STATE_WRITE(state, path, mnee) = 0;
@@ -74,7 +86,7 @@ ccl_device_inline void path_state_init_integrator(KernelGlobals kg,
#ifdef __DENOISING_FEATURES__
if (kernel_data.kernel_features & KERNEL_FEATURE_DENOISING) {
INTEGRATOR_STATE_WRITE(state, path, flag) |= PATH_RAY_DENOISING_FEATURES;
- INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_float3();
+ INTEGRATOR_STATE_WRITE(state, path, denoising_feature_throughput) = one_spectrum();
}
#endif
}
@@ -249,7 +261,11 @@ ccl_device_inline float path_state_continuation_probability(KernelGlobals kg,
/* Probabilistic termination: use sqrt() to roughly match typical view
* transform and do path termination a bit later on average. */
- return min(sqrtf(reduce_max(fabs(INTEGRATOR_STATE(state, path, throughput)))), 1.0f);
+ Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ throughput *= INTEGRATOR_STATE(state, path, unguided_throughput);
+#endif
+ return min(sqrtf(reduce_max(fabs(throughput))), 1.0f);
}
ccl_device_inline bool path_state_ao_bounce(KernelGlobals kg, ConstIntegratorState state)
@@ -298,38 +314,25 @@ ccl_device_inline void shadow_path_state_rng_load(ConstIntegratorShadowState sta
ccl_device_inline float path_state_rng_1D(KernelGlobals kg,
ccl_private const RNGState *rng_state,
- int dimension)
+ const int dimension)
{
return path_rng_1D(
kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
}
-ccl_device_inline void path_state_rng_2D(KernelGlobals kg,
- ccl_private const RNGState *rng_state,
- int dimension,
- ccl_private float *fx,
- ccl_private float *fy)
-{
- path_rng_2D(
- kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension, fx, fy);
-}
-
-ccl_device_inline float path_state_rng_1D_hash(KernelGlobals kg,
- ccl_private const RNGState *rng_state,
- uint hash)
+ccl_device_inline float2 path_state_rng_2D(KernelGlobals kg,
+ ccl_private const RNGState *rng_state,
+ const int dimension)
{
- /* Use a hash instead of dimension, this is not great but avoids adding
- * more dimensions to each bounce which reduces quality of dimensions we
- * are already using. */
- return path_rng_1D(
- kg, cmj_hash_simple(rng_state->rng_hash, hash), rng_state->sample, rng_state->rng_offset);
+ return path_rng_2D(
+ kg, rng_state->rng_hash, rng_state->sample, rng_state->rng_offset + dimension);
}
ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
ccl_private const RNGState *rng_state,
- int branch,
- int num_branches,
- int dimension)
+ const int branch,
+ const int num_branches,
+ const int dimension)
{
return path_rng_1D(kg,
rng_state->rng_hash,
@@ -337,20 +340,16 @@ ccl_device_inline float path_branched_rng_1D(KernelGlobals kg,
rng_state->rng_offset + dimension);
}
-ccl_device_inline void path_branched_rng_2D(KernelGlobals kg,
- ccl_private const RNGState *rng_state,
- int branch,
- int num_branches,
- int dimension,
- ccl_private float *fx,
- ccl_private float *fy)
+ccl_device_inline float2 path_branched_rng_2D(KernelGlobals kg,
+ ccl_private const RNGState *rng_state,
+ const int branch,
+ const int num_branches,
+ const int dimension)
{
- path_rng_2D(kg,
- rng_state->rng_hash,
- rng_state->sample * num_branches + branch,
- rng_state->rng_offset + dimension,
- fx,
- fy);
+ return path_rng_2D(kg,
+ rng_state->rng_hash,
+ rng_state->sample * num_branches + branch,
+ rng_state->rng_offset + dimension);
}
/* Utility functions to get light termination value,
diff --git a/intern/cycles/kernel/integrator/shade_background.h b/intern/cycles/kernel/integrator/shade_background.h
index a7edfffd175..8fc5689683a 100644
--- a/intern/cycles/kernel/integrator/shade_background.h
+++ b/intern/cycles/kernel/integrator/shade_background.h
@@ -3,18 +3,20 @@
#pragma once
-#include "kernel/film/accumulate.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/film/light_passes.h"
+
+#include "kernel/integrator/guiding.h"
+#include "kernel/integrator/surface_shader.h"
+
#include "kernel/light/light.h"
#include "kernel/light/sample.h"
CCL_NAMESPACE_BEGIN
-ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
- IntegratorState state,
- ccl_global float *ccl_restrict render_buffer)
+ccl_device Spectrum integrator_eval_background_shader(KernelGlobals kg,
+ IntegratorState state,
+ ccl_global float *ccl_restrict render_buffer)
{
-#ifdef __BACKGROUND__
const int shader = kernel_data.background.surface_shader;
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
@@ -26,55 +28,35 @@ ccl_device float3 integrator_eval_background_shader(KernelGlobals kg,
((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) ||
((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)) ||
((shader & SHADER_EXCLUDE_SCATTER) && (path_flag & PATH_RAY_VOLUME_SCATTER)))
- return zero_float3();
+ return zero_spectrum();
}
/* Use fast constant background color if available. */
- float3 L = zero_float3();
- if (!shader_constant_emission_eval(kg, shader, &L)) {
- /* Evaluate background shader. */
-
- /* TODO: does aliasing like this break automatic SoA in CUDA?
- * Should we instead store closures separate from ShaderData? */
- ShaderDataTinyStorage emission_sd_storage;
- ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-
- PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
- shader_setup_from_background(kg,
- emission_sd,
- INTEGRATOR_STATE(state, ray, P),
- INTEGRATOR_STATE(state, ray, D),
- INTEGRATOR_STATE(state, ray, time));
-
- PROFILING_SHADER(emission_sd->object, emission_sd->shader);
- PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL);
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>(
- kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
-
- L = shader_background_eval(emission_sd);
+ Spectrum L = zero_spectrum();
+ if (surface_shader_constant_emission(kg, shader, &L)) {
+ return L;
}
- /* Background MIS weights. */
-# ifdef __BACKGROUND_MIS__
- /* Check if background light exists or if we should skip pdf. */
- if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) &&
- kernel_data.background.use_mis) {
- const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
- const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
- const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
-
- /* multiple importance sampling, get background light pdf for ray
- * direction, and compute weight with respect to BSDF pdf */
- const float pdf = background_light_pdf(kg, ray_P, ray_D);
- const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
- L *= mis_weight;
- }
-# endif
+ /* Evaluate background shader. */
- return L;
-#else
- return make_float3(0.8f, 0.8f, 0.8f);
-#endif
+ /* TODO: does aliasing like this break automatic SoA in CUDA?
+ * Should we instead store closures separate from ShaderData? */
+ ShaderDataTinyStorage emission_sd_storage;
+ ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
+
+ PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
+ shader_setup_from_background(kg,
+ emission_sd,
+ INTEGRATOR_STATE(state, ray, P),
+ INTEGRATOR_STATE(state, ray, D),
+ INTEGRATOR_STATE(state, ray, time));
+
+ PROFILING_SHADER(emission_sd->object, emission_sd->shader);
+ PROFILING_EVENT(PROFILING_SHADE_LIGHT_EVAL);
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_BACKGROUND>(
+ kg, state, emission_sd, render_buffer, path_flag | PATH_RAY_EMISSION);
+
+ return surface_shader_background(emission_sd);
}
ccl_device_inline void integrate_background(KernelGlobals kg,
@@ -117,17 +99,38 @@ ccl_device_inline void integrate_background(KernelGlobals kg,
#endif /* __MNEE__ */
/* Evaluate background shader. */
- float3 L = (eval_background) ? integrator_eval_background_shader(kg, state, render_buffer) :
- zero_float3();
+ Spectrum L = zero_spectrum();
+
+ if (eval_background) {
+ L = integrator_eval_background_shader(kg, state, render_buffer);
+
+ /* When using the ao bounces approximation, adjust background
+ * shader intensity with ao factor. */
+ if (path_state_ao_bounce(kg, state)) {
+ L *= kernel_data.integrator.ao_bounces_factor;
+ }
+
+ /* Background MIS weights. */
+ float mis_weight = 1.0f;
+ /* Check if background light exists or if we should skip pdf. */
+ if (!(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_MIS_SKIP) &&
+ kernel_data.background.use_mis) {
+ const float3 ray_P = INTEGRATOR_STATE(state, ray, P);
+ const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
+ const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
+
+ /* multiple importance sampling, get background light pdf for ray
+ * direction, and compute weight with respect to BSDF pdf */
+ const float pdf = background_light_pdf(kg, ray_P, ray_D);
+ mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, pdf);
+ }
- /* When using the ao bounces approximation, adjust background
- * shader intensity with ao factor. */
- if (path_state_ao_bounce(kg, state)) {
- L *= kernel_data.integrator.ao_bounces_factor;
+ guiding_record_background(kg, state, L, mis_weight);
+ L *= mis_weight;
}
/* Write to render buffer. */
- kernel_accum_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer);
+ film_write_background(kg, state, L, transparent, is_transparent_background_ray, render_buffer);
}
ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
@@ -169,24 +172,24 @@ ccl_device_inline void integrate_distant_lights(KernelGlobals kg,
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
+ Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) {
return;
}
/* MIS weighting. */
+ float mis_weight = 1.0f;
if (!(path_flag & PATH_RAY_MIS_SKIP)) {
/* multiple importance sampling, get regular light pdf,
* and compute weight with respect to BSDF pdf */
const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
- const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf);
- light_eval *= mis_weight;
+ mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf);
}
/* Write to render buffer. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_accum_emission(
- kg, state, throughput * light_eval, render_buffer, kernel_data.background.lightgroup);
+ guiding_record_background(kg, state, light_eval, mis_weight);
+ film_write_surface_emission(
+ kg, state, light_eval, mis_weight, render_buffer, kernel_data.background.lightgroup);
}
}
}
diff --git a/intern/cycles/kernel/integrator/shade_light.h b/intern/cycles/kernel/integrator/shade_light.h
index 910e3383f51..e0b0500dc78 100644
--- a/intern/cycles/kernel/integrator/shade_light.h
+++ b/intern/cycles/kernel/integrator/shade_light.h
@@ -3,8 +3,8 @@
#pragma once
-#include "kernel/film/accumulate.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/film/light_passes.h"
+#include "kernel/integrator/surface_shader.h"
#include "kernel/light/light.h"
#include "kernel/light/sample.h"
@@ -18,6 +18,8 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
Intersection isect ccl_optional_struct_init;
integrator_state_read_isect(kg, state, &isect);
+ guiding_record_light_surface_segment(kg, state, &isect);
+
float3 ray_P = INTEGRATOR_STATE(state, ray, P);
const float3 ray_D = INTEGRATOR_STATE(state, ray, D);
const float ray_time = INTEGRATOR_STATE(state, ray, time);
@@ -51,23 +53,23 @@ ccl_device_inline void integrate_light(KernelGlobals kg,
/* TODO: does aliasing like this break automatic SoA in CUDA? */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
+ Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, ray_time);
if (is_zero(light_eval)) {
return;
}
/* MIS weighting. */
+ float mis_weight = 1.0f;
if (!(path_flag & PATH_RAY_MIS_SKIP)) {
/* multiple importance sampling, get regular light pdf,
* and compute weight with respect to BSDF pdf */
const float mis_ray_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
- const float mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf);
- light_eval *= mis_weight;
+ mis_weight = light_sample_mis_weight_forward(kg, mis_ray_pdf, ls.pdf);
}
/* Write to render buffer. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_accum_emission(kg, state, throughput * light_eval, render_buffer, ls.group);
+ guiding_record_surface_emission(kg, state, light_eval, mis_weight);
+ film_write_surface_emission(kg, state, light_eval, mis_weight, render_buffer, ls.group);
}
ccl_device void integrator_shade_light(KernelGlobals kg,
diff --git a/intern/cycles/kernel/integrator/shade_shadow.h b/intern/cycles/kernel/integrator/shade_shadow.h
index 4b002a47bee..bedb15ddf89 100644
--- a/intern/cycles/kernel/integrator/shade_shadow.h
+++ b/intern/cycles/kernel/integrator/shade_shadow.h
@@ -3,8 +3,9 @@
#pragma once
+#include "kernel/integrator/guiding.h"
#include "kernel/integrator/shade_volume.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/integrator/surface_shader.h"
#include "kernel/integrator/volume_stack.h"
CCL_NAMESPACE_BEGIN
@@ -15,9 +16,9 @@ ccl_device_inline bool shadow_intersections_has_remaining(const uint num_hits)
}
#ifdef __TRANSPARENT_SHADOWS__
-ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg,
- IntegratorShadowState state,
- const int hit)
+ccl_device_inline Spectrum integrate_transparent_surface_shadow(KernelGlobals kg,
+ IntegratorShadowState state,
+ const int hit)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_SURFACE);
@@ -40,7 +41,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg,
/* Evaluate shader. */
if (!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_SHADOW>(
kg, state, shadow_sd, NULL, PATH_RAY_SHADOW);
}
@@ -50,7 +51,7 @@ ccl_device_inline float3 integrate_transparent_surface_shadow(KernelGlobals kg,
# endif
/* Compute transparency from closures. */
- return shader_bsdf_transparency(kg, shadow_sd);
+ return surface_shader_transparency(kg, shadow_sd);
}
# ifdef __VOLUME__
@@ -58,7 +59,7 @@ ccl_device_inline void integrate_transparent_volume_shadow(KernelGlobals kg,
IntegratorShadowState state,
const int hit,
const int num_recorded_hits,
- ccl_private float3 *ccl_restrict
+ ccl_private Spectrum *ccl_restrict
throughput)
{
PROFILING_INIT(kg, PROFILING_SHADE_SHADOW_VOLUME);
@@ -100,7 +101,7 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
if (hit < num_recorded_hits || !shadow_intersections_has_remaining(num_hits)) {
# ifdef __VOLUME__
if (!integrator_state_shadow_volume_stack_is_empty(kg, state)) {
- float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
+ Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput);
integrate_transparent_volume_shadow(kg, state, hit, num_recorded_hits, &throughput);
if (is_zero(throughput)) {
return true;
@@ -113,8 +114,8 @@ ccl_device_inline bool integrate_transparent_shadow(KernelGlobals kg,
/* Surface shaders. */
if (hit < num_recorded_hits) {
- const float3 shadow = integrate_transparent_surface_shadow(kg, state, hit);
- const float3 throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow;
+ const Spectrum shadow = integrate_transparent_surface_shadow(kg, state, hit);
+ const Spectrum throughput = INTEGRATOR_STATE(state, shadow_path, throughput) * shadow;
if (is_zero(throughput)) {
return true;
}
@@ -165,7 +166,8 @@ ccl_device void integrator_shade_shadow(KernelGlobals kg,
return;
}
else {
- kernel_accum_light(kg, state, render_buffer);
+ guiding_record_direct_light(kg, state);
+ film_write_direct_light(kg, state, render_buffer);
integrator_shadow_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW);
return;
}
diff --git a/intern/cycles/kernel/integrator/shade_surface.h b/intern/cycles/kernel/integrator/shade_surface.h
index 1514b3956ad..067d35ef9e3 100644
--- a/intern/cycles/kernel/integrator/shade_surface.h
+++ b/intern/cycles/kernel/integrator/shade_surface.h
@@ -3,14 +3,16 @@
#pragma once
-#include "kernel/film/accumulate.h"
-#include "kernel/film/passes.h"
+#include "kernel/film/data_passes.h"
+#include "kernel/film/denoising_passes.h"
+#include "kernel/film/light_passes.h"
#include "kernel/integrator/mnee.h"
+#include "kernel/integrator/guiding.h"
#include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
#include "kernel/integrator/subsurface.h"
+#include "kernel/integrator/surface_shader.h"
#include "kernel/integrator/volume_stack.h"
#include "kernel/light/light.h"
@@ -31,7 +33,52 @@ ccl_device_forceinline void integrate_surface_shader_setup(KernelGlobals kg,
shader_setup_from_ray(kg, sd, &ray, &isect);
}
-#ifdef __HOLDOUT__
+ccl_device_forceinline float3 integrate_surface_ray_offset(KernelGlobals kg,
+ const ccl_private ShaderData *sd,
+ const float3 ray_P,
+ const float3 ray_D)
+{
+ /* No ray offset needed for other primitive types. */
+ if (!(sd->type & PRIMITIVE_TRIANGLE)) {
+ return ray_P;
+ }
+
+ /* Self intersection tests already account for the case where a ray hits the
+ * same primitive. However precision issues can still cause neighboring
+ * triangles to be hit. Here we test if the ray-triangle intersection with
+ * the same primitive would miss, implying that a neighboring triangle would
+ * be hit instead.
+ *
+ * This relies on triangle intersection to be watertight, and the object inverse
+ * object transform to match the one used by ray intersection exactly.
+ *
+ * Potential improvements:
+ * - It appears this happens when either barycentric coordinates are small,
+ * or dot(sd->Ng, ray_D) is small. Detect such cases and skip test?
+ * - Instead of ray offset, can we tweak P to lie within the triangle?
+ */
+ const uint tri_vindex = kernel_data_fetch(tri_vindex, sd->prim).w;
+ const packed_float3 tri_a = kernel_data_fetch(tri_verts, tri_vindex + 0),
+ tri_b = kernel_data_fetch(tri_verts, tri_vindex + 1),
+ tri_c = kernel_data_fetch(tri_verts, tri_vindex + 2);
+
+ float3 local_ray_P = ray_P;
+ float3 local_ray_D = ray_D;
+
+ if (!(sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
+ const Transform itfm = object_get_inverse_transform(kg, sd);
+ local_ray_P = transform_point(&itfm, local_ray_P);
+ local_ray_D = transform_direction(&itfm, local_ray_D);
+ }
+
+ if (ray_triangle_intersect_self(local_ray_P, local_ray_D, tri_a, tri_b, tri_c)) {
+ return ray_P;
+ }
+ else {
+ return ray_offset(ray_P, sd->Ng);
+ }
+}
+
ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg,
ConstIntegratorState state,
ccl_private ShaderData *sd,
@@ -42,24 +89,20 @@ ccl_device_forceinline bool integrate_surface_holdout(KernelGlobals kg,
if (((sd->flag & SD_HOLDOUT) || (sd->object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
(path_flag & PATH_RAY_TRANSPARENT_BACKGROUND)) {
- const float3 holdout_weight = shader_holdout_apply(kg, sd);
- if (kernel_data.background.transparent) {
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- const float transparent = average(holdout_weight * throughput);
- kernel_accum_holdout(kg, state, path_flag, transparent, render_buffer);
- }
- if (isequal(holdout_weight, one_float3())) {
+ const Spectrum holdout_weight = surface_shader_apply_holdout(kg, sd);
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ const float transparent = average(holdout_weight * throughput);
+ film_write_holdout(kg, state, path_flag, transparent, render_buffer);
+ if (isequal(holdout_weight, one_spectrum())) {
return false;
}
}
return true;
}
-#endif /* __HOLDOUT__ */
-#ifdef __EMISSION__
ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
- ConstIntegratorState state,
+ IntegratorState state,
ccl_private const ShaderData *sd,
ccl_global float *ccl_restrict
render_buffer)
@@ -67,14 +110,15 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
/* Evaluate emissive closure. */
- float3 L = shader_emissive_eval(sd);
+ Spectrum L = surface_shader_emission(sd);
+ float mis_weight = 1.0f;
-# ifdef __HAIR__
+#ifdef __HAIR__
if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) &&
(sd->type & PRIMITIVE_TRIANGLE))
-# else
+#else
if (!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
-# endif
+#endif
{
const float bsdf_pdf = INTEGRATOR_STATE(state, path, mis_ray_pdf);
const float t = sd->ray_length;
@@ -82,17 +126,14 @@ ccl_device_forceinline void integrate_surface_emission(KernelGlobals kg,
/* Multiple importance sampling, get triangle light pdf,
* and compute weight with respect to BSDF pdf. */
float pdf = triangle_light_pdf(kg, sd, t);
- float mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf);
- L *= mis_weight;
+ mis_weight = light_sample_mis_weight_forward(kg, bsdf_pdf, pdf);
}
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_accum_emission(
- kg, state, throughput * L, render_buffer, object_lightgroup(kg, sd->object));
+ guiding_record_surface_emission(kg, state, L, mis_weight);
+ film_write_surface_emission(
+ kg, state, L, mis_weight, render_buffer, object_lightgroup(kg, sd->object));
}
-#endif /* __EMISSION__ */
-#ifdef __EMISSION__
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
template<uint node_feature_mask>
@@ -111,11 +152,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const uint bounce = INTEGRATOR_STATE(state, path, bounce);
- float light_u, light_v;
- path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+ const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
if (!light_distribution_sample_from_position(
- kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, &ls)) {
+ kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, &ls)) {
return;
}
}
@@ -133,9 +173,10 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
Ray ray ccl_optional_struct_init;
BsdfEval bsdf_eval ccl_optional_struct_init;
- const bool is_transmission = shader_bsdf_is_transmission(sd, ls.D);
-# ifdef __MNEE__
+ const bool is_transmission = dot(ls.D, sd->N) < 0.0f;
+
+#ifdef __MNEE__
int mnee_vertex_count = 0;
IF_KERNEL_FEATURE(MNEE)
{
@@ -144,13 +185,15 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
const bool use_caustics = kernel_data_fetch(lights, ls.lamp).use_caustics;
if (use_caustics) {
/* Are we on a caustic caster? */
- if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER))
+ if (is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_CASTER)) {
return;
+ }
/* Are we on a caustic receiver? */
- if (!is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_RECEIVER))
+ if (!is_transmission && (sd->object_flag & SD_OBJECT_CAUSTICS_RECEIVER)) {
mnee_vertex_count = kernel_path_mnee_sample(
kg, state, sd, emission_sd, rng_state, &ls, &bsdf_eval);
+ }
}
}
}
@@ -161,15 +204,15 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
light_sample_to_surface_shadow_ray(kg, emission_sd, &ls, &ray);
}
else
-# endif /* __MNEE__ */
+#endif /* __MNEE__ */
{
- const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
+ const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
if (is_zero(light_eval)) {
return;
}
/* Evaluate BSDF. */
- const float bsdf_pdf = shader_bsdf_eval(kg, sd, ls.D, is_transmission, &bsdf_eval, ls.shader);
+ const float bsdf_pdf = surface_shader_bsdf_eval(kg, state, sd, ls.D, &bsdf_eval, ls.shader);
bsdf_eval_mul(&bsdf_eval, light_eval / ls.pdf);
if (ls.shader & SHADER_USE_MIS) {
@@ -197,9 +240,13 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
if (is_transmission) {
-# ifdef __VOLUME__
+#ifdef __VOLUME__
shadow_volume_stack_enter_exit(kg, shadow_state, sd);
-# endif
+#endif
+ }
+
+ if (ray.self.object != OBJECT_NONE) {
+ ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D);
}
/* Write shadow ray and associated state to global memory. */
@@ -213,11 +260,12 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
/* Copy state from main path to shadow path. */
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * bsdf_eval_sum(&bsdf_eval);
+ const Spectrum unlit_throughput = INTEGRATOR_STATE(state, path, throughput);
+ const Spectrum throughput = unlit_throughput * bsdf_eval_sum(&bsdf_eval);
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- packed_float3 pass_diffuse_weight;
- packed_float3 pass_glossy_weight;
+ PackedSpectrum pass_diffuse_weight;
+ PackedSpectrum pass_glossy_weight;
if (shadow_flag & PATH_RAY_ANY_PASS) {
/* Indirect bounce, use weights from earlier surface or volume bounce. */
@@ -227,8 +275,8 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
else {
/* Direct light, use BSDFs at this bounce. */
shadow_flag |= PATH_RAY_SURFACE_PASS;
- pass_diffuse_weight = packed_float3(bsdf_eval_pass_diffuse_weight(&bsdf_eval));
- pass_glossy_weight = packed_float3(bsdf_eval_pass_glossy_weight(&bsdf_eval));
+ pass_diffuse_weight = PackedSpectrum(bsdf_eval_pass_diffuse_weight(&bsdf_eval));
+ pass_glossy_weight = PackedSpectrum(bsdf_eval_pass_glossy_weight(&bsdf_eval));
}
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
@@ -250,7 +298,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, glossy_bounce) = INTEGRATOR_STATE(
state, path, glossy_bounce);
-# ifdef __MNEE__
+#ifdef __MNEE__
if (mnee_vertex_count > 0) {
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) =
INTEGRATOR_STATE(state, path, transmission_bounce) + mnee_vertex_count - 1;
@@ -262,7 +310,7 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
bounce) = INTEGRATOR_STATE(state, path, bounce) + mnee_vertex_count;
}
else
-# endif
+#endif
{
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, transmission_bounce) = INTEGRATOR_STATE(
state, path, transmission_bounce);
@@ -283,8 +331,12 @@ ccl_device_forceinline void integrate_surface_direct_light(KernelGlobals kg,
shadow_state, shadow_path, lightgroup) = (ls.type != LIGHT_BACKGROUND) ?
ls.group + 1 :
kernel_data.background.lightgroup + 1;
-}
+#ifdef __PATH_GUIDING__
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unlit_throughput) = unlit_throughput;
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = INTEGRATOR_STATE(
+ state, guiding, path_segment);
#endif
+}
/* Path tracing: bounce off or through surface with new direction. */
ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
@@ -298,9 +350,8 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
return LABEL_NONE;
}
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
- ccl_private const ShaderClosure *sc = shader_bsdf_bssrdf_pick(sd, &bsdf_u);
+ float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
+ ccl_private const ShaderClosure *sc = surface_shader_bsdf_bssrdf_pick(sd, &rand_bsdf);
#ifdef __SUBSURFACE__
/* BSSRDF closure, we schedule subsurface intersection kernel. */
@@ -310,17 +361,52 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
#endif
/* BSDF closure, sample direction. */
- float bsdf_pdf;
+ float bsdf_pdf = 0.0f, unguided_bsdf_pdf = 0.0f;
BsdfEval bsdf_eval ccl_optional_struct_init;
float3 bsdf_omega_in ccl_optional_struct_init;
- differential3 bsdf_domega_in ccl_optional_struct_init;
int label;
- label = shader_bsdf_sample_closure(
- kg, sd, sc, bsdf_u, bsdf_v, &bsdf_eval, &bsdf_omega_in, &bsdf_domega_in, &bsdf_pdf);
+ float2 bsdf_sampled_roughness = make_float2(1.0f, 1.0f);
+ float bsdf_eta = 1.0f;
+
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ if (kernel_data.integrator.use_surface_guiding) {
+ label = surface_shader_bsdf_guided_sample_closure(kg,
+ state,
+ sd,
+ sc,
+ rand_bsdf,
+ &bsdf_eval,
+ &bsdf_omega_in,
+ &bsdf_pdf,
+ &unguided_bsdf_pdf,
+ &bsdf_sampled_roughness,
+ &bsdf_eta);
+
+ if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) {
+ return LABEL_NONE;
+ }
- if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) {
- return LABEL_NONE;
+ INTEGRATOR_STATE_WRITE(state, path, unguided_throughput) *= bsdf_pdf / unguided_bsdf_pdf;
+ }
+ else
+#endif
+ {
+ label = surface_shader_bsdf_sample_closure(kg,
+ sd,
+ sc,
+ rand_bsdf,
+ &bsdf_eval,
+ &bsdf_omega_in,
+ &bsdf_pdf,
+ &bsdf_sampled_roughness,
+ &bsdf_eta);
+
+ if (bsdf_pdf == 0.0f || bsdf_eval_is_zero(&bsdf_eval)) {
+ return LABEL_NONE;
+ }
+
+ unguided_bsdf_pdf = bsdf_pdf;
}
if (label & LABEL_TRANSPARENT) {
@@ -329,20 +415,19 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
}
else {
/* Setup ray with changed origin and direction. */
- INTEGRATOR_STATE_WRITE(state, ray, P) = sd->P;
- INTEGRATOR_STATE_WRITE(state, ray, D) = normalize(bsdf_omega_in);
+ const float3 D = normalize(bsdf_omega_in);
+ INTEGRATOR_STATE_WRITE(state, ray, P) = integrate_surface_ray_offset(kg, sd, sd->P, D);
+ INTEGRATOR_STATE_WRITE(state, ray, D) = D;
INTEGRATOR_STATE_WRITE(state, ray, tmin) = 0.0f;
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
#ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
- INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(bsdf_domega_in);
#endif
}
/* Update throughput. */
- float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- throughput *= bsdf_eval_sum(&bsdf_eval) / bsdf_pdf;
- INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
+ const Spectrum bsdf_weight = bsdf_eval_sum(&bsdf_eval) / bsdf_pdf;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) *= bsdf_weight;
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
if (INTEGRATOR_STATE(state, path, bounce) == 0) {
@@ -357,10 +442,21 @@ ccl_device_forceinline int integrate_surface_bsdf_bssrdf_bounce(
if (!(label & LABEL_TRANSPARENT)) {
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = bsdf_pdf;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
- bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
+ unguided_bsdf_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
}
path_state_next(kg, state, label);
+
+ guiding_record_surface_bounce(kg,
+ state,
+ sd,
+ bsdf_weight,
+ bsdf_pdf,
+ sd->N,
+ normalize(bsdf_omega_in),
+ bsdf_sampled_roughness,
+ bsdf_eta);
+
return label;
}
@@ -382,14 +478,15 @@ ccl_device_forceinline int integrate_surface_volume_only_bounce(IntegratorState
ccl_device_forceinline bool integrate_surface_terminate(IntegratorState state,
const uint32_t path_flag)
{
- const float probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ?
- 0.0f :
- INTEGRATOR_STATE(state, path, continuation_probability);
- if (probability == 0.0f) {
+ const float continuation_probability = (path_flag & PATH_RAY_TERMINATE_ON_NEXT_SURFACE) ?
+ 0.0f :
+ INTEGRATOR_STATE(
+ state, path, continuation_probability);
+ if (continuation_probability == 0.0f) {
return true;
}
- else if (probability != 1.0f) {
- INTEGRATOR_STATE_WRITE(state, path, throughput) /= probability;
+ else if (continuation_probability != 1.0f) {
+ INTEGRATOR_STATE_WRITE(state, path, throughput) /= continuation_probability;
}
return false;
@@ -408,22 +505,24 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
return;
}
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+ const float2 rand_bsdf = path_state_rng_2D(kg, rng_state, PRNG_SURFACE_BSDF);
float3 ao_N;
- const float3 ao_weight = shader_bsdf_ao(
+ const Spectrum ao_weight = surface_shader_ao(
kg, sd, kernel_data.integrator.ao_additive_factor, &ao_N);
float3 ao_D;
float ao_pdf;
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
+ sample_cos_hemisphere(ao_N, rand_bsdf.x, rand_bsdf.y, &ao_D, &ao_pdf);
bool skip_self = true;
Ray ray ccl_optional_struct_init;
ray.P = shadow_ray_offset(kg, sd, ao_D, &skip_self);
ray.D = ao_D;
+ if (skip_self) {
+ ray.P = integrate_surface_ray_offset(kg, sd, ray.P, ray.D);
+ }
ray.tmin = 0.0f;
ray.tmax = kernel_data.integrator.ao_bounces_distance;
ray.time = sd->time;
@@ -452,7 +551,8 @@ ccl_device_forceinline void integrate_surface_ao(KernelGlobals kg,
const uint16_t bounce = INTEGRATOR_STATE(state, path, bounce);
const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag) | PATH_RAY_SHADOW_FOR_AO;
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput) * shader_bsdf_alpha(kg, sd);
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput) *
+ surface_shader_alpha(kg, sd);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, render_pixel_index) = INTEGRATOR_STATE(
state, path, render_pixel_index);
@@ -494,6 +594,8 @@ ccl_device bool integrate_surface(KernelGlobals kg,
#ifdef __VOLUME__
if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
#endif
+ guiding_record_surface_segment(kg, state, &sd);
+
#ifdef __SUBSURFACE__
/* Can skip shader evaluation for BSSRDF exit point without bump mapping. */
if (!(path_flag & PATH_RAY_SUBSURFACE) || ((sd.flag & SD_HAS_BSSRDF_BUMP)))
@@ -501,7 +603,7 @@ ccl_device bool integrate_surface(KernelGlobals kg,
{
/* Evaluate shader. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_EVAL);
- shader_eval_surface<node_feature_mask>(kg, state, &sd, render_buffer, path_flag);
+ surface_shader_eval<node_feature_mask>(kg, state, &sd, render_buffer, path_flag);
/* Initialize additional RNG for BSDFs. */
if (sd.flag & SD_BSDF_NEEDS_LCG) {
@@ -523,21 +625,17 @@ ccl_device bool integrate_surface(KernelGlobals kg,
#endif
{
/* Filter closures. */
- shader_prepare_surface_closures(kg, state, &sd, path_flag);
+ surface_shader_prepare_closures(kg, state, &sd, path_flag);
-#ifdef __HOLDOUT__
/* Evaluate holdout. */
if (!integrate_surface_holdout(kg, state, &sd, render_buffer)) {
return false;
}
-#endif
-#ifdef __EMISSION__
/* Write emission. */
if (sd.flag & SD_EMISSION) {
integrate_surface_emission(kg, state, &sd, render_buffer);
}
-#endif
/* Perform path termination. Most paths have already been terminated in
* the intersect_closest kernel, this is just for emission and for dividing
@@ -551,11 +649,11 @@ ccl_device bool integrate_surface(KernelGlobals kg,
/* Write render passes. */
#ifdef __PASSES__
PROFILING_EVENT(PROFILING_SHADE_SURFACE_PASSES);
- kernel_write_data_passes(kg, state, &sd, render_buffer);
+ film_write_data_passes(kg, state, &sd, render_buffer);
#endif
#ifdef __DENOISING_FEATURES__
- kernel_write_denoising_features_surface(kg, state, &sd, render_buffer);
+ film_write_denoising_features_surface(kg, state, &sd, render_buffer);
#endif
}
@@ -563,6 +661,10 @@ ccl_device bool integrate_surface(KernelGlobals kg,
RNGState rng_state;
path_state_rng_load(state, &rng_state);
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ surface_shader_prepare_guiding(kg, state, &sd, &rng_state);
+ guiding_write_debug_passes(kg, state, &sd, render_buffer);
+#endif
/* Direct light. */
PROFILING_EVENT(PROFILING_SHADE_SURFACE_DIRECT_LIGHT);
integrate_surface_direct_light<node_feature_mask>(kg, state, &sd, &rng_state);
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index 4aab097a7d8..a8324cda2dc 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -3,12 +3,14 @@
#pragma once
-#include "kernel/film/accumulate.h"
-#include "kernel/film/passes.h"
+#include "kernel/film/data_passes.h"
+#include "kernel/film/denoising_passes.h"
+#include "kernel/film/light_passes.h"
+#include "kernel/integrator/guiding.h"
#include "kernel/integrator/intersect_closest.h"
#include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/integrator/volume_shader.h"
#include "kernel/integrator/volume_stack.h"
#include "kernel/light/light.h"
@@ -29,13 +31,13 @@ typedef enum VolumeIntegrateEvent {
typedef struct VolumeIntegrateResult {
/* Throughput and offset for direct light scattering. */
bool direct_scatter;
- float3 direct_throughput;
+ Spectrum direct_throughput;
float direct_t;
ShaderVolumePhases direct_phases;
/* Throughput and offset for indirect light scattering. */
bool indirect_scatter;
- float3 indirect_throughput;
+ Spectrum indirect_throughput;
float indirect_t;
ShaderVolumePhases indirect_phases;
} VolumeIntegrateResult;
@@ -52,19 +54,19 @@ typedef struct VolumeIntegrateResult {
* sigma_t = sigma_a + sigma_s */
typedef struct VolumeShaderCoefficients {
- float3 sigma_t;
- float3 sigma_s;
- float3 emission;
+ Spectrum sigma_t;
+ Spectrum sigma_s;
+ Spectrum emission;
} VolumeShaderCoefficients;
/* Evaluate shader to get extinction coefficient at P. */
ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg,
IntegratorShadowState state,
ccl_private ShaderData *ccl_restrict sd,
- ccl_private float3 *ccl_restrict extinction)
+ ccl_private Spectrum *ccl_restrict extinction)
{
VOLUME_READ_LAMBDA(integrator_state_read_shadow_volume_stack(state, i))
- shader_eval_volume<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass);
+ volume_shader_eval<true>(kg, state, sd, PATH_RAY_SHADOW, volume_read_lambda_pass);
if (!(sd->flag & SD_EXTINCTION)) {
return false;
@@ -83,15 +85,16 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals kg,
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
- shader_eval_volume<false>(kg, state, sd, path_flag, volume_read_lambda_pass);
+ volume_shader_eval<false>(kg, state, sd, path_flag, volume_read_lambda_pass);
if (!(sd->flag & (SD_EXTINCTION | SD_SCATTER | SD_EMISSION))) {
return false;
}
- coeff->sigma_s = zero_float3();
- coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction : zero_float3();
- coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_float3();
+ coeff->sigma_s = zero_spectrum();
+ coeff->sigma_t = (sd->flag & SD_EXTINCTION) ? sd->closure_transparent_extinction :
+ zero_spectrum();
+ coeff->emission = (sd->flag & SD_EMISSION) ? sd->closure_emission_background : zero_spectrum();
if (sd->flag & SD_SCATTER) {
for (int i = 0; i < sd->num_closure; i++) {
@@ -143,11 +146,11 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
/* Perform shading at this offset within a step, to integrate over
* over the entire step segment. */
- *step_shade_offset = path_state_rng_1D_hash(kg, rng_state, 0x1e31d8a4);
+ *step_shade_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SHADE_OFFSET);
/* Shift starting point of all segment by this random amount to avoid
* banding artifacts from the volume bounding shape. */
- *steps_offset = path_state_rng_1D_hash(kg, rng_state, 0x3d22c7b3);
+ *steps_offset = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_OFFSET);
}
}
@@ -162,9 +165,9 @@ ccl_device_forceinline void volume_step_init(KernelGlobals kg,
ccl_device void volume_shadow_homogeneous(KernelGlobals kg, IntegratorState state,
ccl_private Ray *ccl_restrict ray,
ccl_private ShaderData *ccl_restrict sd,
- ccl_global float3 *ccl_restrict throughput)
+ ccl_global Spectrum *ccl_restrict throughput)
{
- float3 sigma_t = zero_float3();
+ Spectrum sigma_t = zero_spectrum();
if (shadow_volume_shader_sample(kg, state, sd, &sigma_t)) {
*throughput *= volume_color_transmittance(sigma_t, ray->tmax - ray->tmin);
@@ -178,14 +181,14 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
IntegratorShadowState state,
ccl_private Ray *ccl_restrict ray,
ccl_private ShaderData *ccl_restrict sd,
- ccl_private float3 *ccl_restrict throughput,
+ ccl_private Spectrum *ccl_restrict throughput,
const float object_step_size)
{
/* Load random number state. */
RNGState rng_state;
shadow_path_state_rng_load(state, &rng_state);
- float3 tp = *throughput;
+ Spectrum tp = *throughput;
/* Prepare for stepping.
* For shadows we do not offset all segments, since the starting point is
@@ -207,7 +210,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
/* compute extinction at the start */
float t = ray->tmin;
- float3 sum = zero_float3();
+ Spectrum sum = zero_spectrum();
for (int i = 0; i < max_steps; i++) {
/* advance to new position */
@@ -215,7 +218,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
float dt = new_t - t;
float3 new_P = ray->P + ray->D * (t + dt * step_shade_offset);
- float3 sigma_t = zero_float3();
+ Spectrum sigma_t = zero_spectrum();
/* compute attenuation over segment */
sd->P = new_P;
@@ -228,8 +231,7 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg,
tp = *throughput * exp(sum);
/* stop if nearly all light is blocked */
- if (tp.x < VOLUME_THROUGHPUT_EPSILON && tp.y < VOLUME_THROUGHPUT_EPSILON &&
- tp.z < VOLUME_THROUGHPUT_EPSILON)
+ if (reduce_max(tp) < VOLUME_THROUGHPUT_EPSILON)
break;
}
}
@@ -334,22 +336,22 @@ ccl_device float volume_equiangular_cdf(ccl_private const Ray *ccl_restrict ray,
/* Distance sampling */
ccl_device float volume_distance_sample(float max_t,
- float3 sigma_t,
+ Spectrum sigma_t,
int channel,
float xi,
- ccl_private float3 *transmittance,
- ccl_private float3 *pdf)
+ ccl_private Spectrum *transmittance,
+ ccl_private Spectrum *pdf)
{
/* xi is [0, 1[ so log(0) should never happen, division by zero is
* avoided because sample_sigma_t > 0 when SD_SCATTER is set */
float sample_sigma_t = volume_channel_get(sigma_t, channel);
- float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t);
float sample_transmittance = volume_channel_get(full_transmittance, channel);
float sample_t = min(max_t, -logf(1.0f - xi * (1.0f - sample_transmittance)) / sample_sigma_t);
*transmittance = volume_color_transmittance(sigma_t, sample_t);
- *pdf = safe_divide_color(sigma_t * *transmittance, one_float3() - full_transmittance);
+ *pdf = safe_divide_color(sigma_t * *transmittance, one_spectrum() - full_transmittance);
/* todo: optimization: when taken together with hit/miss decision,
* the full_transmittance cancels out drops out and xi does not
@@ -358,33 +360,36 @@ ccl_device float volume_distance_sample(float max_t,
return sample_t;
}
-ccl_device float3 volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
+ccl_device Spectrum volume_distance_pdf(float max_t, Spectrum sigma_t, float sample_t)
{
- float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
- float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
+ Spectrum full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ Spectrum transmittance = volume_color_transmittance(sigma_t, sample_t);
- return safe_divide_color(sigma_t * transmittance, one_float3() - full_transmittance);
+ return safe_divide_color(sigma_t * transmittance, one_spectrum() - full_transmittance);
}
/* Emission */
-ccl_device float3 volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff,
- int closure_flag,
- float3 transmittance,
- float t)
+ccl_device Spectrum volume_emission_integrate(ccl_private VolumeShaderCoefficients *coeff,
+ int closure_flag,
+ Spectrum transmittance,
+ float t)
{
/* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
* this goes to E * t as sigma_t goes to zero
*
* todo: we should use an epsilon to avoid precision issues near zero sigma_t */
- float3 emission = coeff->emission;
+ Spectrum emission = coeff->emission;
if (closure_flag & SD_EXTINCTION) {
- float3 sigma_t = coeff->sigma_t;
+ Spectrum sigma_t = coeff->sigma_t;
- emission.x *= (sigma_t.x > 0.0f) ? (1.0f - transmittance.x) / sigma_t.x : t;
- emission.y *= (sigma_t.y > 0.0f) ? (1.0f - transmittance.y) / sigma_t.y : t;
- emission.z *= (sigma_t.z > 0.0f) ? (1.0f - transmittance.z) / sigma_t.z : t;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ GET_SPECTRUM_CHANNEL(emission, i) *= (GET_SPECTRUM_CHANNEL(sigma_t, i) > 0.0f) ?
+ (1.0f - GET_SPECTRUM_CHANNEL(transmittance, i)) /
+ GET_SPECTRUM_CHANNEL(sigma_t, i) :
+ t;
+ }
}
else
emission *= t;
@@ -419,14 +424,14 @@ ccl_device_forceinline void volume_integrate_step_scattering(
ccl_private const Ray *ray,
const float3 equiangular_light_P,
ccl_private const VolumeShaderCoefficients &ccl_restrict coeff,
- const float3 transmittance,
+ const Spectrum transmittance,
ccl_private VolumeIntegrateState &ccl_restrict vstate,
ccl_private VolumeIntegrateResult &ccl_restrict result)
{
/* Pick random color channel, we use the Veach one-sample
* model with balance heuristic for the channels. */
- const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
- float3 channel_pdf;
+ const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ Spectrum channel_pdf;
const int channel = volume_sample_channel(
albedo, result.indirect_throughput, vstate.rphase, &channel_pdf);
@@ -435,11 +440,11 @@ ccl_device_forceinline void volume_integrate_step_scattering(
if (result.direct_t >= vstate.tmin && result.direct_t <= vstate.tmax &&
vstate.equiangular_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
const float new_dt = result.direct_t - vstate.tmin;
- const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+ const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
result.direct_scatter = true;
result.direct_throughput *= coeff.sigma_s * new_transmittance / vstate.equiangular_pdf;
- shader_copy_volume_phases(&result.direct_phases, sd);
+ volume_shader_copy_phases(&result.direct_phases, sd);
/* Multiple importance sampling. */
if (vstate.use_mis) {
@@ -467,7 +472,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
const float new_t = vstate.tmin + new_dt;
/* transmittance and pdf */
- const float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
+ const Spectrum new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt);
const float distance_pdf = dot(channel_pdf, coeff.sigma_t * new_transmittance);
if (vstate.distance_pdf * distance_pdf > VOLUME_SAMPLE_PDF_CUTOFF) {
@@ -475,7 +480,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
result.indirect_scatter = true;
result.indirect_t = new_t;
result.indirect_throughput *= coeff.sigma_s * new_transmittance / distance_pdf;
- shader_copy_volume_phases(&result.indirect_phases, sd);
+ volume_shader_copy_phases(&result.indirect_phases, sd);
if (vstate.direct_sample_method != VOLUME_SAMPLE_EQUIANGULAR) {
/* If using distance sampling for direct light, just copy parameters
@@ -483,7 +488,7 @@ ccl_device_forceinline void volume_integrate_step_scattering(
result.direct_scatter = true;
result.direct_t = result.indirect_t;
result.direct_throughput = result.indirect_throughput;
- shader_copy_volume_phases(&result.direct_phases, sd);
+ volume_shader_copy_phases(&result.direct_phases, sd);
/* Multiple importance sampling. */
if (vstate.use_mis) {
@@ -546,8 +551,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
vstate.tmin = ray->tmin;
vstate.tmax = ray->tmin;
vstate.absorption_only = true;
- vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_SCATTER_DISTANCE);
- vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_PHASE_CHANNEL);
+ vstate.rscatter = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_SCATTER_DISTANCE);
+ vstate.rphase = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_CHANNEL);
/* Multiple importance sampling: pick between equiangular and distance sampling strategy. */
vstate.direct_sample_method = direct_sample_method;
@@ -566,7 +571,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
vstate.distance_pdf = 1.0f;
/* Initialize volume integration result. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
result.direct_throughput = throughput;
result.indirect_throughput = throughput;
@@ -579,9 +584,9 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
# ifdef __DENOISING_FEATURES__
const bool write_denoising_features = (INTEGRATOR_STATE(state, path, flag) &
PATH_RAY_DENOISING_FEATURES);
- float3 accum_albedo = zero_float3();
+ Spectrum accum_albedo = zero_spectrum();
# endif
- float3 accum_emission = zero_float3();
+ Spectrum accum_emission = zero_spectrum();
for (int i = 0; i < max_steps; i++) {
/* Advance to new position */
@@ -596,18 +601,19 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Evaluate transmittance over segment. */
const float dt = (vstate.tmax - vstate.tmin);
- const float3 transmittance = (closure_flag & SD_EXTINCTION) ?
- volume_color_transmittance(coeff.sigma_t, dt) :
- one_float3();
+ const Spectrum transmittance = (closure_flag & SD_EXTINCTION) ?
+ volume_color_transmittance(coeff.sigma_t, dt) :
+ one_spectrum();
/* Emission. */
if (closure_flag & SD_EMISSION) {
/* Only write emission before indirect light scatter position, since we terminate
* stepping at that point if we have already found a direct light scatter position. */
if (!result.indirect_scatter) {
- const float3 emission = volume_emission_integrate(
+ const Spectrum emission = volume_emission_integrate(
&coeff, closure_flag, transmittance, dt);
accum_emission += result.indirect_throughput * emission;
+ guiding_record_volume_emission(kg, state, emission);
}
}
@@ -616,8 +622,8 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
# ifdef __DENOISING_FEATURES__
/* Accumulate albedo for denoising features. */
if (write_denoising_features && (closure_flag & SD_SCATTER)) {
- const float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
- accum_albedo += result.indirect_throughput * albedo * (one_float3() - transmittance);
+ const Spectrum albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t);
+ accum_albedo += result.indirect_throughput * albedo * (one_spectrum() - transmittance);
}
# endif
@@ -634,7 +640,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Stop if nearly all light blocked. */
if (!result.indirect_scatter) {
if (reduce_max(result.indirect_throughput) < VOLUME_THROUGHPUT_EPSILON) {
- result.indirect_throughput = zero_float3();
+ result.indirect_throughput = zero_spectrum();
break;
}
}
@@ -660,20 +666,19 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
/* Write accumulated emission. */
if (!is_zero(accum_emission)) {
- kernel_accum_emission(
+ film_write_volume_emission(
kg, state, accum_emission, render_buffer, object_lightgroup(kg, sd->object));
}
# ifdef __DENOISING_FEATURES__
/* Write denoising features. */
if (write_denoising_features) {
- kernel_write_denoising_features_volume(
+ film_write_denoising_features_volume(
kg, state, accum_albedo, result.indirect_scatter, render_buffer);
}
# endif /* __DENOISING_FEATURES__ */
}
-# ifdef __EMISSION__
/* Path tracing: sample point on light and evaluate light shader, then
* queue shadow ray to be traced. */
ccl_device_forceinline bool integrate_volume_sample_light(
@@ -691,11 +696,10 @@ ccl_device_forceinline bool integrate_volume_sample_light(
/* Sample position on a light. */
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const uint bounce = INTEGRATOR_STATE(state, path, bounce);
- float light_u, light_v;
- path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+ const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
if (!light_distribution_sample_from_volume_segment(
- kg, light_u, light_v, sd->time, sd->P, bounce, path_flag, ls)) {
+ kg, rand_light.x, rand_light.y, sd->time, sd->P, bounce, path_flag, ls)) {
return false;
}
@@ -715,7 +719,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
ccl_private const RNGState *ccl_restrict rng_state,
const float3 P,
ccl_private const ShaderVolumePhases *ccl_restrict phases,
- ccl_private const float3 throughput,
+ ccl_private const Spectrum throughput,
ccl_private LightSample *ccl_restrict ls)
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
@@ -732,11 +736,10 @@ ccl_device_forceinline void integrate_volume_direct_light(
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const uint bounce = INTEGRATOR_STATE(state, path, bounce);
- float light_u, light_v;
- path_state_rng_2D(kg, rng_state, PRNG_LIGHT_U, &light_u, &light_v);
+ const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
if (!light_distribution_sample_from_position(
- kg, light_u, light_v, sd->time, P, bounce, path_flag, ls)) {
+ kg, rand_light.x, rand_light.y, sd->time, P, bounce, path_flag, ls)) {
return;
}
}
@@ -753,14 +756,14 @@ ccl_device_forceinline void integrate_volume_direct_light(
* non-constant light sources. */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- const float3 light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
+ const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
if (is_zero(light_eval)) {
return;
}
/* Evaluate BSDF. */
BsdfEval phase_eval ccl_optional_struct_init;
- const float phase_pdf = shader_volume_phase_eval(kg, sd, phases, ls->D, &phase_eval);
+ float phase_pdf = volume_shader_phase_eval(kg, state, sd, phases, ls->D, &phase_eval);
if (ls->shader & SHADER_USE_MIS) {
float mis_weight = light_sample_mis_weight_nee(kg, ls->pdf, phase_pdf);
@@ -796,11 +799,11 @@ ccl_device_forceinline void integrate_volume_direct_light(
const uint16_t transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
uint32_t shadow_flag = INTEGRATOR_STATE(state, path, flag);
shadow_flag |= (is_light) ? PATH_RAY_SHADOW_FOR_LIGHT : 0;
- const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
+ const Spectrum throughput_phase = throughput * bsdf_eval_sum(&phase_eval);
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- packed_float3 pass_diffuse_weight;
- packed_float3 pass_glossy_weight;
+ PackedSpectrum pass_diffuse_weight;
+ PackedSpectrum pass_glossy_weight;
if (shadow_flag & PATH_RAY_ANY_PASS) {
/* Indirect bounce, use weights from earlier surface or volume bounce. */
@@ -810,8 +813,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
else {
/* Direct light, no diffuse/glossy distinction needed for volumes. */
shadow_flag |= PATH_RAY_VOLUME_PASS;
- pass_diffuse_weight = packed_float3(one_float3());
- pass_glossy_weight = packed_float3(zero_float3());
+ pass_diffuse_weight = one_spectrum();
+ pass_glossy_weight = zero_spectrum();
}
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, pass_diffuse_weight) = pass_diffuse_weight;
@@ -847,9 +850,14 @@ ccl_device_forceinline void integrate_volume_direct_light(
ls->group + 1 :
kernel_data.background.lightgroup + 1;
+# ifdef __PATH_GUIDING__
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, unlit_throughput) = throughput;
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = INTEGRATOR_STATE(
+ state, guiding, path_segment);
+# endif
+
integrator_state_copy_volume_stack_to_shadow(kg, shadow_state, state);
}
-# endif
/* Path tracing: scatter in new direction using phase function */
ccl_device_forceinline bool integrate_volume_phase_scatter(
@@ -861,27 +869,54 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INDIRECT_LIGHT);
- float phase_u, phase_v;
- path_state_rng_2D(kg, rng_state, PRNG_BSDF_U, &phase_u, &phase_v);
+ float2 rand_phase = path_state_rng_2D(kg, rng_state, PRNG_VOLUME_PHASE);
+
+ ccl_private const ShaderVolumeClosure *svc = volume_shader_phase_pick(phases, &rand_phase);
/* Phase closure, sample direction. */
- float phase_pdf;
+ float phase_pdf = 0.0f, unguided_phase_pdf = 0.0f;
BsdfEval phase_eval ccl_optional_struct_init;
float3 phase_omega_in ccl_optional_struct_init;
- differential3 phase_domega_in ccl_optional_struct_init;
-
- const int label = shader_volume_phase_sample(kg,
- sd,
- phases,
- phase_u,
- phase_v,
- &phase_eval,
- &phase_omega_in,
- &phase_domega_in,
- &phase_pdf);
-
- if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) {
- return false;
+ float sampled_roughness = 1.0f;
+ int label;
+
+# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ if (kernel_data.integrator.use_guiding) {
+ label = volume_shader_phase_guided_sample(kg,
+ state,
+ sd,
+ svc,
+ rand_phase,
+ &phase_eval,
+ &phase_omega_in,
+ &phase_pdf,
+ &unguided_phase_pdf,
+ &sampled_roughness);
+
+ if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) {
+ return false;
+ }
+
+ INTEGRATOR_STATE_WRITE(state, path, unguided_throughput) *= phase_pdf / unguided_phase_pdf;
+ }
+ else
+# endif
+ {
+ label = volume_shader_phase_sample(kg,
+ sd,
+ phases,
+ svc,
+ rand_phase,
+ &phase_eval,
+ &phase_omega_in,
+ &phase_pdf,
+ &sampled_roughness);
+
+ if (phase_pdf == 0.0f || bsdf_eval_is_zero(&phase_eval)) {
+ return false;
+ }
+
+ unguided_phase_pdf = phase_pdf;
}
/* Setup ray. */
@@ -891,26 +926,31 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
INTEGRATOR_STATE_WRITE(state, ray, tmax) = FLT_MAX;
# ifdef __RAY_DIFFERENTIALS__
INTEGRATOR_STATE_WRITE(state, ray, dP) = differential_make_compact(sd->dP);
- INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_make_compact(phase_domega_in);
# endif
// Save memory by storing last hit prim and object in isect
INTEGRATOR_STATE_WRITE(state, isect, prim) = sd->prim;
INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object;
+ const Spectrum phase_weight = bsdf_eval_sum(&phase_eval) / phase_pdf;
+
+ /* Add phase function sampling data to the path segment. */
+ guiding_record_volume_bounce(
+ kg, state, sd, phase_weight, phase_pdf, normalize(phase_omega_in), sampled_roughness);
+
/* Update throughput. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- const float3 throughput_phase = throughput * bsdf_eval_sum(&phase_eval) / phase_pdf;
+ const Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
+ const Spectrum throughput_phase = throughput * phase_weight;
INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput_phase;
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
- INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3();
- INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3();
+ INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum();
+ INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum();
}
/* Update path state */
INTEGRATOR_STATE_WRITE(state, path, mis_ray_pdf) = phase_pdf;
INTEGRATOR_STATE_WRITE(state, path, min_ray_pdf) = fminf(
- phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
+ unguided_phase_pdf, INTEGRATOR_STATE(state, path, min_ray_pdf));
path_state_next(kg, state, label);
return true;
@@ -949,6 +989,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
VOLUME_READ_LAMBDA(integrator_state_read_volume_stack(state, i))
const float step_size = volume_stack_step_size(kg, volume_read_lambda_pass);
+# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ const float3 initial_throughput = INTEGRATOR_STATE(state, path, throughput);
+# endif
+
/* TODO: expensive to zero closures? */
VolumeIntegrateResult result = {};
volume_integrate_heterogeneous(kg,
@@ -966,17 +1010,50 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
* to be terminated. That will shading evaluating to leave out any scattering closures,
* but emission and absorption are still handled for multiple importance sampling. */
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
- const float probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ?
- 0.0f :
- INTEGRATOR_STATE(state, path, continuation_probability);
- if (probability == 0.0f) {
+ const float continuation_probability = (path_flag & PATH_RAY_TERMINATE_IN_NEXT_VOLUME) ?
+ 0.0f :
+ INTEGRATOR_STATE(
+ state, path, continuation_probability);
+ if (continuation_probability == 0.0f) {
return VOLUME_PATH_MISSED;
}
+# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ bool guiding_generated_new_segment = false;
+ if (kernel_data.integrator.use_guiding) {
+ /* Record transmittance using change in throughput. */
+ float3 transmittance_weight = spectrum_to_rgb(
+ safe_divide_color(result.indirect_throughput, initial_throughput));
+ guiding_record_volume_transmission(kg, state, transmittance_weight);
+
+ if (result.indirect_scatter) {
+ const float3 P = ray->P + result.indirect_t * ray->D;
+
+ /* Record volume segment up to direct scatter position.
+ * TODO: volume segment is wrong when direct_t and indirect_t. */
+ if (result.direct_scatter && (result.direct_t == result.indirect_t)) {
+ guiding_record_volume_segment(kg, state, P, sd.I);
+ guiding_generated_new_segment = true;
+ }
+
+# if PATH_GUIDING_LEVEL >= 4
+ /* TODO: this position will be wrong for direct light pdf computation,
+ * since the direct light position may be different? */
+ volume_shader_prepare_guiding(
+ kg, state, &sd, &rng_state, P, ray->D, &result.direct_phases, direct_sample_method);
+# endif
+ }
+ else {
+ /* No guiding if we don't scatter. */
+ state->guiding.use_volume_guiding = false;
+ }
+ }
+# endif
+
/* Direct light. */
if (result.direct_scatter) {
const float3 direct_P = ray->P + result.direct_t * ray->D;
- result.direct_throughput /= probability;
+ result.direct_throughput /= continuation_probability;
integrate_volume_direct_light(kg,
state,
&sd,
@@ -989,16 +1066,22 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
/* Indirect light.
*
- * Only divide throughput by probability if we scatter. For the attenuation
+ * Only divide throughput by continuation_probability if we scatter. For the attenuation
* case the next surface will already do this division. */
if (result.indirect_scatter) {
- result.indirect_throughput /= probability;
+ result.indirect_throughput /= continuation_probability;
}
INTEGRATOR_STATE_WRITE(state, path, throughput) = result.indirect_throughput;
if (result.indirect_scatter) {
sd.P = ray->P + result.indirect_t * ray->D;
+# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 1
+ if (!guiding_generated_new_segment) {
+ guiding_record_volume_segment(kg, state, sd.P, sd.I);
+ }
+# endif
+
if (integrate_volume_phase_scatter(kg, state, &sd, &rng_state, &result.indirect_phases)) {
return VOLUME_PATH_SCATTERED;
}
diff --git a/intern/cycles/kernel/integrator/shader_eval.h b/intern/cycles/kernel/integrator/shader_eval.h
deleted file mode 100644
index ed4d973e864..00000000000
--- a/intern/cycles/kernel/integrator/shader_eval.h
+++ /dev/null
@@ -1,952 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-/* Functions to evaluate shaders and use the resulting shader closures. */
-
-#pragma once
-
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/bsdf.h"
-#include "kernel/closure/bsdf_util.h"
-#include "kernel/closure/emissive.h"
-
-#include "kernel/film/accumulate.h"
-
-#include "kernel/svm/svm.h"
-
-#ifdef __OSL__
-# include "kernel/osl/shader.h"
-#endif
-
-CCL_NAMESPACE_BEGIN
-
-/* Merging */
-
-#if defined(__VOLUME__)
-ccl_device_inline void shader_merge_volume_closures(ccl_private ShaderData *sd)
-{
- /* Merge identical closures to save closure space with stacked volumes. */
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sci = &sd->closure[i];
-
- if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
- continue;
- }
-
- for (int j = i + 1; j < sd->num_closure; j++) {
- ccl_private ShaderClosure *scj = &sd->closure[j];
- if (sci->type != scj->type) {
- continue;
- }
-
- ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *)
- sci;
- ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *)
- scj;
- if (!(hgi->g == hgj->g)) {
- continue;
- }
-
- sci->weight += scj->weight;
- sci->sample_weight += scj->sample_weight;
-
- int size = sd->num_closure - (j + 1);
- if (size > 0) {
- for (int k = 0; k < size; k++) {
- scj[k] = scj[k + 1];
- }
- }
-
- sd->num_closure--;
- kernel_assert(sd->num_closure >= 0);
- j--;
- }
- }
-}
-
-ccl_device_inline void shader_copy_volume_phases(ccl_private ShaderVolumePhases *ccl_restrict
- phases,
- ccl_private const ShaderData *ccl_restrict sd)
-{
- phases->num_closure = 0;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *from_sc = &sd->closure[i];
- ccl_private const HenyeyGreensteinVolume *from_hg =
- (ccl_private const HenyeyGreensteinVolume *)from_sc;
-
- if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
- ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
-
- to_sc->weight = from_sc->weight;
- to_sc->sample_weight = from_sc->sample_weight;
- to_sc->g = from_hg->g;
- phases->num_closure++;
- if (phases->num_closure >= MAX_VOLUME_CLOSURE) {
- break;
- }
- }
- }
-}
-#endif /* __VOLUME__ */
-
-ccl_device_inline void shader_prepare_surface_closures(KernelGlobals kg,
- ConstIntegratorState state,
- ccl_private ShaderData *sd,
- const uint32_t path_flag)
-{
- /* Filter out closures. */
- if (kernel_data.integrator.filter_closures) {
- if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) {
- sd->closure_emission_background = zero_float3();
- }
-
- if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) {
- sd->flag &= ~SD_BSDF_HAS_EVAL;
- }
-
- if (path_flag & PATH_RAY_CAMERA) {
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
-
- if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) &&
- (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) ||
- (CLOSURE_IS_BSDF_GLOSSY(sc->type) &&
- (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) ||
- (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) &&
- (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) {
- sc->type = CLOSURE_NONE_ID;
- sc->sample_weight = 0.0f;
- }
- else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) &&
- (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) {
- sc->type = CLOSURE_HOLDOUT_ID;
- sc->sample_weight = 0.0f;
- sd->flag |= SD_HOLDOUT;
- }
- }
- }
- }
-
- /* Defensive sampling.
- *
- * We can likely also do defensive sampling at deeper bounces, particularly
- * for cases like a perfect mirror but possibly also others. This will need
- * a good heuristic. */
- if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) ==
- 0 &&
- sd->num_closure > 1) {
- float sum = 0.0f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sum += sc->sample_weight;
- }
- }
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
- }
- }
- }
-
- /* Filter glossy.
- *
- * Blurring of bsdf after bounces, for rays that have a small likelihood
- * of following this particular path (diffuse, rough glossy) */
- if (kernel_data.integrator.filter_glossy != FLT_MAX
-#ifdef __MNEE__
- && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID)
-#endif
- ) {
- float blur_pdf = kernel_data.integrator.filter_glossy *
- INTEGRATOR_STATE(state, path, min_ray_pdf);
-
- if (blur_pdf < 1.0f) {
- float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF(sc->type)) {
- bsdf_blur(kg, sc, blur_roughness);
- }
- }
- }
- }
-}
-
-/* BSDF */
-
-ccl_device_inline bool shader_bsdf_is_transmission(ccl_private const ShaderData *sd,
- const float3 omega_in)
-{
- return dot(sd->N, omega_in) < 0.0f;
-}
-
-ccl_device_forceinline bool _shader_bsdf_exclude(ClosureType type, uint light_shader_flags)
-{
- if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) {
- return false;
- }
- if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) {
- if (CLOSURE_IS_BSDF_DIFFUSE(type)) {
- return true;
- }
- }
- if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) {
- if (CLOSURE_IS_BSDF_GLOSSY(type)) {
- return true;
- }
- }
- if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) {
- if (CLOSURE_IS_BSDF_TRANSMISSION(type)) {
- return true;
- }
- }
- return false;
-}
-
-ccl_device_inline float _shader_bsdf_multi_eval(KernelGlobals kg,
- ccl_private ShaderData *sd,
- const float3 omega_in,
- const bool is_transmission,
- ccl_private const ShaderClosure *skip_sc,
- ccl_private BsdfEval *result_eval,
- float sum_pdf,
- float sum_sample_weight,
- const uint light_shader_flags)
-{
- /* This is the veach one-sample model with balance heuristic,
- * some PDF factors drop out when using balance heuristic weighting. */
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (sc == skip_sc) {
- continue;
- }
-
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- if (CLOSURE_IS_BSDF(sc->type) && !_shader_bsdf_exclude(sc->type, light_shader_flags)) {
- float bsdf_pdf = 0.0f;
- float3 eval = bsdf_eval(kg, sd, sc, omega_in, is_transmission, &bsdf_pdf);
-
- if (bsdf_pdf != 0.0f) {
- bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
- sum_pdf += bsdf_pdf * sc->sample_weight;
- }
- }
-
- sum_sample_weight += sc->sample_weight;
- }
- }
-
- return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
-}
-
-#ifndef __KERNEL_CUDA__
-ccl_device
-#else
-ccl_device_inline
-#endif
- float
- shader_bsdf_eval(KernelGlobals kg,
- ccl_private ShaderData *sd,
- const float3 omega_in,
- const bool is_transmission,
- ccl_private BsdfEval *bsdf_eval,
- const uint light_shader_flags)
-{
- bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_float3());
-
- return _shader_bsdf_multi_eval(
- kg, sd, omega_in, is_transmission, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags);
-}
-
-/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */
-ccl_device_inline ccl_private const ShaderClosure *shader_bsdf_bssrdf_pick(
- ccl_private const ShaderData *ccl_restrict sd, ccl_private float *randu)
-{
- int sampled = 0;
-
- if (sd->num_closure > 1) {
- /* Pick a BSDF or based on sample weights. */
- float sum = 0.0f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sum += sc->sample_weight;
- }
- }
-
- float r = (*randu) * sum;
- float partial_sum = 0.0f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- float next_sum = partial_sum + sc->sample_weight;
-
- if (r < next_sum) {
- sampled = i;
-
- /* Rescale to reuse for direction sample, to better preserve stratification. */
- *randu = (r - partial_sum) / sc->sample_weight;
- break;
- }
-
- partial_sum = next_sum;
- }
- }
- }
-
- return &sd->closure[sampled];
-}
-
-/* Return weight for picked BSSRDF. */
-ccl_device_inline float3
-shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd,
- ccl_private const ShaderClosure *ccl_restrict bssrdf_sc)
-{
- float3 weight = bssrdf_sc->weight;
-
- if (sd->num_closure > 1) {
- float sum = 0.0f;
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
- sum += sc->sample_weight;
- }
- }
- weight *= sum / bssrdf_sc->sample_weight;
- }
-
- return weight;
-}
-
-/* Sample direction for picked BSDF, and return evaluation and pdf for all
- * BSDFs combined using MIS. */
-ccl_device int shader_bsdf_sample_closure(KernelGlobals kg,
- ccl_private ShaderData *sd,
- ccl_private const ShaderClosure *sc,
- float randu,
- float randv,
- ccl_private BsdfEval *bsdf_eval,
- ccl_private float3 *omega_in,
- ccl_private differential3 *domega_in,
- ccl_private float *pdf)
-{
- /* BSSRDF should already have been handled elsewhere. */
- kernel_assert(CLOSURE_IS_BSDF(sc->type));
-
- int label;
- float3 eval = zero_float3();
-
- *pdf = 0.0f;
- label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
-
- if (*pdf != 0.0f) {
- bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
-
- if (sd->num_closure > 1) {
- const bool is_transmission = shader_bsdf_is_transmission(sd, *omega_in);
- float sweight = sc->sample_weight;
- *pdf = _shader_bsdf_multi_eval(
- kg, sd, *omega_in, is_transmission, sc, bsdf_eval, *pdf * sweight, sweight, 0);
- }
- }
-
- return label;
-}
-
-ccl_device float shader_bsdf_average_roughness(ccl_private const ShaderData *sd)
-{
- float roughness = 0.0f;
- float sum_weight = 0.0f;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF(sc->type)) {
- /* sqrt once to undo the squaring from multiplying roughness on the
- * two axes, and once for the squared roughness convention. */
- float weight = fabsf(average(sc->weight));
- roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
- sum_weight += weight;
- }
- }
-
- return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
-}
-
-ccl_device float3 shader_bsdf_transparency(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- if (sd->flag & SD_HAS_ONLY_VOLUME) {
- return one_float3();
- }
- else if (sd->flag & SD_TRANSPARENT) {
- return sd->closure_transparent_extinction;
- }
- else {
- return zero_float3();
- }
-}
-
-ccl_device void shader_bsdf_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd)
-{
- if (sd->flag & SD_TRANSPARENT) {
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
-
- if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
- sc->sample_weight = 0.0f;
- sc->weight = zero_float3();
- }
- }
-
- sd->flag &= ~SD_TRANSPARENT;
- }
-}
-
-ccl_device float3 shader_bsdf_alpha(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 alpha = one_float3() - shader_bsdf_transparency(kg, sd);
-
- alpha = max(alpha, zero_float3());
- alpha = min(alpha, one_float3());
-
- return alpha;
-}
-
-ccl_device float3 shader_bsdf_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 eval = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type))
- eval += sc->weight;
- }
-
- return eval;
-}
-
-ccl_device float3 shader_bsdf_glossy(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 eval = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
- eval += sc->weight;
- }
-
- return eval;
-}
-
-ccl_device float3 shader_bsdf_transmission(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 eval = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
- eval += sc->weight;
- }
-
- return eval;
-}
-
-ccl_device float3 shader_bsdf_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd)
-{
- float3 N = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
- N += sc->N * fabsf(average(sc->weight));
- }
-
- return (is_zero(N)) ? sd->N : normalize(N);
-}
-
-ccl_device float3 shader_bsdf_ao(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- const float ao_factor,
- ccl_private float3 *N_)
-{
- float3 eval = zero_float3();
- float3 N = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
- ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
- eval += sc->weight * ao_factor;
- N += bsdf->N * fabsf(average(sc->weight));
- }
- }
-
- *N_ = (is_zero(N)) ? sd->N : normalize(N);
- return eval;
-}
-
-#ifdef __SUBSURFACE__
-ccl_device float3 shader_bssrdf_normal(ccl_private const ShaderData *sd)
-{
- float3 N = zero_float3();
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
-
- if (CLOSURE_IS_BSSRDF(sc->type)) {
- ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
- float avg_weight = fabsf(average(sc->weight));
-
- N += bssrdf->N * avg_weight;
- }
- }
-
- return (is_zero(N)) ? sd->N : normalize(N);
-}
-#endif /* __SUBSURFACE__ */
-
-/* Constant emission optimization */
-
-ccl_device bool shader_constant_emission_eval(KernelGlobals kg,
- int shader,
- ccl_private float3 *eval)
-{
- int shader_index = shader & SHADER_MASK;
- int shader_flag = kernel_data_fetch(shaders, shader_index).flags;
-
- if (shader_flag & SD_HAS_CONSTANT_EMISSION) {
- *eval = make_float3(kernel_data_fetch(shaders, shader_index).constant_emission[0],
- kernel_data_fetch(shaders, shader_index).constant_emission[1],
- kernel_data_fetch(shaders, shader_index).constant_emission[2]);
-
- return true;
- }
-
- return false;
-}
-
-/* Background */
-
-ccl_device float3 shader_background_eval(ccl_private const ShaderData *sd)
-{
- if (sd->flag & SD_EMISSION) {
- return sd->closure_emission_background;
- }
- else {
- return zero_float3();
- }
-}
-
-/* Emission */
-
-ccl_device float3 shader_emissive_eval(ccl_private const ShaderData *sd)
-{
- if (sd->flag & SD_EMISSION) {
- return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
- }
- else {
- return zero_float3();
- }
-}
-
-/* Holdout */
-
-ccl_device float3 shader_holdout_apply(KernelGlobals kg, ccl_private ShaderData *sd)
-{
- float3 weight = zero_float3();
-
- /* For objects marked as holdout, preserve transparency and remove all other
- * closures, replacing them with a holdout weight. */
- if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) {
- weight = one_float3() - sd->closure_transparent_extinction;
-
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private ShaderClosure *sc = &sd->closure[i];
- if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
- sc->type = NBUILTIN_CLOSURES;
- }
- }
-
- sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF));
- }
- else {
- weight = one_float3();
- }
- }
- else {
- for (int i = 0; i < sd->num_closure; i++) {
- ccl_private const ShaderClosure *sc = &sd->closure[i];
- if (CLOSURE_IS_HOLDOUT(sc->type)) {
- weight += sc->weight;
- }
- }
- }
-
- return weight;
-}
-
-/* Surface Evaluation */
-
-template<uint node_feature_mask, typename ConstIntegratorGenericState>
-ccl_device void shader_eval_surface(KernelGlobals kg,
- ConstIntegratorGenericState state,
- ccl_private ShaderData *ccl_restrict sd,
- ccl_global float *ccl_restrict buffer,
- uint32_t path_flag,
- bool use_caustics_storage = false)
-{
- /* If path is being terminated, we are tracing a shadow ray or evaluating
- * emission, then we don't need to store closures. The emission and shadow
- * shader data also do not have a closure array to save GPU memory. */
- int max_closures;
- if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
- max_closures = 0;
- }
- else {
- max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures;
- }
-
- sd->num_closure = 0;
- sd->num_closure_left = max_closures;
-
-#ifdef __OSL__
- if (kg->osl) {
- if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
- OSLShader::eval_background(kg, state, sd, path_flag);
- }
- else {
- OSLShader::eval_surface(kg, state, sd, path_flag);
- }
- }
- else
-#endif
- {
-#ifdef __SVM__
- svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag);
-#else
- if (sd->object == OBJECT_NONE) {
- sd->closure_emission_background = make_float3(0.8f, 0.8f, 0.8f);
- sd->flag |= SD_EMISSION;
- }
- else {
- ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
- sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f));
- if (bsdf != NULL) {
- bsdf->N = sd->N;
- sd->flag |= bsdf_diffuse_setup(bsdf);
- }
- }
-#endif
- }
-}
-
-/* Volume */
-
-#ifdef __VOLUME__
-
-ccl_device_inline float _shader_volume_phase_multi_eval(
- ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumePhases *phases,
- const float3 omega_in,
- int skip_phase,
- ccl_private BsdfEval *result_eval,
- float sum_pdf,
- float sum_sample_weight)
-{
- for (int i = 0; i < phases->num_closure; i++) {
- if (i == skip_phase)
- continue;
-
- ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
- float phase_pdf = 0.0f;
- float3 eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
-
- if (phase_pdf != 0.0f) {
- bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
- sum_pdf += phase_pdf * svc->sample_weight;
- }
-
- sum_sample_weight += svc->sample_weight;
- }
-
- return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
-}
-
-ccl_device float shader_volume_phase_eval(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumePhases *phases,
- const float3 omega_in,
- ccl_private BsdfEval *phase_eval)
-{
- bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_float3());
-
- return _shader_volume_phase_multi_eval(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
-}
-
-ccl_device int shader_volume_phase_sample(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumePhases *phases,
- float randu,
- float randv,
- ccl_private BsdfEval *phase_eval,
- ccl_private float3 *omega_in,
- ccl_private differential3 *domega_in,
- ccl_private float *pdf)
-{
- int sampled = 0;
-
- if (phases->num_closure > 1) {
- /* pick a phase closure based on sample weights */
- float sum = 0.0f;
-
- for (sampled = 0; sampled < phases->num_closure; sampled++) {
- ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
- sum += svc->sample_weight;
- }
-
- float r = randu * sum;
- float partial_sum = 0.0f;
-
- for (sampled = 0; sampled < phases->num_closure; sampled++) {
- ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
- float next_sum = partial_sum + svc->sample_weight;
-
- if (r <= next_sum) {
- /* Rescale to reuse for BSDF direction sample. */
- randu = (r - partial_sum) / svc->sample_weight;
- break;
- }
-
- partial_sum = next_sum;
- }
-
- if (sampled == phases->num_closure) {
- *pdf = 0.0f;
- return LABEL_NONE;
- }
- }
-
- /* todo: this isn't quite correct, we don't weight anisotropy properly
- * depending on color channels, even if this is perhaps not a common case */
- ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
- int label;
- float3 eval = zero_float3();
-
- *pdf = 0.0f;
- label = volume_phase_sample(sd, svc, randu, randv, &eval, omega_in, domega_in, pdf);
-
- if (*pdf != 0.0f) {
- bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
- }
-
- return label;
-}
-
-ccl_device int shader_phase_sample_closure(KernelGlobals kg,
- ccl_private const ShaderData *sd,
- ccl_private const ShaderVolumeClosure *sc,
- float randu,
- float randv,
- ccl_private BsdfEval *phase_eval,
- ccl_private float3 *omega_in,
- ccl_private differential3 *domega_in,
- ccl_private float *pdf)
-{
- int label;
- float3 eval = zero_float3();
-
- *pdf = 0.0f;
- label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
-
- if (*pdf != 0.0f)
- bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
-
- return label;
-}
-
-/* Volume Evaluation */
-
-template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState>
-ccl_device_inline void shader_eval_volume(KernelGlobals kg,
- ConstIntegratorGenericState state,
- ccl_private ShaderData *ccl_restrict sd,
- const uint32_t path_flag,
- StackReadOp stack_read)
-{
- /* If path is being terminated, we are tracing a shadow ray or evaluating
- * emission, then we don't need to store closures. The emission and shadow
- * shader data also do not have a closure array to save GPU memory. */
- int max_closures;
- if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
- max_closures = 0;
- }
- else {
- max_closures = kernel_data.max_closures;
- }
-
- /* reset closures once at the start, we will be accumulating the closures
- * for all volumes in the stack into a single array of closures */
- sd->num_closure = 0;
- sd->num_closure_left = max_closures;
- sd->flag = 0;
- sd->object_flag = 0;
-
- for (int i = 0;; i++) {
- const VolumeStack entry = stack_read(i);
- if (entry.shader == SHADER_NONE) {
- break;
- }
-
- /* Setup shader-data from stack. it's mostly setup already in
- * shader_setup_from_volume, this switching should be quick. */
- sd->object = entry.object;
- sd->lamp = LAMP_NONE;
- sd->shader = entry.shader;
-
- sd->flag &= ~SD_SHADER_FLAGS;
- sd->flag |= kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags;
- sd->object_flag &= ~SD_OBJECT_FLAGS;
-
- if (sd->object != OBJECT_NONE) {
- sd->object_flag |= kernel_data_fetch(object_flag, sd->object);
-
-# ifdef __OBJECT_MOTION__
- /* todo: this is inefficient for motion blur, we should be
- * caching matrices instead of recomputing them each step */
- shader_setup_object_transforms(kg, sd, sd->time);
-
- if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) != 0) {
- AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY);
- kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND);
-
- const float3 P = sd->P;
- const float velocity_scale = kernel_data_fetch(objects, sd->object).velocity_scale;
- const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ?
- 0.5f :
- 0.0f;
- const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ?
- (1.0f - kernel_data.cam.shuttertime) + sd->time :
- sd->time;
-
- /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity
- * existed, or will exist, at the given time:
- *
- * `phi(x, T) = phi(x - (T - t) * u(x, T), t)`
- *
- * where
- *
- * x : position
- * T : super-sampled time (or ray time)
- * t : current time of the simulation (in rendering we assume this is center frame with
- * relative time = 0)
- * phi : the volume quantity
- * u : the velocity field
- *
- * But first we need to determine the velocity field `u(x, T)`, which we can estimate also
- * using semi-lagrangian advection.
- *
- * `u(x, T) = u(x - (T - t) * u(x, T), t)`
- *
- * This is the typical way to model self-advection in fluid dynamics, however, we do not
- * account for other forces affecting the velocity during simulation (pressure, buoyancy,
- * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better
- * results, a higher order interpolation scheme can be used (at the cost of more lookups),
- * or an interpolation of the velocity fields for the previous and next frames could also
- * be used to estimate `u(x, T)` (which will cost more memory and lookups).
- *
- * References:
- * "Eulerian Motion Blur", Kim and Ko, 2007
- * "Production Volume Rendering", Wreninge et al., 2012
- */
-
- /* Find velocity. */
- float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
- object_dir_transform(kg, sd, &velocity);
-
- /* Find advected P. */
- sd->P = P - (time - time_offset) * velocity_scale * velocity;
-
- /* Find advected velocity. */
- velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
- object_dir_transform(kg, sd, &velocity);
-
- /* Find advected P. */
- sd->P = P - (time - time_offset) * velocity_scale * velocity;
- }
-# endif
- }
-
- /* evaluate shader */
-# ifdef __SVM__
-# ifdef __OSL__
- if (kg->osl) {
- OSLShader::eval_volume(kg, state, sd, path_flag);
- }
- else
-# endif
- {
- svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>(
- kg, state, sd, NULL, path_flag);
- }
-# endif
-
- /* Merge closures to avoid exceeding number of closures limit. */
- if (!shadow) {
- if (i > 0) {
- shader_merge_volume_closures(sd);
- }
- }
- }
-}
-
-#endif /* __VOLUME__ */
-
-/* Displacement Evaluation */
-
-template<typename ConstIntegratorGenericState>
-ccl_device void shader_eval_displacement(KernelGlobals kg,
- ConstIntegratorGenericState state,
- ccl_private ShaderData *sd)
-{
- sd->num_closure = 0;
- sd->num_closure_left = 0;
-
- /* this will modify sd->P */
-#ifdef __SVM__
-# ifdef __OSL__
- if (kg->osl)
- OSLShader::eval_displacement(kg, state, sd);
- else
-# endif
- {
- svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_DISPLACEMENT, SHADER_TYPE_DISPLACEMENT>(
- kg, state, sd, NULL, 0);
- }
-#endif
-}
-
-/* Cryptomatte */
-
-ccl_device float shader_cryptomatte_id(KernelGlobals kg, int shader)
-{
- return kernel_data_fetch(shaders, (shader & SHADER_MASK)).cryptomatte_id;
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/shadow_catcher.h b/intern/cycles/kernel/integrator/shadow_catcher.h
index ff63625aceb..a620853faea 100644
--- a/intern/cycles/kernel/integrator/shadow_catcher.h
+++ b/intern/cycles/kernel/integrator/shadow_catcher.h
@@ -3,7 +3,6 @@
#pragma once
-#include "kernel/film/write_passes.h"
#include "kernel/integrator/path_state.h"
#include "kernel/integrator/state_util.h"
@@ -76,28 +75,6 @@ ccl_device_forceinline bool kernel_shadow_catcher_is_object_pass(const uint32_t
return path_flag & PATH_RAY_SHADOW_CATCHER_PASS;
}
-/* Write shadow catcher passes on a bounce from the shadow catcher object. */
-ccl_device_forceinline void kernel_write_shadow_catcher_bounce_data(
- KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
-{
- kernel_assert(kernel_data.film.pass_shadow_catcher_sample_count != PASS_UNUSED);
- kernel_assert(kernel_data.film.pass_shadow_catcher_matte != PASS_UNUSED);
-
- const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
- const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
- kernel_data.film.pass_stride;
- ccl_global float *buffer = render_buffer + render_buffer_offset;
-
- /* Count sample for the shadow catcher object. */
- kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_sample_count, 1.0f);
-
- /* Since the split is done, the sample does not contribute to the matte, so accumulate it as
- * transparency to the matte. */
- const float3 throughput = INTEGRATOR_STATE(state, path, throughput);
- kernel_write_pass_float(buffer + kernel_data.film.pass_shadow_catcher_matte + 3,
- average(throughput));
-}
-
#endif /* __SHADOW_CATCHER__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/shadow_state_template.h b/intern/cycles/kernel/integrator/shadow_state_template.h
index c340467606d..d731d1df339 100644
--- a/intern/cycles/kernel/integrator/shadow_state_template.h
+++ b/intern/cycles/kernel/integrator/shadow_state_template.h
@@ -27,19 +27,29 @@ KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_T
/* enum PathRayFlag */
KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
/* Throughput. */
-KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING)
/* Throughput for shadow pass. */
KERNEL_STRUCT_MEMBER(shadow_path,
- packed_float3,
+ PackedSpectrum,
unshadowed_throughput,
KERNEL_FEATURE_SHADOW_PASS | KERNEL_FEATURE_AO_ADDITIVE)
/* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
-KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
-KERNEL_STRUCT_MEMBER(shadow_path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
/* Number of intersections found by ray-tracing. */
KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, num_hits, KERNEL_FEATURE_PATH_TRACING)
/* Light group. */
KERNEL_STRUCT_MEMBER(shadow_path, uint8_t, lightgroup, KERNEL_FEATURE_PATH_TRACING)
+/* Path guiding. */
+KERNEL_STRUCT_MEMBER(shadow_path, PackedSpectrum, unlit_throughput, KERNEL_FEATURE_PATH_GUIDING)
+#ifdef __PATH_GUIDING__
+KERNEL_STRUCT_MEMBER(shadow_path,
+ openpgl::cpp::PathSegment *,
+ path_segment,
+ KERNEL_FEATURE_PATH_GUIDING)
+#else
+KERNEL_STRUCT_MEMBER(shadow_path, uint64_t, path_segment, KERNEL_FEATURE_PATH_GUIDING)
+#endif
KERNEL_STRUCT_END(shadow_path)
/********************************** Shadow Ray *******************************/
diff --git a/intern/cycles/kernel/integrator/state.h b/intern/cycles/kernel/integrator/state.h
index d10d31e930e..f0fdc6f0d54 100644
--- a/intern/cycles/kernel/integrator/state.h
+++ b/intern/cycles/kernel/integrator/state.h
@@ -31,6 +31,10 @@
#include "util/types.h"
+#ifdef __PATH_GUIDING__
+# include "util/guiding.h"
+#endif
+
#pragma once
CCL_NAMESPACE_BEGIN
@@ -140,7 +144,7 @@ typedef struct IntegratorStateGPU {
* happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors
* from a kernel which operates on a shadow catcher state will cause bad memory access. */
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
/* Scalar access on CPU. */
@@ -159,7 +163,7 @@ typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState;
# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
((state)->nested_struct[array_index].member)
-#else /* __KERNEL_CPU__ */
+#else /* !__KERNEL_GPU__ */
/* Array access on GPU with Structure-of-Arrays. */
@@ -180,6 +184,6 @@ typedef int ConstIntegratorShadowState;
# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)
-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/state_flow.h b/intern/cycles/kernel/integrator/state_flow.h
index 4b03c665e17..40961b1c5fb 100644
--- a/intern/cycles/kernel/integrator/state_flow.h
+++ b/intern/cycles/kernel/integrator/state_flow.h
@@ -76,6 +76,9 @@ ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
&kernel_integrator_state.next_shadow_path_index[0], 1);
atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
+# ifdef __PATH_GUIDING__
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
+# endif
return shadow_state;
}
@@ -181,6 +184,9 @@ ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
{
IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
+# ifdef __PATH_GUIDING__
+ INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
+# endif
return shadow_state;
}
diff --git a/intern/cycles/kernel/integrator/state_template.h b/intern/cycles/kernel/integrator/state_template.h
index 5c2af131945..610621f0abe 100644
--- a/intern/cycles/kernel/integrator/state_template.h
+++ b/intern/cycles/kernel/integrator/state_template.h
@@ -46,12 +46,15 @@ KERNEL_STRUCT_MEMBER(path, float, min_ray_pdf, KERNEL_FEATURE_PATH_TRACING)
/* Continuation probability for path termination. */
KERNEL_STRUCT_MEMBER(path, float, continuation_probability, KERNEL_FEATURE_PATH_TRACING)
/* Throughput. */
-KERNEL_STRUCT_MEMBER(path, packed_float3, throughput, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(path, PackedSpectrum, throughput, KERNEL_FEATURE_PATH_TRACING)
+/* Factor to multiple with throughput to get remove any guiding PDFS.
+ * Such throughput without guiding PDFS is used for Russian roulette termination. */
+KERNEL_STRUCT_MEMBER(path, float, unguided_throughput, KERNEL_FEATURE_PATH_GUIDING)
/* Ratio of throughput to distinguish diffuse / glossy / transmission render passes. */
-KERNEL_STRUCT_MEMBER(path, packed_float3, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
-KERNEL_STRUCT_MEMBER(path, packed_float3, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_diffuse_weight, KERNEL_FEATURE_LIGHT_PASSES)
+KERNEL_STRUCT_MEMBER(path, PackedSpectrum, pass_glossy_weight, KERNEL_FEATURE_LIGHT_PASSES)
/* Denoising. */
-KERNEL_STRUCT_MEMBER(path, packed_float3, denoising_feature_throughput, KERNEL_FEATURE_DENOISING)
+KERNEL_STRUCT_MEMBER(path, PackedSpectrum, denoising_feature_throughput, KERNEL_FEATURE_DENOISING)
/* Shader sorting. */
/* TODO: compress as uint16? or leave out entirely and recompute key in sorting code? */
KERNEL_STRUCT_MEMBER(path, uint32_t, shader_sort_key, KERNEL_FEATURE_PATH_TRACING)
@@ -84,8 +87,8 @@ KERNEL_STRUCT_END(isect)
/*************** Subsurface closure state for subsurface kernel ***************/
KERNEL_STRUCT_BEGIN(subsurface)
-KERNEL_STRUCT_MEMBER(subsurface, packed_float3, albedo, KERNEL_FEATURE_SUBSURFACE)
-KERNEL_STRUCT_MEMBER(subsurface, packed_float3, radius, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, albedo, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, PackedSpectrum, radius, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_MEMBER(subsurface, packed_float3, Ng, KERNEL_FEATURE_SUBSURFACE)
KERNEL_STRUCT_END(subsurface)
@@ -98,3 +101,33 @@ KERNEL_STRUCT_ARRAY_MEMBER(volume_stack, int, shader, KERNEL_FEATURE_VOLUME)
KERNEL_STRUCT_END_ARRAY(volume_stack,
KERNEL_STRUCT_VOLUME_STACK_SIZE,
KERNEL_STRUCT_VOLUME_STACK_SIZE)
+
+/************************************ Path Guiding *****************************/
+KERNEL_STRUCT_BEGIN(guiding)
+#ifdef __PATH_GUIDING__
+/* Current path segment of the random walk/path. */
+KERNEL_STRUCT_MEMBER(guiding,
+ openpgl::cpp::PathSegment *,
+ path_segment,
+ KERNEL_FEATURE_PATH_GUIDING)
+#else
+/* Current path segment of the random walk/path. */
+KERNEL_STRUCT_MEMBER(guiding, uint64_t, path_segment, KERNEL_FEATURE_PATH_GUIDING)
+#endif
+/* If surface guiding is enabled */
+KERNEL_STRUCT_MEMBER(guiding, bool, use_surface_guiding, KERNEL_FEATURE_PATH_GUIDING)
+/* Random number used for additional guiding decisions (e.g., cache query, selection to use guiding
+ * or BSDF sampling) */
+KERNEL_STRUCT_MEMBER(guiding, float, sample_surface_guiding_rand, KERNEL_FEATURE_PATH_GUIDING)
+/* The probability to use surface guiding (i.e., diffuse sampling prob * guiding prob)*/
+KERNEL_STRUCT_MEMBER(guiding, float, surface_guiding_sampling_prob, KERNEL_FEATURE_PATH_GUIDING)
+/* Probability of sampling a BSSRDF closure instead of a BSDF closure*/
+KERNEL_STRUCT_MEMBER(guiding, float, bssrdf_sampling_prob, KERNEL_FEATURE_PATH_GUIDING)
+/* If volume guiding is enabled */
+KERNEL_STRUCT_MEMBER(guiding, bool, use_volume_guiding, KERNEL_FEATURE_PATH_GUIDING)
+/* Random number used for additional guiding decisions (e.g., cache query, selection to use guiding
+ * or BSDF sampling) */
+KERNEL_STRUCT_MEMBER(guiding, float, sample_volume_guiding_rand, KERNEL_FEATURE_PATH_GUIDING)
+/* The probability to use surface guiding (i.e., diffuse sampling prob * guiding prob). */
+KERNEL_STRUCT_MEMBER(guiding, float, volume_guiding_sampling_prob, KERNEL_FEATURE_PATH_GUIDING)
+KERNEL_STRUCT_END(guiding)
diff --git a/intern/cycles/kernel/integrator/state_util.h b/intern/cycles/kernel/integrator/state_util.h
index 8dd58ad6bcd..168122d3a78 100644
--- a/intern/cycles/kernel/integrator/state_util.h
+++ b/intern/cycles/kernel/integrator/state_util.h
@@ -338,7 +338,7 @@ ccl_device_inline IntegratorState integrator_state_shadow_catcher_split(KernelGl
return to_state;
}
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
ccl_device_inline int integrator_state_bounce(ConstIntegratorState state, const int)
{
return INTEGRATOR_STATE(state, path, bounce);
diff --git a/intern/cycles/kernel/integrator/subsurface.h b/intern/cycles/kernel/integrator/subsurface.h
index 2f96f215d8a..efd293e4141 100644
--- a/intern/cycles/kernel/integrator/subsurface.h
+++ b/intern/cycles/kernel/integrator/subsurface.h
@@ -15,9 +15,9 @@
#include "kernel/integrator/intersect_volume_stack.h"
#include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
#include "kernel/integrator/subsurface_disk.h"
#include "kernel/integrator/subsurface_random_walk.h"
+#include "kernel/integrator/surface_shader.h"
CCL_NAMESPACE_BEGIN
@@ -51,12 +51,10 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
PATH_RAY_SUBSURFACE_RANDOM_WALK);
/* Compute weight, optionally including Fresnel from entry point. */
- float3 weight = shader_bssrdf_sample_weight(sd, sc);
-# ifdef __PRINCIPLED__
+ Spectrum weight = surface_shader_bssrdf_sample_weight(sd, sc);
if (bssrdf->roughness != FLT_MAX) {
path_flag |= PATH_RAY_SUBSURFACE_USE_FRESNEL;
}
-# endif
if (sd->flag & SD_BACKFACING) {
path_flag |= PATH_RAY_SUBSURFACE_BACKFACING;
@@ -70,8 +68,8 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
if (kernel_data.kernel_features & KERNEL_FEATURE_LIGHT_PASSES) {
if (INTEGRATOR_STATE(state, path, bounce) == 0) {
- INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_float3();
- INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_float3();
+ INTEGRATOR_STATE_WRITE(state, path, pass_diffuse_weight) = one_spectrum();
+ INTEGRATOR_STATE_WRITE(state, path, pass_glossy_weight) = zero_spectrum();
}
}
@@ -80,6 +78,9 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
INTEGRATOR_STATE_WRITE(state, subsurface, radius) = bssrdf->radius;
INTEGRATOR_STATE_WRITE(state, subsurface, anisotropy) = bssrdf->anisotropy;
+ /* Path guiding. */
+ guiding_record_bssrdf_weight(kg, state, weight, bssrdf->albedo);
+
return LABEL_SUBSURFACE_SCATTER;
}
@@ -91,7 +92,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg,
/* Get bump mapped normal from shader evaluation at exit point. */
float3 N = sd->N;
if (sd->flag & SD_HAS_BSSRDF_BUMP) {
- N = shader_bssrdf_normal(sd);
+ N = surface_shader_bssrdf_normal(sd);
}
/* Setup diffuse BSDF at the exit point. This replaces shader_eval_surface. */
@@ -99,9 +100,8 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg,
sd->num_closure = 0;
sd->num_closure_left = kernel_data.max_closures;
- const float3 weight = one_float3();
+ const Spectrum weight = one_spectrum();
-# ifdef __PRINCIPLED__
if (path_flag & PATH_RAY_SUBSURFACE_USE_FRESNEL) {
ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
sd, sizeof(PrincipledDiffuseBsdf), weight);
@@ -112,9 +112,7 @@ ccl_device void subsurface_shader_data_setup(KernelGlobals kg,
sd->flag |= bsdf_principled_diffuse_setup(bsdf, PRINCIPLED_DIFFUSE_LAMBERT_EXIT);
}
}
- else
-# endif /* __PRINCIPLED__ */
- {
+ else {
ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
sd, sizeof(DiffuseBsdf), weight);
diff --git a/intern/cycles/kernel/integrator/subsurface_disk.h b/intern/cycles/kernel/integrator/subsurface_disk.h
index 2836934f6dd..16fb45392f4 100644
--- a/intern/cycles/kernel/integrator/subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/subsurface_disk.h
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
+#include "kernel/integrator/guiding.h"
+
CCL_NAMESPACE_BEGIN
/* BSSRDF using disk based importance sampling.
@@ -9,11 +11,11 @@ CCL_NAMESPACE_BEGIN
* http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
*/
-ccl_device_inline float3 subsurface_disk_eval(const float3 radius, float disk_r, float r)
+ccl_device_inline Spectrum subsurface_disk_eval(const Spectrum radius, float disk_r, float r)
{
- const float3 eval = bssrdf_eval(radius, r);
+ const Spectrum eval = bssrdf_eval(radius, r);
const float pdf = bssrdf_pdf(radius, disk_r);
- return (pdf > 0.0f) ? eval / pdf : zero_float3();
+ return (pdf > 0.0f) ? eval / pdf : zero_spectrum();
}
/* Subsurface scattering step, from a point on the surface to other
@@ -25,8 +27,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
ccl_private LocalIntersection &ss_isect)
{
- float disk_u, disk_v;
- path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &disk_u, &disk_v);
+ float2 rand_disk = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_DISK);
/* Read shading point info from integrator state. */
const float3 P = INTEGRATOR_STATE(state, ray, P);
@@ -37,7 +38,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
/* Read subsurface scattering parameters. */
- const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
+ const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius);
/* Pick random axis in local frame and point on disk. */
float3 disk_N, disk_T, disk_B;
@@ -46,20 +47,20 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
disk_N = Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
- if (disk_v < 0.5f) {
+ if (rand_disk.y < 0.5f) {
pick_pdf_N = 0.5f;
pick_pdf_T = 0.25f;
pick_pdf_B = 0.25f;
- disk_v *= 2.0f;
+ rand_disk.y *= 2.0f;
}
- else if (disk_v < 0.75f) {
+ else if (rand_disk.y < 0.75f) {
float3 tmp = disk_N;
disk_N = disk_T;
disk_T = tmp;
pick_pdf_N = 0.25f;
pick_pdf_T = 0.5f;
pick_pdf_B = 0.25f;
- disk_v = (disk_v - 0.5f) * 4.0f;
+ rand_disk.y = (rand_disk.y - 0.5f) * 4.0f;
}
else {
float3 tmp = disk_N;
@@ -68,14 +69,14 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
pick_pdf_N = 0.25f;
pick_pdf_T = 0.25f;
pick_pdf_B = 0.5f;
- disk_v = (disk_v - 0.75f) * 4.0f;
+ rand_disk.y = (rand_disk.y - 0.75f) * 4.0f;
}
/* Sample point on disk. */
- float phi = M_2PI_F * disk_v;
+ float phi = M_2PI_F * rand_disk.y;
float disk_height, disk_r;
- bssrdf_sample(radius, disk_u, &disk_r, &disk_height);
+ bssrdf_sample(radius, rand_disk.x, &disk_r, &disk_height);
float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
@@ -108,7 +109,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
* traversal algorithm. */
sort_intersections_and_normals(ss_isect.hits, ss_isect.Ng, num_eval_hits);
- float3 weights[BSSRDF_MAX_HITS]; /* TODO: zero? */
+ Spectrum weights[BSSRDF_MAX_HITS]; /* TODO: zero? */
float sum_weights = 0.0f;
for (int hit = 0; hit < num_eval_hits; hit++) {
@@ -126,17 +127,8 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
/* Transform normal to world space. */
Transform itfm;
- Transform tfm = object_fetch_transform_motion_test(kg, object, time, &itfm);
+ object_fetch_transform_motion_test(kg, object, time, &itfm);
hit_Ng = normalize(transform_direction_transposed(&itfm, hit_Ng));
-
- /* Transform t to world space, except for OptiX and MetalRT where it already is. */
-#ifdef __KERNEL_GPU_RAYTRACING__
- (void)tfm;
-#else
- float3 D = transform_direction(&itfm, ray.D);
- D = normalize(D) * ss_isect.hits[hit].t;
- ss_isect.hits[hit].t = len(transform_direction(&tfm, D));
-#endif
}
/* Quickly retrieve P and Ng without setting up ShaderData. */
@@ -159,7 +151,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
const float r = len(hit_P - P);
/* Evaluate profiles. */
- const float3 weight = subsurface_disk_eval(radius, disk_r, r) * w;
+ const Spectrum weight = subsurface_disk_eval(radius, disk_r, r) * w;
/* Store result. */
ss_isect.Ng[hit] = hit_Ng;
@@ -172,18 +164,19 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
}
/* Use importance resampling, sampling one of the hits proportional to weight. */
- const float r = lcg_step_float(&lcg_state) * sum_weights;
+ const float rand_resample = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_DISK_RESAMPLE);
+ const float r = rand_resample * sum_weights;
float partial_sum = 0.0f;
for (int hit = 0; hit < num_eval_hits; hit++) {
- const float3 weight = weights[hit];
+ const Spectrum weight = weights[hit];
const float sample_weight = average(fabs(weight));
float next_sum = partial_sum + sample_weight;
if (r < next_sum) {
/* Return exit point. */
- INTEGRATOR_STATE_WRITE(state, path, throughput) *= weight * sum_weights / sample_weight;
-
+ const Spectrum resampled_weight = weight * sum_weights / sample_weight;
+ INTEGRATOR_STATE_WRITE(state, path, throughput) *= resampled_weight;
ss_isect.hits[0] = ss_isect.hits[hit];
ss_isect.Ng[0] = ss_isect.Ng[hit];
@@ -191,6 +184,9 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
ray.D = ss_isect.Ng[hit];
ray.tmin = 0.0f;
ray.tmax = 1.0f;
+
+ guiding_record_bssrdf_bounce(
+ kg, state, 1.0f, Ng, -Ng, resampled_weight, INTEGRATOR_STATE(state, subsurface, albedo));
return true;
}
diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h
index c1691030817..fdcb66c32f5 100644
--- a/intern/cycles/kernel/integrator/subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h
@@ -5,6 +5,8 @@
#include "kernel/bvh/bvh.h"
+#include "kernel/integrator/guiding.h"
+
CCL_NAMESPACE_BEGIN
/* Random walk subsurface scattering.
@@ -65,19 +67,20 @@ ccl_device void subsurface_random_walk_remap(const float albedo,
*sigma_t = sigma_t_prime / (1.0f - g);
}
-ccl_device void subsurface_random_walk_coefficients(const float3 albedo,
- const float3 radius,
+ccl_device void subsurface_random_walk_coefficients(const Spectrum albedo,
+ const Spectrum radius,
const float anisotropy,
- ccl_private float3 *sigma_t,
- ccl_private float3 *alpha,
- ccl_private float3 *throughput)
+ ccl_private Spectrum *sigma_t,
+ ccl_private Spectrum *alpha,
+ ccl_private Spectrum *throughput)
{
- float sigma_t_x, sigma_t_y, sigma_t_z;
- float alpha_x, alpha_y, alpha_z;
-
- subsurface_random_walk_remap(albedo.x, radius.x, anisotropy, &sigma_t_x, &alpha_x);
- subsurface_random_walk_remap(albedo.y, radius.y, anisotropy, &sigma_t_y, &alpha_y);
- subsurface_random_walk_remap(albedo.z, radius.z, anisotropy, &sigma_t_z, &alpha_z);
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ subsurface_random_walk_remap(GET_SPECTRUM_CHANNEL(albedo, i),
+ GET_SPECTRUM_CHANNEL(radius, i),
+ anisotropy,
+ &GET_SPECTRUM_CHANNEL(*sigma_t, i),
+ &GET_SPECTRUM_CHANNEL(*alpha, i));
+ }
/* Throughput already contains closure weight at this point, which includes the
* albedo, as well as closure mixing and Fresnel weights. Divide out the albedo
@@ -88,21 +91,12 @@ ccl_device void subsurface_random_walk_coefficients(const float3 albedo,
* infinite phase functions. To avoid a sharp discontinuity as we go from
* such values to 0.0, increase alpha and reduce the throughput to compensate. */
const float min_alpha = 0.2f;
- if (alpha_x < min_alpha) {
- (*throughput).x *= alpha_x / min_alpha;
- alpha_x = min_alpha;
- }
- if (alpha_y < min_alpha) {
- (*throughput).y *= alpha_y / min_alpha;
- alpha_y = min_alpha;
- }
- if (alpha_z < min_alpha) {
- (*throughput).z *= alpha_z / min_alpha;
- alpha_z = min_alpha;
+ FOREACH_SPECTRUM_CHANNEL (i) {
+ if (GET_SPECTRUM_CHANNEL(*alpha, i) < min_alpha) {
+ GET_SPECTRUM_CHANNEL(*throughput, i) *= GET_SPECTRUM_CHANNEL(*alpha, i) / min_alpha;
+ GET_SPECTRUM_CHANNEL(*alpha, i) = min_alpha;
+ }
}
-
- *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
- *alpha = make_float3(alpha_x, alpha_y, alpha_z);
}
/* References for Dwivedi sampling:
@@ -151,12 +145,12 @@ ccl_device_forceinline float3 direction_from_cosine(float3 D, float cos_theta, f
return dir.x * T + dir.y * B + dir.z * D;
}
-ccl_device_forceinline float3 subsurface_random_walk_pdf(float3 sigma_t,
- float t,
- bool hit,
- ccl_private float3 *transmittance)
+ccl_device_forceinline Spectrum subsurface_random_walk_pdf(Spectrum sigma_t,
+ float t,
+ bool hit,
+ ccl_private Spectrum *transmittance)
{
- float3 T = volume_color_transmittance(sigma_t, t);
+ Spectrum T = volume_color_transmittance(sigma_t, t);
if (transmittance) {
*transmittance = T;
}
@@ -173,8 +167,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
ccl_private Ray &ray,
ccl_private LocalIntersection &ss_isect)
{
- float bssrdf_u, bssrdf_v;
- path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ const float2 rand_bsdf = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF);
const float3 P = INTEGRATOR_STATE(state, ray, P);
const float3 N = INTEGRATOR_STATE(state, ray, D);
@@ -187,7 +180,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* Sample diffuse surface scatter into the object. */
float3 D;
float pdf;
- sample_cos_hemisphere(-N, bssrdf_u, bssrdf_v, &D, &pdf);
+ sample_cos_hemisphere(-N, rand_bsdf.x, rand_bsdf.y, &D, &pdf);
if (dot(-Ng, D) <= 0.0f) {
return false;
}
@@ -205,22 +198,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
ray.self.light_object = OBJECT_NONE;
ray.self.light_prim = PRIM_NONE;
-#ifndef __KERNEL_GPU_RAYTRACING__
- /* Compute or fetch object transforms. */
- Transform ob_itfm ccl_optional_struct_init;
- Transform ob_tfm = object_fetch_transform_motion_test(kg, object, time, &ob_itfm);
-#endif
-
/* Convert subsurface to volume coefficients.
* The single-scattering albedo is named alpha to avoid confusion with the surface albedo. */
- const float3 albedo = INTEGRATOR_STATE(state, subsurface, albedo);
- const float3 radius = INTEGRATOR_STATE(state, subsurface, radius);
+ const Spectrum albedo = INTEGRATOR_STATE(state, subsurface, albedo);
+ const Spectrum radius = INTEGRATOR_STATE(state, subsurface, radius);
const float anisotropy = INTEGRATOR_STATE(state, subsurface, anisotropy);
- float3 sigma_t, alpha;
- float3 throughput = INTEGRATOR_STATE_WRITE(state, path, throughput);
+ Spectrum sigma_t, alpha;
+ Spectrum throughput = INTEGRATOR_STATE(state, path, throughput);
subsurface_random_walk_coefficients(albedo, radius, anisotropy, &sigma_t, &alpha, &throughput);
- float3 sigma_s = sigma_t * alpha;
+ Spectrum sigma_s = sigma_t * alpha;
/* Theoretically it should be better to use the exact alpha for the channel we're sampling at
* each bounce, but in practice there doesn't seem to be a noticeable difference in exchange
@@ -243,7 +230,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
const float phase_log = logf((diffusion_length + 1.0f) / (diffusion_length - 1.0f));
/* Modify state for RNGs, decorrelated from other paths. */
- rng_state.rng_hash = cmj_hash(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);
+ rng_state.rng_hash = hash_hp_seeded_uint(rng_state.rng_hash + rng_state.rng_offset, 0xdeadbeef);
/* Random walk until we hit the surface again. */
bool hit = false;
@@ -255,10 +242,10 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
const float guided_fraction = 1.0f - fmaxf(0.5f, powf(fabsf(anisotropy), 0.125f));
#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL
- float3 sigma_s_star = sigma_s * (1.0f - anisotropy);
- float3 sigma_t_star = sigma_t - sigma_s + sigma_s_star;
- float3 sigma_t_org = sigma_t;
- float3 sigma_s_org = sigma_s;
+ Spectrum sigma_s_star = sigma_s * (1.0f - anisotropy);
+ Spectrum sigma_t_star = sigma_t - sigma_s + sigma_s_star;
+ Spectrum sigma_t_org = sigma_t;
+ Spectrum sigma_s_org = sigma_s;
const float anisotropy_org = anisotropy;
const float guided_fraction_org = guided_fraction;
#endif
@@ -270,7 +257,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
#ifdef SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL
// shadow with local variables according to depth
float anisotropy, guided_fraction;
- float3 sigma_s, sigma_t;
+ Spectrum sigma_s, sigma_t;
if (bounce <= SUBSURFACE_RANDOM_WALK_SIMILARITY_LEVEL) {
anisotropy = anisotropy_org;
guided_fraction = guided_fraction_org;
@@ -286,11 +273,11 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
#endif
/* Sample color channel, use MIS with balance heuristic. */
- float rphase = path_state_rng_1D(kg, &rng_state, PRNG_PHASE_CHANNEL);
- float3 channel_pdf;
+ float rphase = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_PHASE_CHANNEL);
+ Spectrum channel_pdf;
int channel = volume_sample_channel(alpha, throughput, rphase, &channel_pdf);
float sample_sigma_t = volume_channel_get(sigma_t, channel);
- float randt = path_state_rng_1D(kg, &rng_state, PRNG_SCATTER_DISTANCE);
+ float randt = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_SCATTER_DISTANCE);
/* We need the result of the ray-cast to compute the full guided PDF, so just remember the
* relevant terms to avoid recomputing them later. */
@@ -303,7 +290,8 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* For the initial ray, we already know the direction, so just do classic distance sampling. */
if (bounce > 0) {
/* Decide whether we should use guided or classic sampling. */
- bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_LIGHT_TERMINATE) < guided_fraction);
+ bool guided = (path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_STRATEGY) <
+ guided_fraction);
/* Determine if we want to sample away from the incoming interface.
* This only happens if we found a nearby opposite interface, and the probability for it
@@ -317,27 +305,28 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
float x = clamp(dot(ray.P - P, -N), 0.0f, opposite_distance);
backward_fraction = 1.0f /
(1.0f + expf((opposite_distance - 2.0f * x) / diffusion_length));
- guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_TERMINATE) < backward_fraction;
+ guide_backward = path_state_rng_1D(kg, &rng_state, PRNG_SUBSURFACE_GUIDE_DIRECTION) <
+ backward_fraction;
}
/* Sample scattering direction. */
- float scatter_u, scatter_v;
- path_state_rng_2D(kg, &rng_state, PRNG_BSDF_U, &scatter_u, &scatter_v);
+ const float2 rand_scatter = path_state_rng_2D(kg, &rng_state, PRNG_SUBSURFACE_BSDF);
float cos_theta;
float hg_pdf;
if (guided) {
- cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, scatter_u);
+ cos_theta = sample_phase_dwivedi(diffusion_length, phase_log, rand_scatter.x);
/* The backwards guiding distribution is just mirrored along `sd->N`, so swapping the
* sign here is enough to sample from that instead. */
if (guide_backward) {
cos_theta = -cos_theta;
}
- float3 newD = direction_from_cosine(N, cos_theta, scatter_v);
+ float3 newD = direction_from_cosine(N, cos_theta, rand_scatter.y);
hg_pdf = single_peaked_henyey_greenstein(dot(ray.D, newD), anisotropy);
ray.D = newD;
}
else {
- float3 newD = henyey_greenstrein_sample(ray.D, anisotropy, scatter_u, scatter_v, &hg_pdf);
+ float3 newD = henyey_greenstrein_sample(
+ ray.D, anisotropy, rand_scatter.x, rand_scatter.y, &hg_pdf);
cos_theta = dot(newD, N);
ray.D = newD;
}
@@ -363,7 +352,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
}
}
- /* Sample direction along ray. */
+ /* Sample distance along ray. */
float t = -logf(1.0f - randt) / sample_sigma_t;
/* On the first bounce, we use the ray-cast to check if the opposite side is nearby.
@@ -383,15 +372,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
hit = (ss_isect.num_hits > 0);
if (hit) {
-#ifdef __KERNEL_GPU_RAYTRACING__
- /* t is always in world space with OptiX and MetalRT. */
ray.tmax = ss_isect.hits[0].t;
-#else
- /* Compute world space distance to surface hit. */
- float3 D = transform_direction(&ob_itfm, ray.D);
- D = normalize(D) * ss_isect.hits[0].t;
- ray.tmax = len(transform_direction(&ob_tfm, D));
-#endif
}
if (bounce == 0) {
@@ -413,16 +394,17 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* Advance to new scatter location. */
ray.P += t * ray.D;
- float3 transmittance;
- float3 pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance);
+ Spectrum transmittance;
+ Spectrum pdf = subsurface_random_walk_pdf(sigma_t, t, hit, &transmittance);
if (bounce > 0) {
/* Compute PDF just like we do for classic sampling, but with the stretched sigma_t. */
- float3 guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL);
+ Spectrum guided_pdf = subsurface_random_walk_pdf(forward_stretching * sigma_t, t, hit, NULL);
if (have_opposite_interface) {
/* First step of MIS: Depending on geometry we might have two methods for guided
* sampling, so perform MIS between them. */
- float3 back_pdf = subsurface_random_walk_pdf(backward_stretching * sigma_t, t, hit, NULL);
+ Spectrum back_pdf = subsurface_random_walk_pdf(
+ backward_stretching * sigma_t, t, hit, NULL);
guided_pdf = mix(
guided_pdf * forward_pdf_factor, back_pdf * backward_pdf_factor, backward_fraction);
}
@@ -444,9 +426,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
/* If we hit the surface, we are done. */
break;
}
- else if (throughput.x < VOLUME_THROUGHPUT_EPSILON &&
- throughput.y < VOLUME_THROUGHPUT_EPSILON &&
- throughput.z < VOLUME_THROUGHPUT_EPSILON) {
+ else if (reduce_max(throughput) < VOLUME_THROUGHPUT_EPSILON) {
/* Avoid unnecessary work and precision issue when throughput gets really small. */
break;
}
@@ -454,6 +434,16 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
if (hit) {
kernel_assert(isfinite_safe(throughput));
+
+ guiding_record_bssrdf_bounce(
+ kg,
+ state,
+ pdf,
+ N,
+ D,
+ safe_divide_color(throughput, INTEGRATOR_STATE(state, path, throughput)),
+ albedo);
+
INTEGRATOR_STATE_WRITE(state, path, throughput) = throughput;
}
diff --git a/intern/cycles/kernel/integrator/surface_shader.h b/intern/cycles/kernel/integrator/surface_shader.h
new file mode 100644
index 00000000000..6c0097b11bd
--- /dev/null
+++ b/intern/cycles/kernel/integrator/surface_shader.h
@@ -0,0 +1,860 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/* Functions to evaluate shaders. */
+
+#pragma once
+
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf.h"
+#include "kernel/closure/bsdf_util.h"
+#include "kernel/closure/emissive.h"
+
+#include "kernel/integrator/guiding.h"
+
+#ifdef __SVM__
+# include "kernel/svm/svm.h"
+#endif
+#ifdef __OSL__
+# include "kernel/osl/osl.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Guiding */
+
+#ifdef __PATH_GUIDING__
+ccl_device_inline void surface_shader_prepare_guiding(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private ShaderData *sd,
+ ccl_private const RNGState *rng_state)
+{
+ /* Have any BSDF to guide? */
+ if (!(kernel_data.integrator.use_surface_guiding && (sd->flag & SD_BSDF_HAS_EVAL))) {
+ state->guiding.use_surface_guiding = false;
+ return;
+ }
+
+ const float surface_guiding_probability = kernel_data.integrator.surface_guiding_probability;
+ float rand_bsdf_guiding = path_state_rng_1D(kg, rng_state, PRNG_SURFACE_BSDF_GUIDING);
+
+ /* Compute proportion of diffuse BSDF and BSSRDFs .*/
+ float diffuse_sampling_fraction = 0.0f;
+ float bssrdf_sampling_fraction = 0.0f;
+ float bsdf_bssrdf_sampling_sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ const float sweight = sc->sample_weight;
+ kernel_assert(sweight >= 0.0f);
+
+ bsdf_bssrdf_sampling_sum += sweight;
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) && sc->type < CLOSURE_BSDF_TRANSLUCENT_ID) {
+ diffuse_sampling_fraction += sweight;
+ }
+ if (CLOSURE_IS_BSSRDF(sc->type)) {
+ bssrdf_sampling_fraction += sweight;
+ }
+ }
+ }
+
+ if (bsdf_bssrdf_sampling_sum > 0.0f) {
+ diffuse_sampling_fraction /= bsdf_bssrdf_sampling_sum;
+ bssrdf_sampling_fraction /= bsdf_bssrdf_sampling_sum;
+ }
+
+ /* Init guiding (diffuse BSDFs only for now). */
+ if (!(diffuse_sampling_fraction > 0.0f &&
+ guiding_bsdf_init(kg, state, sd->P, sd->N, rand_bsdf_guiding))) {
+ state->guiding.use_surface_guiding = false;
+ return;
+ }
+
+ state->guiding.use_surface_guiding = true;
+ state->guiding.surface_guiding_sampling_prob = surface_guiding_probability *
+ diffuse_sampling_fraction;
+ state->guiding.bssrdf_sampling_prob = bssrdf_sampling_fraction;
+ state->guiding.sample_surface_guiding_rand = rand_bsdf_guiding;
+
+ kernel_assert(state->guiding.surface_guiding_sampling_prob > 0.0f &&
+ state->guiding.surface_guiding_sampling_prob <= 1.0f);
+}
+#endif
+
+ccl_device_inline void surface_shader_prepare_closures(KernelGlobals kg,
+ ConstIntegratorState state,
+ ccl_private ShaderData *sd,
+ const uint32_t path_flag)
+{
+ /* Filter out closures. */
+ if (kernel_data.integrator.filter_closures) {
+ if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_EMISSION) {
+ sd->closure_emission_background = zero_spectrum();
+ }
+
+ if (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIRECT_LIGHT) {
+ sd->flag &= ~SD_BSDF_HAS_EVAL;
+ }
+
+ if (path_flag & PATH_RAY_CAMERA) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+
+ if ((CLOSURE_IS_BSDF_DIFFUSE(sc->type) &&
+ (kernel_data.integrator.filter_closures & FILTER_CLOSURE_DIFFUSE)) ||
+ (CLOSURE_IS_BSDF_GLOSSY(sc->type) &&
+ (kernel_data.integrator.filter_closures & FILTER_CLOSURE_GLOSSY)) ||
+ (CLOSURE_IS_BSDF_TRANSMISSION(sc->type) &&
+ (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSMISSION))) {
+ sc->type = CLOSURE_NONE_ID;
+ sc->sample_weight = 0.0f;
+ }
+ else if ((CLOSURE_IS_BSDF_TRANSPARENT(sc->type) &&
+ (kernel_data.integrator.filter_closures & FILTER_CLOSURE_TRANSPARENT))) {
+ sc->type = CLOSURE_HOLDOUT_ID;
+ sc->sample_weight = 0.0f;
+ sd->flag |= SD_HOLDOUT;
+ }
+ }
+ }
+ }
+
+ /* Defensive sampling.
+ *
+ * We can likely also do defensive sampling at deeper bounces, particularly
+ * for cases like a perfect mirror but possibly also others. This will need
+ * a good heuristic. */
+ if (INTEGRATOR_STATE(state, path, bounce) + INTEGRATOR_STATE(state, path, transparent_bounce) ==
+ 0 &&
+ sd->num_closure > 1) {
+ float sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sum += sc->sample_weight;
+ }
+ }
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sc->sample_weight = max(sc->sample_weight, 0.125f * sum);
+ }
+ }
+ }
+
+ /* Filter glossy.
+ *
+ * Blurring of bsdf after bounces, for rays that have a small likelihood
+ * of following this particular path (diffuse, rough glossy) */
+ if (kernel_data.integrator.filter_glossy != FLT_MAX
+#ifdef __MNEE__
+ && !(INTEGRATOR_STATE(state, path, mnee) & PATH_MNEE_VALID)
+#endif
+ ) {
+ float blur_pdf = kernel_data.integrator.filter_glossy *
+ INTEGRATOR_STATE(state, path, min_ray_pdf);
+
+ if (blur_pdf < 1.0f) {
+ float blur_roughness = sqrtf(1.0f - blur_pdf) * 0.5f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF(sc->type)) {
+ bsdf_blur(kg, sc, blur_roughness);
+ }
+ }
+ }
+ }
+}
+
+/* BSDF */
+#if 0
+ccl_device_inline void surface_shader_validate_bsdf_sample(const KernelGlobals kg,
+ const ShaderClosure *sc,
+ const float3 omega_in,
+ const int org_label,
+ const float2 org_roughness,
+ const float org_eta)
+{
+ /* Validate the the bsdf_label and bsdf_roughness_eta functions
+ * by estimating the values after a bsdf sample. */
+ const int comp_label = bsdf_label(kg, sc, omega_in);
+ kernel_assert(org_label == comp_label);
+
+ float2 comp_roughness;
+ float comp_eta;
+ bsdf_roughness_eta(kg, sc, &comp_roughness, &comp_eta);
+ kernel_assert(org_eta == comp_eta);
+ kernel_assert(org_roughness.x == comp_roughness.x);
+ kernel_assert(org_roughness.y == comp_roughness.y);
+}
+#endif
+
+ccl_device_forceinline bool _surface_shader_exclude(ClosureType type, uint light_shader_flags)
+{
+ if (!(light_shader_flags & SHADER_EXCLUDE_ANY)) {
+ return false;
+ }
+ if (light_shader_flags & SHADER_EXCLUDE_DIFFUSE) {
+ if (CLOSURE_IS_BSDF_DIFFUSE(type)) {
+ return true;
+ }
+ }
+ if (light_shader_flags & SHADER_EXCLUDE_GLOSSY) {
+ if (CLOSURE_IS_BSDF_GLOSSY(type)) {
+ return true;
+ }
+ }
+ if (light_shader_flags & SHADER_EXCLUDE_TRANSMIT) {
+ if (CLOSURE_IS_BSDF_TRANSMISSION(type)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+ccl_device_inline float _surface_shader_bsdf_eval_mis(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ const float3 omega_in,
+ ccl_private const ShaderClosure *skip_sc,
+ ccl_private BsdfEval *result_eval,
+ float sum_pdf,
+ float sum_sample_weight,
+ const uint light_shader_flags)
+{
+ /* This is the veach one-sample model with balance heuristic,
+ * some PDF factors drop out when using balance heuristic weighting. */
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (sc == skip_sc) {
+ continue;
+ }
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) {
+ float bsdf_pdf = 0.0f;
+ Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
+
+ if (bsdf_pdf != 0.0f) {
+ bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
+ sum_pdf += bsdf_pdf * sc->sample_weight;
+ }
+ }
+
+ sum_sample_weight += sc->sample_weight;
+ }
+ }
+
+ return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
+}
+
+ccl_device_inline float surface_shader_bsdf_eval_pdfs(const KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ const float3 omega_in,
+ ccl_private BsdfEval *result_eval,
+ ccl_private float *pdfs,
+ const uint light_shader_flags)
+{
+ /* This is the veach one-sample model with balance heuristic, some pdf
+ * factors drop out when using balance heuristic weighting. */
+ float sum_pdf = 0.0f;
+ float sum_sample_weight = 0.0f;
+ bsdf_eval_init(result_eval, CLOSURE_NONE_ID, zero_spectrum());
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ if (CLOSURE_IS_BSDF(sc->type) && !_surface_shader_exclude(sc->type, light_shader_flags)) {
+ float bsdf_pdf = 0.0f;
+ Spectrum eval = bsdf_eval(kg, sd, sc, omega_in, &bsdf_pdf);
+ kernel_assert(bsdf_pdf >= 0.0f);
+ if (bsdf_pdf != 0.0f) {
+ bsdf_eval_accum(result_eval, sc->type, eval * sc->weight);
+ sum_pdf += bsdf_pdf * sc->sample_weight;
+ kernel_assert(bsdf_pdf * sc->sample_weight >= 0.0f);
+ pdfs[i] = bsdf_pdf * sc->sample_weight;
+ }
+ else {
+ pdfs[i] = 0.0f;
+ }
+ }
+ else {
+ pdfs[i] = 0.0f;
+ }
+
+ sum_sample_weight += sc->sample_weight;
+ }
+ else {
+ pdfs[i] = 0.0f;
+ }
+ }
+ if (sum_pdf > 0.0f) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ pdfs[i] /= sum_pdf;
+ }
+ }
+
+ return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
+}
+
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else
+ccl_device_inline
+#endif
+ float
+ surface_shader_bsdf_eval(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private ShaderData *sd,
+ const float3 omega_in,
+ ccl_private BsdfEval *bsdf_eval,
+ const uint light_shader_flags)
+{
+ bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, zero_spectrum());
+
+ float pdf = _surface_shader_bsdf_eval_mis(
+ kg, sd, omega_in, NULL, bsdf_eval, 0.0f, 0.0f, light_shader_flags);
+
+#if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ if (state->guiding.use_surface_guiding) {
+ const float guiding_sampling_prob = state->guiding.surface_guiding_sampling_prob;
+ const float bssrdf_sampling_prob = state->guiding.bssrdf_sampling_prob;
+ const float guide_pdf = guiding_bsdf_pdf(kg, state, omega_in);
+ pdf = (guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob)) +
+ (1.0f - guiding_sampling_prob) * pdf;
+ }
+#endif
+
+ return pdf;
+}
+
+/* Randomly sample a BSSRDF or BSDF proportional to ShaderClosure.sample_weight. */
+ccl_device_inline ccl_private const ShaderClosure *surface_shader_bsdf_bssrdf_pick(
+ ccl_private const ShaderData *ccl_restrict sd, ccl_private float2 *rand_bsdf)
+{
+ int sampled = 0;
+
+ if (sd->num_closure > 1) {
+ /* Pick a BSDF or based on sample weights. */
+ float sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sum += sc->sample_weight;
+ }
+ }
+
+ float r = (*rand_bsdf).x * sum;
+ float partial_sum = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ float next_sum = partial_sum + sc->sample_weight;
+
+ if (r < next_sum) {
+ sampled = i;
+
+ /* Rescale to reuse for direction sample, to better preserve stratification. */
+ (*rand_bsdf).x = (r - partial_sum) / sc->sample_weight;
+ break;
+ }
+
+ partial_sum = next_sum;
+ }
+ }
+ }
+
+ return &sd->closure[sampled];
+}
+
+/* Return weight for picked BSSRDF. */
+ccl_device_inline Spectrum
+surface_shader_bssrdf_sample_weight(ccl_private const ShaderData *ccl_restrict sd,
+ ccl_private const ShaderClosure *ccl_restrict bssrdf_sc)
+{
+ Spectrum weight = bssrdf_sc->weight;
+
+ if (sd->num_closure > 1) {
+ float sum = 0.0f;
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type)) {
+ sum += sc->sample_weight;
+ }
+ }
+ weight *= sum / bssrdf_sc->sample_weight;
+ }
+
+ return weight;
+}
+
+#ifdef __PATH_GUIDING__
+/* Sample direction for picked BSDF, and return evaluation and pdf for all
+ * BSDFs combined using MIS. */
+
+ccl_device int surface_shader_bsdf_guided_sample_closure(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private ShaderData *sd,
+ ccl_private const ShaderClosure *sc,
+ const float2 rand_bsdf,
+ ccl_private BsdfEval *bsdf_eval,
+ ccl_private float3 *omega_in,
+ ccl_private float *bsdf_pdf,
+ ccl_private float *unguided_bsdf_pdf,
+ ccl_private float2 *sampled_rougness,
+ ccl_private float *eta)
+{
+ /* BSSRDF should already have been handled elsewhere. */
+ kernel_assert(CLOSURE_IS_BSDF(sc->type));
+
+ const bool use_surface_guiding = state->guiding.use_surface_guiding;
+ const float guiding_sampling_prob = state->guiding.surface_guiding_sampling_prob;
+ const float bssrdf_sampling_prob = state->guiding.bssrdf_sampling_prob;
+
+ /* Decide between sampling guiding distribution and BSDF. */
+ bool sample_guiding = false;
+ float rand_bsdf_guiding = state->guiding.sample_surface_guiding_rand;
+
+ if (use_surface_guiding && rand_bsdf_guiding < guiding_sampling_prob) {
+ sample_guiding = true;
+ rand_bsdf_guiding /= guiding_sampling_prob;
+ }
+ else {
+ rand_bsdf_guiding -= guiding_sampling_prob;
+ rand_bsdf_guiding /= (1.0f - guiding_sampling_prob);
+ }
+
+ /* Initialize to zero. */
+ int label = LABEL_NONE;
+ Spectrum eval = zero_spectrum();
+ bsdf_eval_init(bsdf_eval, CLOSURE_NONE_ID, eval);
+
+ *unguided_bsdf_pdf = 0.0f;
+ float guide_pdf = 0.0f;
+
+ if (sample_guiding) {
+ /* Sample guiding distribution. */
+ guide_pdf = guiding_bsdf_sample(kg, state, rand_bsdf, omega_in);
+ *bsdf_pdf = 0.0f;
+
+ if (guide_pdf != 0.0f) {
+ float unguided_bsdf_pdfs[MAX_CLOSURE];
+
+ *unguided_bsdf_pdf = surface_shader_bsdf_eval_pdfs(
+ kg, sd, *omega_in, bsdf_eval, unguided_bsdf_pdfs, 0);
+ *bsdf_pdf = (guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob)) +
+ ((1.0f - guiding_sampling_prob) * (*unguided_bsdf_pdf));
+ float sum_pdfs = 0.0f;
+
+ if (*unguided_bsdf_pdf > 0.0f) {
+ int idx = -1;
+ for (int i = 0; i < sd->num_closure; i++) {
+ sum_pdfs += unguided_bsdf_pdfs[i];
+ if (rand_bsdf_guiding <= sum_pdfs) {
+ idx = i;
+ break;
+ }
+ }
+
+ kernel_assert(idx >= 0);
+ /* Set the default idx to the last in the list.
+ * in case of numerical problems and rand_bsdf_guiding is just >=1.0f and
+ * the sum of all unguided_bsdf_pdfs is just < 1.0f. */
+ idx = (rand_bsdf_guiding > sum_pdfs) ? sd->num_closure - 1 : idx;
+
+ label = bsdf_label(kg, &sd->closure[idx], *omega_in);
+ }
+ }
+
+ kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f);
+
+ *sampled_rougness = make_float2(1.0f, 1.0f);
+ *eta = 1.0f;
+ }
+ else {
+ /* Sample BSDF. */
+ *bsdf_pdf = 0.0f;
+ label = bsdf_sample(kg,
+ sd,
+ sc,
+ rand_bsdf.x,
+ rand_bsdf.y,
+ &eval,
+ omega_in,
+ unguided_bsdf_pdf,
+ sampled_rougness,
+ eta);
+# if 0
+ if (*unguided_bsdf_pdf > 0.0f) {
+ surface_shader_validate_bsdf_sample(kg, sc, *omega_in, label, sampled_roughness, eta);
+ }
+# endif
+
+ if (*unguided_bsdf_pdf != 0.0f) {
+ bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
+
+ kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f);
+
+ if (sd->num_closure > 1) {
+ float sweight = sc->sample_weight;
+ *unguided_bsdf_pdf = _surface_shader_bsdf_eval_mis(
+ kg, sd, *omega_in, sc, bsdf_eval, (*unguided_bsdf_pdf) * sweight, sweight, 0);
+ kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f);
+ }
+ *bsdf_pdf = *unguided_bsdf_pdf;
+
+ if (use_surface_guiding) {
+ guide_pdf = guiding_bsdf_pdf(kg, state, *omega_in);
+ *bsdf_pdf *= 1.0f - guiding_sampling_prob;
+ *bsdf_pdf += guiding_sampling_prob * guide_pdf * (1.0f - bssrdf_sampling_prob);
+ }
+ }
+
+ kernel_assert(reduce_min(bsdf_eval_sum(bsdf_eval)) >= 0.0f);
+ }
+
+ return label;
+}
+#endif
+
+/* Sample direction for picked BSDF, and return evaluation and pdf for all
+ * BSDFs combined using MIS. */
+ccl_device int surface_shader_bsdf_sample_closure(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ ccl_private const ShaderClosure *sc,
+ const float2 rand_bsdf,
+ ccl_private BsdfEval *bsdf_eval,
+ ccl_private float3 *omega_in,
+ ccl_private float *pdf,
+ ccl_private float2 *sampled_roughness,
+ ccl_private float *eta)
+{
+ /* BSSRDF should already have been handled elsewhere. */
+ kernel_assert(CLOSURE_IS_BSDF(sc->type));
+
+ int label;
+ Spectrum eval = zero_spectrum();
+
+ *pdf = 0.0f;
+ label = bsdf_sample(
+ kg, sd, sc, rand_bsdf.x, rand_bsdf.y, &eval, omega_in, pdf, sampled_roughness, eta);
+
+ if (*pdf != 0.0f) {
+ bsdf_eval_init(bsdf_eval, sc->type, eval * sc->weight);
+
+ if (sd->num_closure > 1) {
+ float sweight = sc->sample_weight;
+ *pdf = _surface_shader_bsdf_eval_mis(
+ kg, sd, *omega_in, sc, bsdf_eval, *pdf * sweight, sweight, 0);
+ }
+ }
+ else {
+ bsdf_eval_init(bsdf_eval, sc->type, zero_spectrum());
+ }
+
+ return label;
+}
+
+ccl_device float surface_shader_average_roughness(ccl_private const ShaderData *sd)
+{
+ float roughness = 0.0f;
+ float sum_weight = 0.0f;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF(sc->type)) {
+ /* sqrt once to undo the squaring from multiplying roughness on the
+ * two axes, and once for the squared roughness convention. */
+ float weight = fabsf(average(sc->weight));
+ roughness += weight * sqrtf(safe_sqrtf(bsdf_get_roughness_squared(sc)));
+ sum_weight += weight;
+ }
+ }
+
+ return (sum_weight > 0.0f) ? roughness / sum_weight : 0.0f;
+}
+
+ccl_device Spectrum surface_shader_transparency(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ if (sd->flag & SD_HAS_ONLY_VOLUME) {
+ return one_spectrum();
+ }
+ else if (sd->flag & SD_TRANSPARENT) {
+ return sd->closure_transparent_extinction;
+ }
+ else {
+ return zero_spectrum();
+ }
+}
+
+ccl_device void surface_shader_disable_transparency(KernelGlobals kg, ccl_private ShaderData *sd)
+{
+ if (sd->flag & SD_TRANSPARENT) {
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+
+ if (sc->type == CLOSURE_BSDF_TRANSPARENT_ID) {
+ sc->sample_weight = 0.0f;
+ sc->weight = zero_spectrum();
+ }
+ }
+
+ sd->flag &= ~SD_TRANSPARENT;
+ }
+}
+
+ccl_device Spectrum surface_shader_alpha(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ Spectrum alpha = one_spectrum() - surface_shader_transparency(kg, sd);
+
+ alpha = saturate(alpha);
+
+ return alpha;
+}
+
+ccl_device Spectrum surface_shader_diffuse(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ Spectrum eval = zero_spectrum();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type) || CLOSURE_IS_BSSRDF(sc->type))
+ eval += sc->weight;
+ }
+
+ return eval;
+}
+
+ccl_device Spectrum surface_shader_glossy(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ Spectrum eval = zero_spectrum();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ eval += sc->weight;
+ }
+
+ return eval;
+}
+
+ccl_device Spectrum surface_shader_transmission(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ Spectrum eval = zero_spectrum();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_TRANSMISSION(sc->type))
+ eval += sc->weight;
+ }
+
+ return eval;
+}
+
+ccl_device float3 surface_shader_average_normal(KernelGlobals kg, ccl_private const ShaderData *sd)
+{
+ float3 N = zero_float3();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_BSDF_OR_BSSRDF(sc->type))
+ N += sc->N * fabsf(average(sc->weight));
+ }
+
+ return (is_zero(N)) ? sd->N : normalize(N);
+}
+
+ccl_device Spectrum surface_shader_ao(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ const float ao_factor,
+ ccl_private float3 *N_)
+{
+ Spectrum eval = zero_spectrum();
+ float3 N = zero_float3();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSDF_DIFFUSE(sc->type)) {
+ ccl_private const DiffuseBsdf *bsdf = (ccl_private const DiffuseBsdf *)sc;
+ eval += sc->weight * ao_factor;
+ N += bsdf->N * fabsf(average(sc->weight));
+ }
+ }
+
+ *N_ = (is_zero(N)) ? sd->N : normalize(N);
+ return eval;
+}
+
+#ifdef __SUBSURFACE__
+ccl_device float3 surface_shader_bssrdf_normal(ccl_private const ShaderData *sd)
+{
+ float3 N = zero_float3();
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+
+ if (CLOSURE_IS_BSSRDF(sc->type)) {
+ ccl_private const Bssrdf *bssrdf = (ccl_private const Bssrdf *)sc;
+ float avg_weight = fabsf(average(sc->weight));
+
+ N += bssrdf->N * avg_weight;
+ }
+ }
+
+ return (is_zero(N)) ? sd->N : normalize(N);
+}
+#endif /* __SUBSURFACE__ */
+
+/* Constant emission optimization */
+
+ccl_device bool surface_shader_constant_emission(KernelGlobals kg,
+ int shader,
+ ccl_private Spectrum *eval)
+{
+ int shader_index = shader & SHADER_MASK;
+ int shader_flag = kernel_data_fetch(shaders, shader_index).flags;
+
+ if (shader_flag & SD_HAS_CONSTANT_EMISSION) {
+ const float3 emission_rgb = make_float3(
+ kernel_data_fetch(shaders, shader_index).constant_emission[0],
+ kernel_data_fetch(shaders, shader_index).constant_emission[1],
+ kernel_data_fetch(shaders, shader_index).constant_emission[2]);
+ *eval = rgb_to_spectrum(emission_rgb);
+
+ return true;
+ }
+
+ return false;
+}
+
+/* Background */
+
+ccl_device Spectrum surface_shader_background(ccl_private const ShaderData *sd)
+{
+ if (sd->flag & SD_EMISSION) {
+ return sd->closure_emission_background;
+ }
+ else {
+ return zero_spectrum();
+ }
+}
+
+/* Emission */
+
+ccl_device Spectrum surface_shader_emission(ccl_private const ShaderData *sd)
+{
+ if (sd->flag & SD_EMISSION) {
+ return emissive_simple_eval(sd->Ng, sd->I) * sd->closure_emission_background;
+ }
+ else {
+ return zero_spectrum();
+ }
+}
+
+/* Holdout */
+
+ccl_device Spectrum surface_shader_apply_holdout(KernelGlobals kg, ccl_private ShaderData *sd)
+{
+ Spectrum weight = zero_spectrum();
+
+ /* For objects marked as holdout, preserve transparency and remove all other
+ * closures, replacing them with a holdout weight. */
+ if (sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
+ if ((sd->flag & SD_TRANSPARENT) && !(sd->flag & SD_HAS_ONLY_VOLUME)) {
+ weight = one_spectrum() - sd->closure_transparent_extinction;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sc = &sd->closure[i];
+ if (!CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
+ sc->type = NBUILTIN_CLOSURES;
+ }
+ }
+
+ sd->flag &= ~(SD_CLOSURE_FLAGS - (SD_TRANSPARENT | SD_BSDF));
+ }
+ else {
+ weight = one_spectrum();
+ }
+ }
+ else {
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *sc = &sd->closure[i];
+ if (CLOSURE_IS_HOLDOUT(sc->type)) {
+ weight += sc->weight;
+ }
+ }
+ }
+
+ return weight;
+}
+
+/* Surface Evaluation */
+
+template<uint node_feature_mask, typename ConstIntegratorGenericState>
+ccl_device void surface_shader_eval(KernelGlobals kg,
+ ConstIntegratorGenericState state,
+ ccl_private ShaderData *ccl_restrict sd,
+ ccl_global float *ccl_restrict buffer,
+ uint32_t path_flag,
+ bool use_caustics_storage = false)
+{
+ /* If path is being terminated, we are tracing a shadow ray or evaluating
+ * emission, then we don't need to store closures. The emission and shadow
+ * shader data also do not have a closure array to save GPU memory. */
+ int max_closures;
+ if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+ max_closures = 0;
+ }
+ else {
+ max_closures = use_caustics_storage ? CAUSTICS_MAX_CLOSURE : kernel_data.max_closures;
+ }
+
+ sd->num_closure = 0;
+ sd->num_closure_left = max_closures;
+
+#ifdef __OSL__
+ if (kg->osl) {
+ if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
+ OSLShader::eval_background(kg, state, sd, path_flag);
+ }
+ else {
+ OSLShader::eval_surface(kg, state, sd, path_flag);
+ }
+ }
+ else
+#endif
+ {
+#ifdef __SVM__
+ svm_eval_nodes<node_feature_mask, SHADER_TYPE_SURFACE>(kg, state, sd, buffer, path_flag);
+#else
+ if (sd->object == OBJECT_NONE) {
+ sd->closure_emission_background = make_spectrum(0.8f);
+ sd->flag |= SD_EMISSION;
+ }
+ else {
+ ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(DiffuseBsdf), make_spectrum(0.8f));
+ if (bsdf != NULL) {
+ bsdf->N = sd->N;
+ sd->flag |= bsdf_diffuse_setup(bsdf);
+ }
+ }
+#endif
+ }
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/volume_shader.h b/intern/cycles/kernel/integrator/volume_shader.h
new file mode 100644
index 00000000000..0ff968723a1
--- /dev/null
+++ b/intern/cycles/kernel/integrator/volume_shader.h
@@ -0,0 +1,519 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/* Volume shader evaluation and sampling. */
+
+#pragma once
+
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf.h"
+#include "kernel/closure/bsdf_util.h"
+#include "kernel/closure/emissive.h"
+
+#ifdef __SVM__
+# include "kernel/svm/svm.h"
+#endif
+#ifdef __OSL__
+# include "kernel/osl/osl.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __VOLUME__
+
+/* Merging */
+
+ccl_device_inline void volume_shader_merge_closures(ccl_private ShaderData *sd)
+{
+ /* Merge identical closures to save closure space with stacked volumes. */
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private ShaderClosure *sci = &sd->closure[i];
+
+ if (sci->type != CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+ continue;
+ }
+
+ for (int j = i + 1; j < sd->num_closure; j++) {
+ ccl_private ShaderClosure *scj = &sd->closure[j];
+ if (sci->type != scj->type) {
+ continue;
+ }
+
+ ccl_private const HenyeyGreensteinVolume *hgi = (ccl_private const HenyeyGreensteinVolume *)
+ sci;
+ ccl_private const HenyeyGreensteinVolume *hgj = (ccl_private const HenyeyGreensteinVolume *)
+ scj;
+ if (!(hgi->g == hgj->g)) {
+ continue;
+ }
+
+ sci->weight += scj->weight;
+ sci->sample_weight += scj->sample_weight;
+
+ int size = sd->num_closure - (j + 1);
+ if (size > 0) {
+ for (int k = 0; k < size; k++) {
+ scj[k] = scj[k + 1];
+ }
+ }
+
+ sd->num_closure--;
+ kernel_assert(sd->num_closure >= 0);
+ j--;
+ }
+ }
+}
+
+ccl_device_inline void volume_shader_copy_phases(ccl_private ShaderVolumePhases *ccl_restrict
+ phases,
+ ccl_private const ShaderData *ccl_restrict sd)
+{
+ phases->num_closure = 0;
+
+ for (int i = 0; i < sd->num_closure; i++) {
+ ccl_private const ShaderClosure *from_sc = &sd->closure[i];
+ ccl_private const HenyeyGreensteinVolume *from_hg =
+ (ccl_private const HenyeyGreensteinVolume *)from_sc;
+
+ if (from_sc->type == CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) {
+ ccl_private ShaderVolumeClosure *to_sc = &phases->closure[phases->num_closure];
+
+ to_sc->weight = from_sc->weight;
+ to_sc->sample_weight = from_sc->sample_weight;
+ to_sc->g = from_hg->g;
+ phases->num_closure++;
+ if (phases->num_closure >= MAX_VOLUME_CLOSURE) {
+ break;
+ }
+ }
+ }
+}
+
+/* Guiding */
+
+# ifdef __PATH_GUIDING__
+ccl_device_inline void volume_shader_prepare_guiding(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private ShaderData *sd,
+ ccl_private const RNGState *rng_state,
+ const float3 P,
+ const float3 D,
+ ccl_private ShaderVolumePhases *phases,
+ const VolumeSampleMethod direct_sample_method)
+{
+ /* Have any phase functions to guide? */
+ const int num_phases = phases->num_closure;
+ if (!kernel_data.integrator.use_volume_guiding || num_phases == 0) {
+ state->guiding.use_volume_guiding = false;
+ return;
+ }
+
+ const float volume_guiding_probability = kernel_data.integrator.volume_guiding_probability;
+ float rand_phase_guiding = path_state_rng_1D(kg, rng_state, PRNG_VOLUME_PHASE_GUIDING);
+
+ /* If we have more than one phase function we select one random based on its
+ * sample weight to calculate the product distribution for guiding. */
+ int phase_id = 0;
+ float phase_weight = 1.0f;
+
+ if (num_phases > 1) {
+ /* Pick a phase closure based on sample weights. */
+ float sum = 0.0f;
+
+ for (phase_id = 0; phase_id < num_phases; phase_id++) {
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id];
+ sum += svc->sample_weight;
+ }
+
+ float r = rand_phase_guiding * sum;
+ float partial_sum = 0.0f;
+
+ for (phase_id = 0; phase_id < num_phases; phase_id++) {
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id];
+ float next_sum = partial_sum + svc->sample_weight;
+
+ if (r <= next_sum) {
+ /* Rescale to reuse. */
+ rand_phase_guiding = (r - partial_sum) / svc->sample_weight;
+ phase_weight = svc->sample_weight / sum;
+ break;
+ }
+
+ partial_sum = next_sum;
+ }
+
+ /* Adjust the sample weight of the component used for guiding. */
+ phases->closure[phase_id].sample_weight *= volume_guiding_probability;
+ }
+
+ /* Init guiding for selected phase function. */
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[phase_id];
+ if (!guiding_phase_init(kg, state, P, D, svc->g, rand_phase_guiding)) {
+ state->guiding.use_volume_guiding = false;
+ return;
+ }
+
+ state->guiding.use_volume_guiding = true;
+ state->guiding.sample_volume_guiding_rand = rand_phase_guiding;
+ state->guiding.volume_guiding_sampling_prob = volume_guiding_probability * phase_weight;
+
+ kernel_assert(state->guiding.volume_guiding_sampling_prob > 0.0f &&
+ state->guiding.volume_guiding_sampling_prob <= 1.0f);
+}
+# endif
+
+/* Phase Evaluation & Sampling */
+
+/* Randomly sample a volume phase function proportional to ShaderClosure.sample_weight. */
+ccl_device_inline ccl_private const ShaderVolumeClosure *volume_shader_phase_pick(
+ ccl_private const ShaderVolumePhases *phases, ccl_private float2 *rand_phase)
+{
+ int sampled = 0;
+
+ if (phases->num_closure > 1) {
+ /* pick a phase closure based on sample weights */
+ float sum = 0.0f;
+
+ for (int i = 0; i < phases->num_closure; i++) {
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[sampled];
+ sum += svc->sample_weight;
+ }
+
+ float r = (*rand_phase).x * sum;
+ float partial_sum = 0.0f;
+
+ for (int i = 0; i < phases->num_closure; i++) {
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
+ float next_sum = partial_sum + svc->sample_weight;
+
+ if (r <= next_sum) {
+ /* Rescale to reuse for volume phase direction sample. */
+ sampled = i;
+ (*rand_phase).x = (r - partial_sum) / svc->sample_weight;
+ break;
+ }
+
+ partial_sum = next_sum;
+ }
+ }
+
+ /* todo: this isn't quite correct, we don't weight anisotropy properly
+ * depending on color channels, even if this is perhaps not a common case */
+ return &phases->closure[sampled];
+}
+
+ccl_device_inline float _volume_shader_phase_eval_mis(ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumePhases *phases,
+ const float3 omega_in,
+ int skip_phase,
+ ccl_private BsdfEval *result_eval,
+ float sum_pdf,
+ float sum_sample_weight)
+{
+ for (int i = 0; i < phases->num_closure; i++) {
+ if (i == skip_phase)
+ continue;
+
+ ccl_private const ShaderVolumeClosure *svc = &phases->closure[i];
+ float phase_pdf = 0.0f;
+ Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
+
+ if (phase_pdf != 0.0f) {
+ bsdf_eval_accum(result_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+ sum_pdf += phase_pdf * svc->sample_weight;
+ }
+
+ sum_sample_weight += svc->sample_weight;
+ }
+
+ return (sum_sample_weight > 0.0f) ? sum_pdf / sum_sample_weight : 0.0f;
+}
+
+ccl_device float volume_shader_phase_eval(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumeClosure *svc,
+ const float3 omega_in,
+ ccl_private BsdfEval *phase_eval)
+{
+ float phase_pdf = 0.0f;
+ Spectrum eval = volume_phase_eval(sd, svc, omega_in, &phase_pdf);
+
+ if (phase_pdf != 0.0f) {
+ bsdf_eval_accum(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+ }
+
+ return phase_pdf;
+}
+
+ccl_device float volume_shader_phase_eval(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumePhases *phases,
+ const float3 omega_in,
+ ccl_private BsdfEval *phase_eval)
+{
+ bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum());
+
+ float pdf = _volume_shader_phase_eval_mis(sd, phases, omega_in, -1, phase_eval, 0.0f, 0.0f);
+
+# if defined(__PATH_GUIDING__) && PATH_GUIDING_LEVEL >= 4
+ if (state->guiding.use_volume_guiding) {
+ const float guiding_sampling_prob = state->guiding.volume_guiding_sampling_prob;
+ const float guide_pdf = guiding_phase_pdf(kg, state, omega_in);
+ pdf = (guiding_sampling_prob * guide_pdf) + (1.0f - guiding_sampling_prob) * pdf;
+ }
+# endif
+
+ return pdf;
+}
+
+# ifdef __PATH_GUIDING__
+ccl_device int volume_shader_phase_guided_sample(KernelGlobals kg,
+ IntegratorState state,
+ ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumeClosure *svc,
+ const float2 rand_phase,
+ ccl_private BsdfEval *phase_eval,
+ ccl_private float3 *omega_in,
+ ccl_private float *phase_pdf,
+ ccl_private float *unguided_phase_pdf,
+ ccl_private float *sampled_roughness)
+{
+ const bool use_volume_guiding = state->guiding.use_volume_guiding;
+ const float guiding_sampling_prob = state->guiding.volume_guiding_sampling_prob;
+
+ /* Decide between sampling guiding distribution and phase. */
+ float rand_phase_guiding = state->guiding.sample_volume_guiding_rand;
+ bool sample_guiding = false;
+ if (use_volume_guiding && rand_phase_guiding < guiding_sampling_prob) {
+ sample_guiding = true;
+ rand_phase_guiding /= guiding_sampling_prob;
+ }
+ else {
+ rand_phase_guiding -= guiding_sampling_prob;
+ rand_phase_guiding /= (1.0f - guiding_sampling_prob);
+ }
+
+ /* Initialize to zero. */
+ int label = LABEL_NONE;
+ Spectrum eval = zero_spectrum();
+
+ *unguided_phase_pdf = 0.0f;
+ float guide_pdf = 0.0f;
+ *sampled_roughness = 1.0f - fabsf(svc->g);
+
+ bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum());
+
+ if (sample_guiding) {
+ /* Sample guiding distribution. */
+ guide_pdf = guiding_phase_sample(kg, state, rand_phase, omega_in);
+ *phase_pdf = 0.0f;
+
+ if (guide_pdf != 0.0f) {
+ *unguided_phase_pdf = volume_shader_phase_eval(kg, sd, svc, *omega_in, phase_eval);
+ *phase_pdf = (guiding_sampling_prob * guide_pdf) +
+ ((1.0f - guiding_sampling_prob) * (*unguided_phase_pdf));
+ label = LABEL_VOLUME_SCATTER;
+ }
+ }
+ else {
+ /* Sample phase. */
+ *phase_pdf = 0.0f;
+ label = volume_phase_sample(
+ sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, unguided_phase_pdf);
+
+ if (*unguided_phase_pdf != 0.0f) {
+ bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+
+ *phase_pdf = *unguided_phase_pdf;
+ if (use_volume_guiding) {
+ guide_pdf = guiding_phase_pdf(kg, state, *omega_in);
+ *phase_pdf *= 1.0f - guiding_sampling_prob;
+ *phase_pdf += guiding_sampling_prob * guide_pdf;
+ }
+
+ kernel_assert(reduce_min(bsdf_eval_sum(phase_eval)) >= 0.0f);
+ }
+ else {
+ bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, zero_spectrum());
+ }
+
+ kernel_assert(reduce_min(bsdf_eval_sum(phase_eval)) >= 0.0f);
+ }
+
+ return label;
+}
+# endif
+
+ccl_device int volume_shader_phase_sample(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ ccl_private const ShaderVolumePhases *phases,
+ ccl_private const ShaderVolumeClosure *svc,
+ float2 rand_phase,
+ ccl_private BsdfEval *phase_eval,
+ ccl_private float3 *omega_in,
+ ccl_private float *pdf,
+ ccl_private float *sampled_roughness)
+{
+ *sampled_roughness = 1.0f - fabsf(svc->g);
+ Spectrum eval = zero_spectrum();
+
+ *pdf = 0.0f;
+ int label = volume_phase_sample(sd, svc, rand_phase.x, rand_phase.y, &eval, omega_in, pdf);
+
+ if (*pdf != 0.0f) {
+ bsdf_eval_init(phase_eval, CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID, eval);
+ }
+
+ return label;
+}
+
+/* Motion Blur */
+
+# ifdef __OBJECT_MOTION__
+ccl_device_inline void volume_shader_motion_blur(KernelGlobals kg,
+ ccl_private ShaderData *ccl_restrict sd)
+{
+ if ((sd->object_flag & SD_OBJECT_HAS_VOLUME_MOTION) == 0) {
+ return;
+ }
+
+ AttributeDescriptor v_desc = find_attribute(kg, sd, ATTR_STD_VOLUME_VELOCITY);
+ kernel_assert(v_desc.offset != ATTR_STD_NOT_FOUND);
+
+ const float3 P = sd->P;
+ const float velocity_scale = kernel_data_fetch(objects, sd->object).velocity_scale;
+ const float time_offset = kernel_data.cam.motion_position == MOTION_POSITION_CENTER ? 0.5f :
+ 0.0f;
+ const float time = kernel_data.cam.motion_position == MOTION_POSITION_END ?
+ (1.0f - kernel_data.cam.shuttertime) + sd->time :
+ sd->time;
+
+ /* Use a 1st order semi-lagrangian advection scheme to estimate what volume quantity
+ * existed, or will exist, at the given time:
+ *
+ * `phi(x, T) = phi(x - (T - t) * u(x, T), t)`
+ *
+ * where
+ *
+ * x : position
+ * T : super-sampled time (or ray time)
+ * t : current time of the simulation (in rendering we assume this is center frame with
+ * relative time = 0)
+ * phi : the volume quantity
+ * u : the velocity field
+ *
+ * But first we need to determine the velocity field `u(x, T)`, which we can estimate also
+ * using semi-lagrangian advection.
+ *
+ * `u(x, T) = u(x - (T - t) * u(x, T), t)`
+ *
+ * This is the typical way to model self-advection in fluid dynamics, however, we do not
+ * account for other forces affecting the velocity during simulation (pressure, buoyancy,
+ * etc.): this gives a linear interpolation when fluid are mostly "curvy". For better
+ * results, a higher order interpolation scheme can be used (at the cost of more lookups),
+ * or an interpolation of the velocity fields for the previous and next frames could also
+ * be used to estimate `u(x, T)` (which will cost more memory and lookups).
+ *
+ * References:
+ * "Eulerian Motion Blur", Kim and Ko, 2007
+ * "Production Volume Rendering", Wreninge et al., 2012
+ */
+
+ /* Find velocity. */
+ float3 velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
+ object_dir_transform(kg, sd, &velocity);
+
+ /* Find advected P. */
+ sd->P = P - (time - time_offset) * velocity_scale * velocity;
+
+ /* Find advected velocity. */
+ velocity = primitive_volume_attribute_float3(kg, sd, v_desc);
+ object_dir_transform(kg, sd, &velocity);
+
+ /* Find advected P. */
+ sd->P = P - (time - time_offset) * velocity_scale * velocity;
+}
+# endif
+
+/* Volume Evaluation */
+
+template<const bool shadow, typename StackReadOp, typename ConstIntegratorGenericState>
+ccl_device_inline void volume_shader_eval(KernelGlobals kg,
+ ConstIntegratorGenericState state,
+ ccl_private ShaderData *ccl_restrict sd,
+ const uint32_t path_flag,
+ StackReadOp stack_read)
+{
+ /* If path is being terminated, we are tracing a shadow ray or evaluating
+ * emission, then we don't need to store closures. The emission and shadow
+ * shader data also do not have a closure array to save GPU memory. */
+ int max_closures;
+ if (path_flag & (PATH_RAY_TERMINATE | PATH_RAY_SHADOW | PATH_RAY_EMISSION)) {
+ max_closures = 0;
+ }
+ else {
+ max_closures = kernel_data.max_closures;
+ }
+
+ /* reset closures once at the start, we will be accumulating the closures
+ * for all volumes in the stack into a single array of closures */
+ sd->num_closure = 0;
+ sd->num_closure_left = max_closures;
+ sd->flag = 0;
+ sd->object_flag = 0;
+
+ for (int i = 0;; i++) {
+ const VolumeStack entry = stack_read(i);
+ if (entry.shader == SHADER_NONE) {
+ break;
+ }
+
+ /* Setup shader-data from stack. it's mostly setup already in
+ * shader_setup_from_volume, this switching should be quick. */
+ sd->object = entry.object;
+ sd->lamp = LAMP_NONE;
+ sd->shader = entry.shader;
+
+ sd->flag &= ~SD_SHADER_FLAGS;
+ sd->flag |= kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).flags;
+ sd->object_flag &= ~SD_OBJECT_FLAGS;
+
+ if (sd->object != OBJECT_NONE) {
+ sd->object_flag |= kernel_data_fetch(object_flag, sd->object);
+
+# ifdef __OBJECT_MOTION__
+ /* todo: this is inefficient for motion blur, we should be
+ * caching matrices instead of recomputing them each step */
+ shader_setup_object_transforms(kg, sd, sd->time);
+
+ volume_shader_motion_blur(kg, sd);
+# endif
+ }
+
+ /* evaluate shader */
+# ifdef __OSL__
+ if (kg->osl) {
+ OSLShader::eval_volume(kg, state, sd, path_flag);
+ }
+ else
+# endif
+ {
+# ifdef __SVM__
+ svm_eval_nodes<KERNEL_FEATURE_NODE_MASK_VOLUME, SHADER_TYPE_VOLUME>(
+ kg, state, sd, NULL, path_flag);
+# endif
+ }
+
+ /* Merge closures to avoid exceeding number of closures limit. */
+ if (!shadow) {
+ if (i > 0) {
+ volume_shader_merge_closures(sd);
+ }
+ }
+ }
+}
+
+#endif /* __VOLUME__ */
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/integrator/volume_stack.h b/intern/cycles/kernel/integrator/volume_stack.h
index 97a0f0f386c..675e1927fc0 100644
--- a/intern/cycles/kernel/integrator/volume_stack.h
+++ b/intern/cycles/kernel/integrator/volume_stack.h
@@ -39,7 +39,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
break;
}
- if (entry.object == sd->object) {
+ if (entry.object == sd->object && entry.shader == sd->shader) {
/* Shift back next stack entries. */
do {
entry = stack_read(i + 1);
@@ -61,7 +61,7 @@ ccl_device void volume_stack_enter_exit(KernelGlobals kg,
}
/* Already in the stack? then we have nothing to do. */
- if (entry.object == sd->object) {
+ if (entry.object == sd->object && entry.shader == sd->shader) {
return;
}
}
diff --git a/intern/cycles/kernel/light/background.h b/intern/cycles/kernel/light/background.h
index 2a97d43c9ce..951620ff1cb 100644
--- a/intern/cycles/kernel/light/background.h
+++ b/intern/cycles/kernel/light/background.h
@@ -9,8 +9,6 @@ CCL_NAMESPACE_BEGIN
/* Background Light */
-#ifdef __BACKGROUND_MIS__
-
ccl_device float3 background_map_sample(KernelGlobals kg,
float randu,
float randv,
@@ -435,6 +433,4 @@ ccl_device float background_light_pdf(KernelGlobals kg, float3 P, float3 directi
return pdf * kernel_data.integrator.pdf_lights;
}
-#endif
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/light/light.h b/intern/cycles/kernel/light/light.h
index b939489bb18..12a6f21b58d 100644
--- a/intern/cycles/kernel/light/light.h
+++ b/intern/cycles/kernel/light/light.h
@@ -86,7 +86,6 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
ls->pdf = invarea / (costheta * costheta * costheta);
ls->eval_fac = ls->pdf;
}
-#ifdef __BACKGROUND_MIS__
else if (type == LIGHT_BACKGROUND) {
/* infinite area light (e.g. light dome or env light) */
float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf);
@@ -97,7 +96,6 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
ls->t = FLT_MAX;
ls->eval_fac = 1.0f;
}
-#endif
else {
ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
@@ -202,8 +200,12 @@ ccl_device_inline bool light_sample(KernelGlobals kg,
inplane = ls->P - inplane;
}
- ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
- ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
+ const float light_u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu));
+ const float light_v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv));
+
+ /* NOTE: Return barycentric coordinates in the same notation as Embree and OptiX. */
+ ls->u = light_v + 0.5f;
+ ls->v = -light_u - light_v;
ls->Ng = Ng;
ls->D = normalize_len(ls->P - P, &ls->t);
diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h
index 210bb1b35c2..e0d4f221bef 100644
--- a/intern/cycles/kernel/light/sample.h
+++ b/intern/cycles/kernel/light/sample.h
@@ -4,7 +4,7 @@
#pragma once
#include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
+#include "kernel/integrator/surface_shader.h"
#include "kernel/light/light.h"
@@ -14,7 +14,7 @@
CCL_NAMESPACE_BEGIN
/* Evaluate shader on light. */
-ccl_device_noinline_cpu float3
+ccl_device_noinline_cpu Spectrum
light_sample_shader_eval(KernelGlobals kg,
IntegratorState state,
ccl_private ShaderData *ccl_restrict emission_sd,
@@ -22,24 +22,21 @@ light_sample_shader_eval(KernelGlobals kg,
float time)
{
/* setup shading at emitter */
- float3 eval = zero_float3();
+ Spectrum eval = zero_spectrum();
- if (shader_constant_emission_eval(kg, ls->shader, &eval)) {
+ if (surface_shader_constant_emission(kg, ls->shader, &eval)) {
if ((ls->prim != PRIM_NONE) && dot(ls->Ng, ls->D) > 0.0f) {
ls->Ng = -ls->Ng;
}
}
else {
- /* Setup shader data and call shader_eval_surface once, better
+ /* Setup shader data and call surface_shader_eval once, better
* for GPU coherence and compile times. */
PROFILING_INIT_FOR_SHADER(kg, PROFILING_SHADE_LIGHT_SETUP);
-#ifdef __BACKGROUND_MIS__
if (ls->type == LIGHT_BACKGROUND) {
shader_setup_from_background(kg, emission_sd, ls->P, ls->D, time);
}
- else
-#endif
- {
+ else {
shader_setup_from_sample(kg,
emission_sd,
ls->P,
@@ -63,18 +60,15 @@ light_sample_shader_eval(KernelGlobals kg,
/* No proper path flag, we're evaluating this for all closures. that's
* weak but we'd have to do multiple evaluations otherwise. */
- shader_eval_surface<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
+ surface_shader_eval<KERNEL_FEATURE_NODE_MASK_SURFACE_LIGHT>(
kg, state, emission_sd, NULL, PATH_RAY_EMISSION);
/* Evaluate closures. */
-#ifdef __BACKGROUND_MIS__
if (ls->type == LIGHT_BACKGROUND) {
- eval = shader_background_eval(emission_sd);
+ eval = surface_shader_background(emission_sd);
}
- else
-#endif
- {
- eval = shader_emissive_eval(emission_sd);
+ else {
+ eval = surface_shader_emission(emission_sd);
}
}
@@ -82,7 +76,8 @@ light_sample_shader_eval(KernelGlobals kg,
if (ls->lamp != LAMP_NONE) {
ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp);
- eval *= make_float3(klight->strength[0], klight->strength[1], klight->strength[2]);
+ eval *= rgb_to_spectrum(
+ make_float3(klight->strength[0], klight->strength[1], klight->strength[2]));
}
return eval;
@@ -137,8 +132,9 @@ ccl_device_inline float3 shadow_ray_smooth_surface_offset(
triangle_vertices_and_normals(kg, sd->prim, V, N);
}
- const float u = sd->u, v = sd->v;
- const float w = 1 - u - v;
+ const float u = 1.0f - sd->u - sd->v;
+ const float v = sd->u;
+ const float w = sd->v;
float3 P = V[0] * u + V[1] * v + V[2] * w; /* Local space */
float3 n = N[0] * u + N[1] * v + N[2] * w; /* We get away without normalization */
diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt
index 7570490be7c..5075e4e1528 100644
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -10,21 +10,18 @@ set(INC_SYS
)
set(SRC
- background.cpp
- bsdf_diffuse_ramp.cpp
- bsdf_phong_ramp.cpp
- emissive.cpp
- bssrdf.cpp
closures.cpp
+ globals.cpp
services.cpp
- shader.cpp
)
set(HEADER_SRC
- closures.h
+ closures_setup.h
+ closures_template.h
globals.h
+ osl.h
services.h
- shader.h
+ types.h
)
set(LIB
diff --git a/intern/cycles/kernel/osl/background.cpp b/intern/cycles/kernel/osl/background.cpp
deleted file mode 100644
index 865ff4ddc6d..00000000000
--- a/intern/cycles/kernel/osl/background.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#include <OpenImageIO/fmath.h>
-
-#include <OSL/genclosure.h>
-
-#include "kernel/osl/closures.h"
-
-// clang-format off
-#include "kernel/device/cpu/compat.h"
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/emissive.h"
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-using namespace OSL;
-
-/// Generic background closure
-///
-/// We only have a background closure for the shaders
-/// to return a color in background shaders. No methods,
-/// only the weight is taking into account
-///
-class GenericBackgroundClosure : public CClosurePrimitive {
- public:
- void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight)
- {
- background_setup(sd, weight);
- }
-};
-
-/// Holdout closure
-///
-/// This will be used by the shader to mark the
-/// amount of holdout for the current shading
-/// point. No parameters, only the weight will be
-/// used
-///
-class HoldoutClosure : CClosurePrimitive {
- public:
- void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight)
- {
- closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, weight);
- sd->flag |= SD_HOLDOUT;
- }
-};
-
-ClosureParam *closure_background_params()
-{
- static ClosureParam params[] = {
- CLOSURE_STRING_KEYPARAM(GenericBackgroundClosure, label, "label"),
- CLOSURE_FINISH_PARAM(GenericBackgroundClosure)};
- return params;
-}
-
-CCLOSURE_PREPARE(closure_background_prepare, GenericBackgroundClosure)
-
-ClosureParam *closure_holdout_params()
-{
- static ClosureParam params[] = {CLOSURE_FINISH_PARAM(HoldoutClosure)};
- return params;
-}
-
-CCLOSURE_PREPARE(closure_holdout_prepare, HoldoutClosure)
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp b/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
deleted file mode 100644
index 39fcee1ac0d..00000000000
--- a/intern/cycles/kernel/osl/bsdf_diffuse_ramp.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#include <OpenImageIO/fmath.h>
-
-#include <OSL/genclosure.h>
-
-#include "kernel/device/cpu/compat.h"
-#include "kernel/osl/closures.h"
-
-// clang-format off
-#include "kernel/types.h"
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/bsdf_diffuse_ramp.h"
-#include "kernel/closure/bsdf_util.h"
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-using namespace OSL;
-
-class DiffuseRampClosure : public CBSDFClosure {
- public:
- DiffuseRampBsdf params;
- Color3 colors[8];
-
- void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- DiffuseRampBsdf *bsdf = (DiffuseRampBsdf *)bsdf_alloc_osl(
- sd, sizeof(DiffuseRampBsdf), weight, &params);
-
- if (bsdf) {
- bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8);
-
- if (bsdf->colors) {
- for (int i = 0; i < 8; i++)
- bsdf->colors[i] = TO_FLOAT3(colors[i]);
-
- sd->flag |= bsdf_diffuse_ramp_setup(bsdf);
- }
- }
- }
-};
-
-ClosureParam *closure_bsdf_diffuse_ramp_params()
-{
- static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(DiffuseRampClosure, params.N),
- CLOSURE_COLOR_ARRAY_PARAM(DiffuseRampClosure, colors, 8),
- CLOSURE_STRING_KEYPARAM(DiffuseRampClosure, label, "label"),
- CLOSURE_FINISH_PARAM(DiffuseRampClosure)};
- return params;
-}
-
-CCLOSURE_PREPARE(closure_bsdf_diffuse_ramp_prepare, DiffuseRampClosure)
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp b/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
deleted file mode 100644
index 972ed7e4a6d..00000000000
--- a/intern/cycles/kernel/osl/bsdf_phong_ramp.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#include <OpenImageIO/fmath.h>
-
-#include <OSL/genclosure.h>
-
-#include "kernel/device/cpu/compat.h"
-#include "kernel/osl/closures.h"
-
-// clang-format off
-#include "kernel/types.h"
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/bsdf_phong_ramp.h"
-#include "kernel/closure/bsdf_util.h"
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-using namespace OSL;
-
-class PhongRampClosure : public CBSDFClosure {
- public:
- PhongRampBsdf params;
- Color3 colors[8];
-
- void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- PhongRampBsdf *bsdf = (PhongRampBsdf *)bsdf_alloc_osl(
- sd, sizeof(PhongRampBsdf), weight, &params);
-
- if (bsdf) {
- bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8);
-
- if (bsdf->colors) {
- for (int i = 0; i < 8; i++)
- bsdf->colors[i] = TO_FLOAT3(colors[i]);
-
- sd->flag |= bsdf_phong_ramp_setup(bsdf);
- }
- }
- }
-};
-
-ClosureParam *closure_bsdf_phong_ramp_params()
-{
- static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PhongRampClosure, params.N),
- CLOSURE_FLOAT_PARAM(PhongRampClosure, params.exponent),
- CLOSURE_COLOR_ARRAY_PARAM(PhongRampClosure, colors, 8),
- CLOSURE_STRING_KEYPARAM(PhongRampClosure, label, "label"),
- CLOSURE_FINISH_PARAM(PhongRampClosure)};
- return params;
-}
-
-CCLOSURE_PREPARE(closure_bsdf_phong_ramp_prepare, PhongRampClosure)
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/bssrdf.cpp b/intern/cycles/kernel/osl/bssrdf.cpp
deleted file mode 100644
index 4b282fddad3..00000000000
--- a/intern/cycles/kernel/osl/bssrdf.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#include <OSL/genclosure.h>
-
-#include "kernel/device/cpu/compat.h"
-#include "kernel/osl/closures.h"
-
-// clang-format off
-#include "kernel/types.h"
-
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/bsdf_util.h"
-#include "kernel/closure/bsdf_diffuse.h"
-#include "kernel/closure/bsdf_principled_diffuse.h"
-#include "kernel/closure/bssrdf.h"
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-using namespace OSL;
-
-static ustring u_burley("burley");
-static ustring u_random_walk_fixed_radius("random_walk_fixed_radius");
-static ustring u_random_walk("random_walk");
-
-class CBSSRDFClosure : public CClosurePrimitive {
- public:
- Bssrdf params;
- float ior;
- ustring method;
-
- CBSSRDFClosure()
- {
- params.roughness = FLT_MAX;
- params.anisotropy = 1.0f;
- ior = 1.4f;
- }
-
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- if (method == u_burley) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID);
- }
- else if (method == u_random_walk_fixed_radius) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID);
- }
- else if (method == u_random_walk) {
- alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID);
- }
- }
-
- void alloc(ShaderData *sd, uint32_t path_flag, float3 weight, ClosureType type)
- {
- Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
-
- if (bssrdf) {
- /* disable in case of diffuse ancestor, can't see it well then and
- * adds considerably noise due to probabilities of continuing path
- * getting lower and lower */
- if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
- params.radius = make_float3(0.0f, 0.0f, 0.0f);
- }
-
- /* create one closure per color channel */
- bssrdf->radius = params.radius;
- bssrdf->albedo = params.albedo;
- bssrdf->N = params.N;
- bssrdf->roughness = params.roughness;
- bssrdf->anisotropy = clamp(params.anisotropy, 0.0f, 0.9f);
- sd->flag |= bssrdf_setup(sd, bssrdf, (ClosureType)type, clamp(ior, 1.01f, 3.8f));
- }
- }
-};
-
-ClosureParam *closure_bssrdf_params()
-{
- static ClosureParam params[] = {
- CLOSURE_STRING_PARAM(CBSSRDFClosure, method),
- CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.N),
- CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.radius),
- CLOSURE_FLOAT3_PARAM(CBSSRDFClosure, params.albedo),
- CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.roughness, "roughness"),
- CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, ior, "ior"),
- CLOSURE_FLOAT_KEYPARAM(CBSSRDFClosure, params.anisotropy, "anisotropy"),
- CLOSURE_STRING_KEYPARAM(CBSSRDFClosure, label, "label"),
- CLOSURE_FINISH_PARAM(CBSSRDFClosure)};
- return params;
-}
-
-CCLOSURE_PREPARE(closure_bssrdf_prepare, CBSSRDFClosure)
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/closures.cpp b/intern/cycles/kernel/osl/closures.cpp
index e80b7aa1fd5..d56e0551a91 100644
--- a/intern/cycles/kernel/osl/closures.cpp
+++ b/intern/cycles/kernel/osl/closures.cpp
@@ -9,992 +9,304 @@
#include <OSL/genclosure.h>
#include <OSL/oslclosure.h>
-#include "kernel/osl/closures.h"
-#include "kernel/osl/shader.h"
+#include "kernel/types.h"
+
+#include "kernel/osl/globals.h"
+#include "kernel/osl/services.h"
#include "util/math.h"
#include "util/param.h"
-// clang-format off
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/globals.h"
-#include "kernel/types.h"
-
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/bsdf_util.h"
-#include "kernel/closure/bsdf_ashikhmin_velvet.h"
-#include "kernel/closure/bsdf_diffuse.h"
-#include "kernel/closure/bsdf_microfacet.h"
-#include "kernel/closure/bsdf_microfacet_beckmann.h"
-#include "kernel/closure/bsdf_microfacet_glass.h"
-#include "kernel/closure/bsdf_oren_nayar.h"
-#include "kernel/closure/bsdf_reflection.h"
-#include "kernel/closure/bsdf_refraction.h"
-#include "kernel/closure/bsdf_transparent.h"
-#include "kernel/closure/bsdf_ashikhmin_shirley.h"
-#include "kernel/closure/bsdf_toon.h"
-#include "kernel/closure/bsdf_hair.h"
-#include "kernel/closure/bsdf_hair_principled.h"
-#include "kernel/closure/bsdf_principled_diffuse.h"
-#include "kernel/closure/bsdf_principled_sheen.h"
-#include "kernel/closure/volume.h"
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-using namespace OSL;
-
-/* BSDF class definitions */
-
-BSDF_CLOSURE_CLASS_BEGIN(Diffuse, diffuse, DiffuseBsdf, LABEL_DIFFUSE)
- BSDF_CLOSURE_FLOAT3_PARAM(DiffuseClosure, params.N)
-BSDF_CLOSURE_CLASS_END(Diffuse, diffuse)
-
-BSDF_CLOSURE_CLASS_BEGIN(Translucent, translucent, DiffuseBsdf, LABEL_DIFFUSE)
- BSDF_CLOSURE_FLOAT3_PARAM(TranslucentClosure, params.N)
-BSDF_CLOSURE_CLASS_END(Translucent, translucent)
-
-BSDF_CLOSURE_CLASS_BEGIN(OrenNayar, oren_nayar, OrenNayarBsdf, LABEL_DIFFUSE)
- BSDF_CLOSURE_FLOAT3_PARAM(OrenNayarClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(OrenNayarClosure, params.roughness)
-BSDF_CLOSURE_CLASS_END(OrenNayar, oren_nayar)
-
-BSDF_CLOSURE_CLASS_BEGIN(Reflection, reflection, MicrofacetBsdf, LABEL_SINGULAR)
- BSDF_CLOSURE_FLOAT3_PARAM(ReflectionClosure, params.N)
-BSDF_CLOSURE_CLASS_END(Reflection, reflection)
-
-BSDF_CLOSURE_CLASS_BEGIN(Refraction, refraction, MicrofacetBsdf, LABEL_SINGULAR)
- BSDF_CLOSURE_FLOAT3_PARAM(RefractionClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(RefractionClosure, params.ior)
-BSDF_CLOSURE_CLASS_END(Refraction, refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(AshikhminVelvet, ashikhmin_velvet, VelvetBsdf, LABEL_DIFFUSE)
- BSDF_CLOSURE_FLOAT3_PARAM(AshikhminVelvetClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(AshikhminVelvetClosure, params.sigma)
-BSDF_CLOSURE_CLASS_END(AshikhminVelvet, ashikhmin_velvet)
-
-BSDF_CLOSURE_CLASS_BEGIN(AshikhminShirley,
- ashikhmin_shirley,
- MicrofacetBsdf,
- LABEL_GLOSSY | LABEL_REFLECT)
- BSDF_CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.N)
- BSDF_CLOSURE_FLOAT3_PARAM(AshikhminShirleyClosure, params.T)
- BSDF_CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_x)
- BSDF_CLOSURE_FLOAT_PARAM(AshikhminShirleyClosure, params.alpha_y)
-BSDF_CLOSURE_CLASS_END(AshikhminShirley, ashikhmin_shirley)
-
-BSDF_CLOSURE_CLASS_BEGIN(DiffuseToon, diffuse_toon, ToonBsdf, LABEL_DIFFUSE)
- BSDF_CLOSURE_FLOAT3_PARAM(DiffuseToonClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.size)
- BSDF_CLOSURE_FLOAT_PARAM(DiffuseToonClosure, params.smooth)
-BSDF_CLOSURE_CLASS_END(DiffuseToon, diffuse_toon)
-
-BSDF_CLOSURE_CLASS_BEGIN(GlossyToon, glossy_toon, ToonBsdf, LABEL_GLOSSY)
- BSDF_CLOSURE_FLOAT3_PARAM(GlossyToonClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.size)
- BSDF_CLOSURE_FLOAT_PARAM(GlossyToonClosure, params.smooth)
-BSDF_CLOSURE_CLASS_END(GlossyToon, glossy_toon)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXIsotropic,
- microfacet_ggx_isotropic,
- MicrofacetBsdf,
- LABEL_GLOSSY | LABEL_REFLECT)
- BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXIsotropicClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXIsotropicClosure, params.alpha_x)
-BSDF_CLOSURE_CLASS_END(MicrofacetGGXIsotropic, microfacet_ggx_isotropic)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGX,
- microfacet_ggx,
- MicrofacetBsdf,
- LABEL_GLOSSY | LABEL_REFLECT)
- BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.N)
- BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXClosure, params.T)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_x)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXClosure, params.alpha_y)
-BSDF_CLOSURE_CLASS_END(MicrofacetGGX, microfacet_ggx)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannIsotropic,
- microfacet_beckmann_isotropic,
- MicrofacetBsdf,
- LABEL_GLOSSY | LABEL_REFLECT)
- BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannIsotropicClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannIsotropicClosure, params.alpha_x)
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannIsotropic, microfacet_beckmann_isotropic)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmann,
- microfacet_beckmann,
- MicrofacetBsdf,
- LABEL_GLOSSY | LABEL_REFLECT)
- BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.N)
- BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannClosure, params.T)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_x)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannClosure, params.alpha_y)
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmann, microfacet_beckmann)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetGGXRefraction,
- microfacet_ggx_refraction,
- MicrofacetBsdf,
- LABEL_GLOSSY | LABEL_TRANSMIT)
- BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetGGXRefractionClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.alpha_x)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetGGXRefractionClosure, params.ior)
-BSDF_CLOSURE_CLASS_END(MicrofacetGGXRefraction, microfacet_ggx_refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(MicrofacetBeckmannRefraction,
- microfacet_beckmann_refraction,
- MicrofacetBsdf,
- LABEL_GLOSSY | LABEL_TRANSMIT)
- BSDF_CLOSURE_FLOAT3_PARAM(MicrofacetBeckmannRefractionClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.alpha_x)
- BSDF_CLOSURE_FLOAT_PARAM(MicrofacetBeckmannRefractionClosure, params.ior)
-BSDF_CLOSURE_CLASS_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
-
-BSDF_CLOSURE_CLASS_BEGIN(HairReflection, hair_reflection, HairBsdf, LABEL_GLOSSY)
- BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness1)
- BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.roughness2)
- BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T)
- BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset)
-BSDF_CLOSURE_CLASS_END(HairReflection, hair_reflection)
-
-BSDF_CLOSURE_CLASS_BEGIN(HairTransmission, hair_transmission, HairBsdf, LABEL_GLOSSY)
- BSDF_CLOSURE_FLOAT3_PARAM(HairTransmissionClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness1)
- BSDF_CLOSURE_FLOAT_PARAM(HairTransmissionClosure, params.roughness2)
- BSDF_CLOSURE_FLOAT3_PARAM(HairReflectionClosure, params.T)
- BSDF_CLOSURE_FLOAT_PARAM(HairReflectionClosure, params.offset)
-BSDF_CLOSURE_CLASS_END(HairTransmission, hair_transmission)
-
-BSDF_CLOSURE_CLASS_BEGIN(PrincipledDiffuse,
- principled_diffuse,
- PrincipledDiffuseBsdf,
- LABEL_DIFFUSE)
- BSDF_CLOSURE_FLOAT3_PARAM(PrincipledDiffuseClosure, params.N)
- BSDF_CLOSURE_FLOAT_PARAM(PrincipledDiffuseClosure, params.roughness)
-BSDF_CLOSURE_CLASS_END(PrincipledDiffuse, principled_diffuse)
-
-class PrincipledSheenClosure : public CBSDFClosure {
- public:
- PrincipledSheenBsdf params;
-
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- if (!skip(sd, path_flag, LABEL_DIFFUSE)) {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- PrincipledSheenBsdf *bsdf = (PrincipledSheenBsdf *)bsdf_alloc_osl(
- sd, sizeof(PrincipledSheenBsdf), weight, &params);
- sd->flag |= (bsdf) ? bsdf_principled_sheen_setup(sd, bsdf) : 0;
- }
- }
-};
+#include "kernel/geom/object.h"
+#include "kernel/util/differential.h"
-static ClosureParam *bsdf_principled_sheen_params()
-{
- static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledSheenClosure, params.N),
- CLOSURE_STRING_KEYPARAM(PrincipledSheenClosure, label, "label"),
- CLOSURE_FINISH_PARAM(PrincipledSheenClosure)};
- return params;
-}
+#include "kernel/osl/osl.h"
-CCLOSURE_PREPARE_STATIC(closure_bsdf_principled_sheen_prepare, PrincipledSheenClosure)
+#include "kernel/osl/closures_setup.h"
-/* PRINCIPLED HAIR BSDF */
-class PrincipledHairClosure : public CBSDFClosure {
- public:
- PrincipledHairBSDF params;
+#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z)
+#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2])
- PrincipledHairBSDF *alloc(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)bsdf_alloc_osl(
- sd, sizeof(PrincipledHairBSDF), weight, &params);
- if (!bsdf) {
- return NULL;
- }
+CCL_NAMESPACE_BEGIN
- PrincipledHairExtra *extra = (PrincipledHairExtra *)closure_alloc_extra(
- sd, sizeof(PrincipledHairExtra));
- if (!extra) {
- return NULL;
- }
+/* Registration */
- bsdf->extra = extra;
- return bsdf;
+#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
+ static OSL::ClosureParam *osl_closure_##lower##_params() \
+ { \
+ static OSL::ClosureParam params[] = {
+#define OSL_CLOSURE_STRUCT_END(Upper, lower) \
+ CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \
+ } \
+ ; \
+ return params; \
}
+#define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key) \
+ CLOSURE_##TYPE##_KEYPARAM(Upper##Closure, name, key),
+#define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size) \
+ CLOSURE_##TYPE##_ARRAY_PARAM(Upper##Closure, name, size),
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- if (!skip(sd, path_flag, LABEL_GLOSSY)) {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- PrincipledHairBSDF *bsdf = (PrincipledHairBSDF *)alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
+#include "closures_template.h"
- sd->flag |= (bsdf) ? bsdf_principled_hair_setup(sd, bsdf) : 0;
- }
- }
-};
-
-static ClosureParam *closure_bsdf_principled_hair_params()
+void OSLRenderServices::register_closures(OSL::ShadingSystem *ss)
{
- static ClosureParam params[] = {CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.N),
- CLOSURE_FLOAT3_PARAM(PrincipledHairClosure, params.sigma),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.v),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.s),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.m0_roughness),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.alpha),
- CLOSURE_FLOAT_PARAM(PrincipledHairClosure, params.eta),
- CLOSURE_STRING_KEYPARAM(PrincipledHairClosure, label, "label"),
- CLOSURE_FINISH_PARAM(PrincipledHairClosure)};
-
- return params;
-}
-
-CCLOSURE_PREPARE(closure_bsdf_principled_hair_prepare, PrincipledHairClosure)
+#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
+ ss->register_closure( \
+ #lower, OSL_CLOSURE_##Upper##_ID, osl_closure_##lower##_params(), nullptr, nullptr);
-/* DISNEY PRINCIPLED CLEARCOAT */
-class PrincipledClearcoatClosure : public CBSDFClosure {
- public:
- MicrofacetBsdf params;
- float clearcoat, clearcoat_roughness;
-
- MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
- sd, sizeof(MicrofacetBsdf), 0.25f * clearcoat * weight, &params);
- if (!bsdf) {
- return NULL;
- }
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->extra = NULL;
- bsdf->ior = 1.5f;
- bsdf->alpha_x = clearcoat_roughness;
- bsdf->alpha_y = clearcoat_roughness;
- return bsdf;
- }
-
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
-
- sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
- }
-};
-
-ClosureParam *closure_bsdf_principled_clearcoat_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(PrincipledClearcoatClosure, params.N),
- CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat),
- CLOSURE_FLOAT_PARAM(PrincipledClearcoatClosure, clearcoat_roughness),
- CLOSURE_STRING_KEYPARAM(PrincipledClearcoatClosure, label, "label"),
- CLOSURE_FINISH_PARAM(PrincipledClearcoatClosure)};
- return params;
+#include "closures_template.h"
}
-CCLOSURE_PREPARE(closure_bsdf_principled_clearcoat_prepare, PrincipledClearcoatClosure)
-/* Registration */
-
-static void register_closure(OSL::ShadingSystem *ss,
- const char *name,
- int id,
- OSL::ClosureParam *params,
- OSL::PrepareClosureFunc prepare)
-{
- /* optimization: it's possible to not use a prepare function at all and
- * only initialize the actual class when accessing the closure component
- * data, but then we need to map the id to the class somehow */
-#if OSL_LIBRARY_VERSION_CODE >= 10900
- ss->register_closure(name, id, params, prepare, NULL);
-#else
- ss->register_closure(name, id, params, prepare, NULL, 16);
-#endif
-}
+/* Globals */
-void OSLShader::register_closures(OSLShadingSystem *ss_)
+static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
+ ShaderData *sd,
+ const void *state,
+ uint32_t path_flag,
+ OSLThreadData *tdata)
{
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)ss_;
- int id = 0;
-
- register_closure(ss, "diffuse", id++, bsdf_diffuse_params(), bsdf_diffuse_prepare);
- register_closure(ss, "oren_nayar", id++, bsdf_oren_nayar_params(), bsdf_oren_nayar_prepare);
- register_closure(ss, "translucent", id++, bsdf_translucent_params(), bsdf_translucent_prepare);
- register_closure(ss, "reflection", id++, bsdf_reflection_params(), bsdf_reflection_prepare);
- register_closure(ss, "refraction", id++, bsdf_refraction_params(), bsdf_refraction_prepare);
- register_closure(ss,
- "transparent",
- id++,
- closure_bsdf_transparent_params(),
- closure_bsdf_transparent_prepare);
-
- register_closure(
- ss, "microfacet", id++, closure_bsdf_microfacet_params(), closure_bsdf_microfacet_prepare);
- register_closure(ss,
- "microfacet_ggx",
- id++,
- bsdf_microfacet_ggx_isotropic_params(),
- bsdf_microfacet_ggx_isotropic_prepare);
- register_closure(
- ss, "microfacet_ggx_aniso", id++, bsdf_microfacet_ggx_params(), bsdf_microfacet_ggx_prepare);
- register_closure(ss,
- "microfacet_ggx_refraction",
- id++,
- bsdf_microfacet_ggx_refraction_params(),
- bsdf_microfacet_ggx_refraction_prepare);
- register_closure(ss,
- "microfacet_multi_ggx",
- id++,
- closure_bsdf_microfacet_multi_ggx_params(),
- closure_bsdf_microfacet_multi_ggx_prepare);
- register_closure(ss,
- "microfacet_multi_ggx_glass",
- id++,
- closure_bsdf_microfacet_multi_ggx_glass_params(),
- closure_bsdf_microfacet_multi_ggx_glass_prepare);
- register_closure(ss,
- "microfacet_multi_ggx_aniso",
- id++,
- closure_bsdf_microfacet_multi_ggx_aniso_params(),
- closure_bsdf_microfacet_multi_ggx_aniso_prepare);
- register_closure(ss,
- "microfacet_ggx_fresnel",
- id++,
- closure_bsdf_microfacet_ggx_fresnel_params(),
- closure_bsdf_microfacet_ggx_fresnel_prepare);
- register_closure(ss,
- "microfacet_ggx_aniso_fresnel",
- id++,
- closure_bsdf_microfacet_ggx_aniso_fresnel_params(),
- closure_bsdf_microfacet_ggx_aniso_fresnel_prepare);
- register_closure(ss,
- "microfacet_multi_ggx_fresnel",
- id++,
- closure_bsdf_microfacet_multi_ggx_fresnel_params(),
- closure_bsdf_microfacet_multi_ggx_fresnel_prepare);
- register_closure(ss,
- "microfacet_multi_ggx_glass_fresnel",
- id++,
- closure_bsdf_microfacet_multi_ggx_glass_fresnel_params(),
- closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare);
- register_closure(ss,
- "microfacet_multi_ggx_aniso_fresnel",
- id++,
- closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params(),
- closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare);
- register_closure(ss,
- "microfacet_beckmann",
- id++,
- bsdf_microfacet_beckmann_isotropic_params(),
- bsdf_microfacet_beckmann_isotropic_prepare);
- register_closure(ss,
- "microfacet_beckmann_aniso",
- id++,
- bsdf_microfacet_beckmann_params(),
- bsdf_microfacet_beckmann_prepare);
- register_closure(ss,
- "microfacet_beckmann_refraction",
- id++,
- bsdf_microfacet_beckmann_refraction_params(),
- bsdf_microfacet_beckmann_refraction_prepare);
- register_closure(ss,
- "ashikhmin_shirley",
- id++,
- bsdf_ashikhmin_shirley_params(),
- bsdf_ashikhmin_shirley_prepare);
- register_closure(
- ss, "ashikhmin_velvet", id++, bsdf_ashikhmin_velvet_params(), bsdf_ashikhmin_velvet_prepare);
- register_closure(
- ss, "diffuse_toon", id++, bsdf_diffuse_toon_params(), bsdf_diffuse_toon_prepare);
- register_closure(ss, "glossy_toon", id++, bsdf_glossy_toon_params(), bsdf_glossy_toon_prepare);
- register_closure(ss,
- "principled_diffuse",
- id++,
- bsdf_principled_diffuse_params(),
- bsdf_principled_diffuse_prepare);
- register_closure(ss,
- "principled_sheen",
- id++,
- bsdf_principled_sheen_params(),
- closure_bsdf_principled_sheen_prepare);
- register_closure(ss,
- "principled_clearcoat",
- id++,
- closure_bsdf_principled_clearcoat_params(),
- closure_bsdf_principled_clearcoat_prepare);
-
- register_closure(ss, "emission", id++, closure_emission_params(), closure_emission_prepare);
- register_closure(
- ss, "background", id++, closure_background_params(), closure_background_prepare);
- register_closure(ss, "holdout", id++, closure_holdout_params(), closure_holdout_prepare);
- register_closure(ss,
- "diffuse_ramp",
- id++,
- closure_bsdf_diffuse_ramp_params(),
- closure_bsdf_diffuse_ramp_prepare);
- register_closure(
- ss, "phong_ramp", id++, closure_bsdf_phong_ramp_params(), closure_bsdf_phong_ramp_prepare);
- register_closure(ss, "bssrdf", id++, closure_bssrdf_params(), closure_bssrdf_prepare);
-
- register_closure(
- ss, "hair_reflection", id++, bsdf_hair_reflection_params(), bsdf_hair_reflection_prepare);
- register_closure(ss,
- "hair_transmission",
- id++,
- bsdf_hair_transmission_params(),
- bsdf_hair_transmission_prepare);
-
- register_closure(ss,
- "principled_hair",
- id++,
- closure_bsdf_principled_hair_params(),
- closure_bsdf_principled_hair_prepare);
-
- register_closure(ss,
- "henyey_greenstein",
- id++,
- closure_henyey_greenstein_params(),
- closure_henyey_greenstein_prepare);
- register_closure(
- ss, "absorption", id++, closure_absorption_params(), closure_absorption_prepare);
-}
-
-/* BSDF Closure */
-
-bool CBSDFClosure::skip(const ShaderData *sd, uint32_t path_flag, int scattering)
-{
- /* caustic options */
- if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
- const KernelGlobalsCPU *kg = sd->osl_globals;
-
- if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
- (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) {
- return true;
- }
+ OSL::ShaderGlobals *globals = &tdata->globals;
+
+ const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
+ const differential3 dI = differential_from_compact(sd->I, sd->dI);
+
+ /* copy from shader data to shader globals */
+ globals->P = TO_VEC3(sd->P);
+ globals->dPdx = TO_VEC3(dP.dx);
+ globals->dPdy = TO_VEC3(dP.dy);
+ globals->I = TO_VEC3(sd->I);
+ globals->dIdx = TO_VEC3(dI.dx);
+ globals->dIdy = TO_VEC3(dI.dy);
+ globals->N = TO_VEC3(sd->N);
+ globals->Ng = TO_VEC3(sd->Ng);
+ globals->u = sd->u;
+ globals->dudx = sd->du.dx;
+ globals->dudy = sd->du.dy;
+ globals->v = sd->v;
+ globals->dvdx = sd->dv.dx;
+ globals->dvdy = sd->dv.dy;
+ globals->dPdu = TO_VEC3(sd->dPdu);
+ globals->dPdv = TO_VEC3(sd->dPdv);
+ globals->surfacearea = 1.0f;
+ globals->time = sd->time;
+
+ /* booleans */
+ globals->raytype = path_flag;
+ globals->flipHandedness = 0;
+ globals->backfacing = (sd->flag & SD_BACKFACING);
+
+ /* shader data to be used in services callbacks */
+ globals->renderstate = sd;
+
+ /* hacky, we leave it to services to fetch actual object matrix */
+ globals->shader2common = sd;
+ globals->object2common = sd;
+
+ /* must be set to NULL before execute */
+ globals->Ci = NULL;
+
+ /* clear trace data */
+ tdata->tracedata.init = false;
+
+ /* Used by render-services. */
+ sd->osl_globals = kg;
+ if (path_flag & PATH_RAY_SHADOW) {
+ sd->osl_path_state = nullptr;
+ sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state;
}
-
- return false;
-}
-
-/* Standard Microfacet Closure */
-
-class MicrofacetClosure : public CBSDFClosure {
- public:
- MicrofacetBsdf params;
- ustring distribution;
- int refract;
-
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- static ustring u_ggx("ggx");
- static ustring u_default("default");
-
- const int label = (refract) ? LABEL_TRANSMIT : LABEL_REFLECT;
- if (skip(sd, path_flag, LABEL_GLOSSY | label)) {
- return;
- }
-
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
- sd, sizeof(MicrofacetBsdf), weight, &params);
-
- if (!bsdf) {
- return;
- }
-
- /* GGX */
- if (distribution == u_ggx || distribution == u_default) {
- if (!refract) {
- if (params.alpha_x == params.alpha_y) {
- /* Isotropic */
- sd->flag |= bsdf_microfacet_ggx_isotropic_setup(bsdf);
- }
- else {
- /* Anisotropic */
- sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
- }
- }
- else {
- sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
- }
- }
- /* Beckmann */
- else {
- if (!refract) {
- if (params.alpha_x == params.alpha_y) {
- /* Isotropic */
- sd->flag |= bsdf_microfacet_beckmann_isotropic_setup(bsdf);
- }
- else {
- /* Anisotropic */
- sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
- }
- }
- else {
- sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
- }
- }
+ else {
+ sd->osl_path_state = (const IntegratorStateCPU *)state;
+ sd->osl_shadow_path_state = nullptr;
}
-};
-
-ClosureParam *closure_bsdf_microfacet_params()
-{
- static ClosureParam params[] = {CLOSURE_STRING_PARAM(MicrofacetClosure, distribution),
- CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.alpha_y),
- CLOSURE_FLOAT_PARAM(MicrofacetClosure, params.ior),
- CLOSURE_INT_PARAM(MicrofacetClosure, refract),
- CLOSURE_STRING_KEYPARAM(MicrofacetClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetClosure)};
-
- return params;
}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_prepare, MicrofacetClosure)
-
-/* GGX closures with Fresnel */
-
-class MicrofacetFresnelClosure : public CBSDFClosure {
- public:
- MicrofacetBsdf params;
- float3 color;
- float3 cspec0;
-
- MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- /* Technically, the MultiGGX Glass closure may also transmit. However,
- * since this is set statically and only used for caustic flags, this
- * is probably as good as it gets. */
- if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
- return NULL;
- }
- MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
- sd, sizeof(MicrofacetBsdf), weight, &params);
- if (!bsdf) {
- return NULL;
+static void flatten_closure_tree(const KernelGlobalsCPU *kg,
+ ShaderData *sd,
+ uint32_t path_flag,
+ const OSL::ClosureColor *closure,
+ float3 weight = make_float3(1.0f, 1.0f, 1.0f))
+{
+ /* OSL gives us a closure tree, we flatten it into arrays per
+ * closure type, for evaluation, sampling, etc later on. */
+
+ switch (closure->id) {
+ case OSL::ClosureColor::MUL: {
+ OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
+ flatten_closure_tree(kg, sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
+ break;
}
-
- MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if (!extra) {
- return NULL;
+ case OSL::ClosureColor::ADD: {
+ OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
+ flatten_closure_tree(kg, sd, path_flag, add->closureA, weight);
+ flatten_closure_tree(kg, sd, path_flag, add->closureB, weight);
+ break;
}
-
- bsdf->extra = extra;
- bsdf->extra->color = color;
- bsdf->extra->cspec0 = cspec0;
- return bsdf;
+#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
+ case OSL_CLOSURE_##Upper##_ID: { \
+ const OSL::ClosureComponent *comp = reinterpret_cast<const OSL::ClosureComponent *>(closure); \
+ weight *= TO_FLOAT3(comp->w); \
+ osl_closure_##lower##_setup( \
+ kg, sd, path_flag, weight, reinterpret_cast<const Upper##Closure *>(comp + 1)); \
+ break; \
}
-};
-
-class MicrofacetGGXFresnelClosure : public MicrofacetFresnelClosure {
- public:
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
+#include "closures_template.h"
+ default:
+ break;
}
-};
-
-ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)};
- return params;
}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_fresnel_prepare, MicrofacetGGXFresnelClosure);
-class MicrofacetGGXAnisoFresnelClosure : public MicrofacetFresnelClosure {
- public:
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
+/* Surface */
- sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
- }
-};
-
-ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params()
+void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
+ const void *state,
+ ShaderData *sd,
+ uint32_t path_flag)
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.alpha_y),
- CLOSURE_FLOAT_PARAM(MicrofacetGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetGGXFresnelClosure)};
- return params;
-}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_ggx_aniso_fresnel_prepare,
- MicrofacetGGXAnisoFresnelClosure);
-
-/* Multiscattering GGX closures */
-
-class MicrofacetMultiClosure : public CBSDFClosure {
- public:
- MicrofacetBsdf params;
- float3 color;
-
- MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- /* Technically, the MultiGGX closure may also transmit. However,
- * since this is set statically and only used for caustic flags, this
- * is probably as good as it gets. */
- if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
- return NULL;
- }
-
- MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
- sd, sizeof(MicrofacetBsdf), weight, &params);
- if (!bsdf) {
- return NULL;
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
+ shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+
+ /* execute shader for this point */
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ OSL::ShadingContext *octx = tdata->context;
+ int shader = sd->shader & SHADER_MASK;
+
+ /* automatic bump shader */
+ if (kg->osl->bump_state[shader]) {
+ /* save state */
+ const float3 P = sd->P;
+ const float dP = sd->dP;
+ const OSL::Vec3 dPdx = globals->dPdx;
+ const OSL::Vec3 dPdy = globals->dPdy;
+
+ /* set state as if undisplaced */
+ if (sd->flag & SD_HAS_DISPLACEMENT) {
+ float data[9];
+ bool found = kg->osl->services->get_attribute(sd,
+ true,
+ OSLRenderServices::u_empty,
+ TypeDesc::TypeVector,
+ OSLRenderServices::u_geom_undisplaced,
+ data);
+ (void)found;
+ assert(found);
+
+ differential3 tmp_dP;
+ memcpy(&sd->P, data, sizeof(float) * 3);
+ memcpy(&tmp_dP.dx, data + 3, sizeof(float) * 3);
+ memcpy(&tmp_dP.dy, data + 6, sizeof(float) * 3);
+
+ object_position_transform(kg, sd, &sd->P);
+ object_dir_transform(kg, sd, &tmp_dP.dx);
+ object_dir_transform(kg, sd, &tmp_dP.dy);
+
+ sd->dP = differential_make_compact(tmp_dP);
+
+ globals->P = TO_VEC3(sd->P);
+ globals->dPdx = TO_VEC3(tmp_dP.dx);
+ globals->dPdy = TO_VEC3(tmp_dP.dy);
}
- bsdf->extra = NULL;
- return bsdf;
- }
-};
-
-class MicrofacetMultiGGXClosure : public MicrofacetMultiClosure {
- public:
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
+ /* execute bump shader */
+ ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
- const KernelGlobalsCPU *kg = sd->osl_globals;
+ /* reset state */
+ sd->P = P;
+ sd->dP = dP;
- bsdf->ior = 0.0f;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_multi_ggx_setup(kg, bsdf, sd, color);
+ globals->P = TO_VEC3(P);
+ globals->dPdx = TO_VEC3(dPdx);
+ globals->dPdy = TO_VEC3(dPdy);
}
-};
-ClosureParam *closure_bsdf_microfacet_multi_ggx_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
- return params;
-}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_prepare, MicrofacetMultiGGXClosure);
-
-class MicrofacetMultiGGXAnisoClosure : public MicrofacetMultiClosure {
- public:
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
-
- const KernelGlobalsCPU *kg = sd->osl_globals;
-
- bsdf->ior = 0.0f;
- sd->flag |= bsdf_microfacet_multi_ggx_setup(kg, bsdf, sd, color);
+ /* surface shader */
+ if (kg->osl->surface_state[shader]) {
+ ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
}
-};
-
-ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_y),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
- return params;
-}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_prepare, MicrofacetMultiGGXAnisoClosure);
-class MicrofacetMultiGGXGlassClosure : public MicrofacetMultiClosure {
- public:
- MicrofacetMultiGGXGlassClosure() : MicrofacetMultiClosure()
- {
+ /* flatten closure tree */
+ if (globals->Ci) {
+ flatten_closure_tree(kg, sd, path_flag, globals->Ci);
}
+}
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
-
- const KernelGlobalsCPU *kg = sd->osl_globals;
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(kg, bsdf, sd, color);
- }
-};
+/* Background */
-ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params()
+void OSLShader::eval_background(const KernelGlobalsCPU *kg,
+ const void *state,
+ ShaderData *sd,
+ uint32_t path_flag)
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXClosure, color),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXClosure)};
- return params;
-}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_prepare, MicrofacetMultiGGXGlassClosure);
-
-/* Multiscattering GGX closures with Fresnel */
-
-class MicrofacetMultiFresnelClosure : public CBSDFClosure {
- public:
- MicrofacetBsdf params;
- float3 color;
- float3 cspec0;
-
- MicrofacetBsdf *alloc(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- /* Technically, the MultiGGX closure may also transmit. However,
- * since this is set statically and only used for caustic flags, this
- * is probably as good as it gets. */
- if (skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
- return NULL;
- }
-
- MicrofacetBsdf *bsdf = (MicrofacetBsdf *)bsdf_alloc_osl(
- sd, sizeof(MicrofacetBsdf), weight, &params);
- if (!bsdf) {
- return NULL;
- }
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
+ shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
- MicrofacetExtra *extra = (MicrofacetExtra *)closure_alloc_extra(sd, sizeof(MicrofacetExtra));
- if (!extra) {
- return NULL;
- }
+ /* execute shader for this point */
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ OSL::ShadingContext *octx = tdata->context;
- bsdf->extra = extra;
- bsdf->extra->color = color;
- bsdf->extra->cspec0 = cspec0;
- return bsdf;
+ if (kg->osl->background_state) {
+ ss->execute(octx, *(kg->osl->background_state), *globals);
}
-};
-
-class MicrofacetMultiGGXFresnelClosure : public MicrofacetMultiFresnelClosure {
- public:
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
-
- const KernelGlobalsCPU *kg = sd->osl_globals;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(kg, bsdf, sd);
+ /* return background color immediately */
+ if (globals->Ci) {
+ flatten_closure_tree(kg, sd, path_flag, globals->Ci);
}
-};
-
-ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
- return params;
}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_fresnel_prepare,
- MicrofacetMultiGGXFresnelClosure);
-
-class MicrofacetMultiGGXAnisoFresnelClosure : public MicrofacetMultiFresnelClosure {
- public:
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
- const KernelGlobalsCPU *kg = sd->osl_globals;
-
- sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(kg, bsdf, sd);
- }
-};
+/* Volume */
-ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params()
+void OSLShader::eval_volume(const KernelGlobalsCPU *kg,
+ const void *state,
+ ShaderData *sd,
+ uint32_t path_flag)
{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.T),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_y),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
- return params;
-}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare,
- MicrofacetMultiGGXAnisoFresnelClosure);
-
-class MicrofacetMultiGGXGlassFresnelClosure : public MicrofacetMultiFresnelClosure {
- public:
- MicrofacetMultiGGXGlassFresnelClosure() : MicrofacetMultiFresnelClosure()
- {
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
+ shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
+
+ /* execute shader */
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ OSL::ShadingContext *octx = tdata->context;
+ int shader = sd->shader & SHADER_MASK;
+
+ if (kg->osl->volume_state[shader]) {
+ ss->execute(octx, *(kg->osl->volume_state[shader]), *globals);
}
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N);
-
- MicrofacetBsdf *bsdf = alloc(sd, path_flag, weight);
- if (!bsdf) {
- return;
- }
-
- const KernelGlobalsCPU *kg = sd->osl_globals;
-
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
- bsdf->alpha_y = bsdf->alpha_x;
- sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(kg, bsdf, sd);
- }
-};
-
-ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, params.N),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.alpha_x),
- CLOSURE_FLOAT_PARAM(MicrofacetMultiGGXFresnelClosure, params.ior),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, color),
- CLOSURE_FLOAT3_PARAM(MicrofacetMultiGGXFresnelClosure, cspec0),
- CLOSURE_STRING_KEYPARAM(MicrofacetMultiGGXFresnelClosure, label, "label"),
- CLOSURE_FINISH_PARAM(MicrofacetMultiGGXFresnelClosure)};
- return params;
-}
-CCLOSURE_PREPARE(closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare,
- MicrofacetMultiGGXGlassFresnelClosure);
-
-/* Transparent */
-
-class TransparentClosure : public CBSDFClosure {
- public:
- ShaderClosure params;
- float3 unused;
-
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- bsdf_transparent_setup(sd, weight, path_flag);
+ /* flatten closure tree */
+ if (globals->Ci) {
+ flatten_closure_tree(kg, sd, path_flag, globals->Ci);
}
-};
-
-ClosureParam *closure_bsdf_transparent_params()
-{
- static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(TransparentClosure, label, "label"),
- CLOSURE_FINISH_PARAM(TransparentClosure)};
- return params;
}
-CCLOSURE_PREPARE(closure_bsdf_transparent_prepare, TransparentClosure)
+/* Displacement */
-/* Volume */
-
-class VolumeAbsorptionClosure : public CBSDFClosure {
- public:
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- volume_extinction_setup(sd, weight);
- }
-};
-
-ClosureParam *closure_absorption_params()
+void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd)
{
- static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(VolumeAbsorptionClosure, label, "label"),
- CLOSURE_FINISH_PARAM(VolumeAbsorptionClosure)};
- return params;
-}
-
-CCLOSURE_PREPARE(closure_absorption_prepare, VolumeAbsorptionClosure)
-
-class VolumeHenyeyGreensteinClosure : public CBSDFClosure {
- public:
- HenyeyGreensteinVolume params;
-
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight)
- {
- volume_extinction_setup(sd, weight);
-
- HenyeyGreensteinVolume *volume = (HenyeyGreensteinVolume *)bsdf_alloc_osl(
- sd, sizeof(HenyeyGreensteinVolume), weight, &params);
- if (!volume) {
- return;
- }
-
- sd->flag |= volume_henyey_greenstein_setup(volume);
+ /* setup shader globals from shader data */
+ OSLThreadData *tdata = kg->osl_tdata;
+ shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
+
+ /* execute shader */
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSL::ShaderGlobals *globals = &tdata->globals;
+ OSL::ShadingContext *octx = tdata->context;
+ int shader = sd->shader & SHADER_MASK;
+
+ if (kg->osl->displacement_state[shader]) {
+ ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals);
}
-};
-ClosureParam *closure_henyey_greenstein_params()
-{
- static ClosureParam params[] = {
- CLOSURE_FLOAT_PARAM(VolumeHenyeyGreensteinClosure, params.g),
- CLOSURE_STRING_KEYPARAM(VolumeHenyeyGreensteinClosure, label, "label"),
- CLOSURE_FINISH_PARAM(VolumeHenyeyGreensteinClosure)};
- return params;
+ /* get back position */
+ sd->P = TO_FLOAT3(globals->P);
}
-CCLOSURE_PREPARE(closure_henyey_greenstein_prepare, VolumeHenyeyGreensteinClosure)
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/closures.h b/intern/cycles/kernel/osl/closures.h
deleted file mode 100644
index e10a3d88a04..00000000000
--- a/intern/cycles/kernel/osl/closures.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#ifndef __OSL_CLOSURES_H__
-#define __OSL_CLOSURES_H__
-
-#include "kernel/types.h"
-#include "util/types.h"
-
-#include <OSL/genclosure.h>
-#include <OSL/oslclosure.h>
-#include <OSL/oslexec.h>
-
-CCL_NAMESPACE_BEGIN
-
-OSL::ClosureParam *closure_emission_params();
-OSL::ClosureParam *closure_background_params();
-OSL::ClosureParam *closure_holdout_params();
-OSL::ClosureParam *closure_bsdf_diffuse_ramp_params();
-OSL::ClosureParam *closure_bsdf_phong_ramp_params();
-OSL::ClosureParam *closure_bsdf_transparent_params();
-OSL::ClosureParam *closure_bssrdf_params();
-OSL::ClosureParam *closure_absorption_params();
-OSL::ClosureParam *closure_henyey_greenstein_params();
-OSL::ClosureParam *closure_bsdf_microfacet_params();
-OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_params();
-OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_params();
-OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_params();
-OSL::ClosureParam *closure_bsdf_microfacet_ggx_fresnel_params();
-OSL::ClosureParam *closure_bsdf_microfacet_ggx_aniso_fresnel_params();
-OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_fresnel_params();
-OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_glass_fresnel_params();
-OSL::ClosureParam *closure_bsdf_microfacet_multi_ggx_aniso_fresnel_params();
-OSL::ClosureParam *closure_bsdf_principled_clearcoat_params();
-
-void closure_emission_prepare(OSL::RendererServices *, int id, void *data);
-void closure_background_prepare(OSL::RendererServices *, int id, void *data);
-void closure_holdout_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_diffuse_ramp_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_phong_ramp_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_transparent_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bssrdf_prepare(OSL::RendererServices *, int id, void *data);
-void closure_absorption_prepare(OSL::RendererServices *, int id, void *data);
-void closure_henyey_greenstein_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_glass_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_multi_ggx_aniso_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_ggx_fresnel_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_microfacet_ggx_aniso_fresnel_prepare(OSL::RendererServices *,
- int id,
- void *data);
-void closure_bsdf_microfacet_multi_ggx_fresnel_prepare(OSL::RendererServices *,
- int id,
- void *data);
-void closure_bsdf_microfacet_multi_ggx_glass_fresnel_prepare(OSL::RendererServices *,
- int id,
- void *data);
-void closure_bsdf_microfacet_multi_ggx_aniso_fresnel_prepare(OSL::RendererServices *,
- int id,
- void *data);
-void closure_bsdf_principled_clearcoat_prepare(OSL::RendererServices *, int id, void *data);
-void closure_bsdf_principled_hair_prepare(OSL::RendererServices *, int id, void *data);
-
-#define CCLOSURE_PREPARE(name, classname) \
- void name(RendererServices *, int id, void *data) \
- { \
- memset(data, 0, sizeof(classname)); \
- new (data) classname(); \
- }
-
-#define CCLOSURE_PREPARE_STATIC(name, classname) static CCLOSURE_PREPARE(name, classname)
-
-#define CLOSURE_FLOAT3_PARAM(st, fld) \
- { \
- TypeDesc::TypeVector, (int)reckless_offsetof(st, fld), NULL, sizeof(OSL::Vec3) \
- }
-
-#define BSDF_CLOSURE_FLOAT_PARAM(st, fld) CLOSURE_FLOAT_PARAM(st, fld),
-#define BSDF_CLOSURE_FLOAT3_PARAM(st, fld) CLOSURE_FLOAT3_PARAM(st, fld),
-
-#define TO_VEC3(v) OSL::Vec3(v.x, v.y, v.z)
-#define TO_COLOR3(v) OSL::Color3(v.x, v.y, v.z)
-#define TO_FLOAT3(v) make_float3(v[0], v[1], v[2])
-
-/* Closure */
-
-class CClosurePrimitive {
- public:
- virtual void setup(ShaderData *sd, uint32_t path_flag, float3 weight) = 0;
-
- OSL::ustring label;
-};
-
-/* BSDF */
-
-class CBSDFClosure : public CClosurePrimitive {
- public:
- bool skip(const ShaderData *sd, uint32_t path_flag, int scattering);
-};
-
-#define BSDF_CLOSURE_CLASS_BEGIN(Upper, lower, structname, TYPE) \
-\
- class Upper##Closure : public CBSDFClosure { \
- public: \
- structname params; \
- float3 unused; \
-\
- void setup(ShaderData *sd, uint32_t path_flag, float3 weight) \
- { \
- if (!skip(sd, path_flag, TYPE)) { \
- params.N = ensure_valid_reflection(sd->Ng, sd->I, params.N); \
- structname *bsdf = (structname *)bsdf_alloc_osl(sd, sizeof(structname), weight, &params); \
- sd->flag |= (bsdf) ? bsdf_##lower##_setup(bsdf) : 0; \
- } \
- } \
- }; \
-\
- static ClosureParam *bsdf_##lower##_params() \
- { \
- static ClosureParam params[] = {
-
-/* parameters */
-
-#define BSDF_CLOSURE_CLASS_END(Upper, lower) \
- CLOSURE_STRING_KEYPARAM(Upper##Closure, label, "label"), CLOSURE_FINISH_PARAM(Upper##Closure) \
- } \
- ; \
- return params; \
- } \
-\
- CCLOSURE_PREPARE_STATIC(bsdf_##lower##_prepare, Upper##Closure)
-
-CCL_NAMESPACE_END
-
-#endif /* __OSL_CLOSURES_H__ */
diff --git a/intern/cycles/kernel/osl/closures_setup.h b/intern/cycles/kernel/osl/closures_setup.h
new file mode 100644
index 00000000000..23f62ec2f3d
--- /dev/null
+++ b/intern/cycles/kernel/osl/closures_setup.h
@@ -0,0 +1,1111 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Adapted from Open Shading Language
+ * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
+ * All Rights Reserved.
+ *
+ * Modifications Copyright 2011-2022 Blender Foundation. */
+
+#pragma once
+
+// clang-format off
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf_util.h"
+#include "kernel/closure/bsdf_ashikhmin_velvet.h"
+#include "kernel/closure/bsdf_diffuse.h"
+#include "kernel/closure/bsdf_microfacet.h"
+#include "kernel/closure/bsdf_microfacet_beckmann.h"
+#include "kernel/closure/bsdf_microfacet_glass.h"
+#include "kernel/closure/bsdf_oren_nayar.h"
+#include "kernel/closure/bsdf_reflection.h"
+#include "kernel/closure/bsdf_refraction.h"
+#include "kernel/closure/bsdf_transparent.h"
+#include "kernel/closure/bsdf_ashikhmin_shirley.h"
+#include "kernel/closure/bsdf_toon.h"
+#include "kernel/closure/bsdf_hair.h"
+#include "kernel/closure/bsdf_hair_principled.h"
+#include "kernel/closure/bsdf_principled_diffuse.h"
+#include "kernel/closure/bsdf_principled_sheen.h"
+#include "kernel/closure/volume.h"
+#include "kernel/closure/bsdf_diffuse_ramp.h"
+#include "kernel/closure/bsdf_phong_ramp.h"
+#include "kernel/closure/bssrdf.h"
+#include "kernel/closure/emissive.h"
+// clang-format on
+
+CCL_NAMESPACE_BEGIN
+
+#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) \
+ struct ccl_align(8) Upper##Closure \
+ { \
+ const char *label;
+#define OSL_CLOSURE_STRUCT_END(Upper, lower) \
+ } \
+ ; \
+ ccl_device void osl_closure_##lower##_setup(KernelGlobals kg, \
+ ccl_private ShaderData *sd, \
+ uint32_t path_flag, \
+ float3 weight, \
+ ccl_private Upper##Closure *closure);
+#define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key) type name;
+#define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size) type name[size];
+
+#include "closures_template.h"
+
+ccl_device_forceinline bool osl_closure_skip(KernelGlobals kg,
+ ccl_private const ShaderData *sd,
+ uint32_t path_flag,
+ int scattering)
+{
+ /* caustic options */
+ if ((scattering & LABEL_GLOSSY) && (path_flag & PATH_RAY_DIFFUSE)) {
+ if ((!kernel_data.integrator.caustics_reflective && (scattering & LABEL_REFLECT)) ||
+ (!kernel_data.integrator.caustics_refractive && (scattering & LABEL_TRANSMIT))) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Diffuse */
+
+ccl_device void osl_closure_diffuse_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const DiffuseClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) {
+ return;
+ }
+
+ ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(DiffuseBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+
+ sd->flag |= bsdf_diffuse_setup(bsdf);
+}
+
+ccl_device void osl_closure_oren_nayar_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const OrenNayarClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) {
+ return;
+ }
+
+ ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)bsdf_alloc(
+ sd, sizeof(OrenNayarBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->roughness = closure->roughness;
+
+ sd->flag |= bsdf_oren_nayar_setup(bsdf);
+}
+
+ccl_device void osl_closure_translucent_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const TranslucentClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) {
+ return;
+ }
+
+ ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(DiffuseBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+
+ sd->flag |= bsdf_translucent_setup(bsdf);
+}
+
+ccl_device void osl_closure_reflection_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const ReflectionClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_SINGULAR)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+
+ sd->flag |= bsdf_reflection_setup(bsdf);
+}
+
+ccl_device void osl_closure_refraction_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const RefractionClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_SINGULAR)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->ior = closure->ior;
+
+ sd->flag |= bsdf_refraction_setup(bsdf);
+}
+
+ccl_device void osl_closure_transparent_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const TransparentClosure *closure)
+{
+ bsdf_transparent_setup(sd, rgb_to_spectrum(weight), path_flag);
+}
+
+/* Standard microfacet closures */
+
+ccl_device void osl_closure_microfacet_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetClosure *closure)
+{
+ const int label = (closure->refract) ? LABEL_TRANSMIT : LABEL_REFLECT;
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | label)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = closure->alpha_y;
+ bsdf->ior = closure->ior;
+ bsdf->T = closure->T;
+
+ static OSL::ustring u_ggx("ggx");
+ static OSL::ustring u_default("default");
+
+ /* GGX */
+ if (closure->distribution == u_ggx || closure->distribution == u_default) {
+ if (!closure->refract) {
+ sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+ }
+ else {
+ sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+ }
+ }
+ /* Beckmann */
+ else {
+ if (!closure->refract) {
+ sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+ }
+ else {
+ sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
+ }
+ }
+}
+
+ccl_device void osl_closure_microfacet_ggx_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetGGXIsotropicClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+
+ sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+}
+
+ccl_device void osl_closure_microfacet_ggx_aniso_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetGGXClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = closure->alpha_y;
+ bsdf->T = closure->T;
+
+ sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
+}
+
+ccl_device void osl_closure_microfacet_ggx_refraction_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetGGXRefractionClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_TRANSMIT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->ior = closure->ior;
+
+ sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf);
+}
+
+/* GGX closures with Fresnel */
+
+ccl_device void osl_closure_microfacet_ggx_fresnel_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetGGXFresnelClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->ior = closure->ior;
+
+ bsdf->extra = extra;
+ bsdf->extra->color = rgb_to_spectrum(closure->color);
+ bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0);
+
+ bsdf->T = zero_float3();
+
+ sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
+}
+
+ccl_device void osl_closure_microfacet_ggx_aniso_fresnel_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetGGXAnisoFresnelClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = closure->alpha_y;
+ bsdf->ior = closure->ior;
+
+ bsdf->extra = extra;
+ bsdf->extra->color = rgb_to_spectrum(closure->color);
+ bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0);
+
+ bsdf->T = closure->T;
+
+ sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
+}
+
+/* Multi-scattering GGX closures */
+
+ccl_device void osl_closure_microfacet_multi_ggx_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetMultiGGXClosure *closure)
+{
+ /* Technically, the MultiGGX closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->ior = 1.0f;
+
+ bsdf->T = zero_float3();
+
+ sd->flag |= bsdf_microfacet_multi_ggx_setup(kg, bsdf, sd, rgb_to_spectrum(closure->color));
+}
+
+ccl_device void osl_closure_microfacet_multi_ggx_glass_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetMultiGGXGlassClosure *closure)
+{
+ /* Technically, the MultiGGX closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->ior = closure->ior;
+
+ bsdf->T = zero_float3();
+
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(kg, bsdf, sd, rgb_to_spectrum(closure->color));
+}
+
+ccl_device void osl_closure_microfacet_multi_ggx_aniso_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetMultiGGXAnisoClosure *closure)
+{
+ /* Technically, the MultiGGX closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = closure->alpha_y;
+ bsdf->ior = 1.0f;
+
+ bsdf->T = closure->T;
+
+ sd->flag |= bsdf_microfacet_multi_ggx_setup(kg, bsdf, sd, rgb_to_spectrum(closure->color));
+}
+
+/* Multi-scattering GGX closures with Fresnel */
+
+ccl_device void osl_closure_microfacet_multi_ggx_fresnel_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetMultiGGXFresnelClosure *closure)
+{
+ /* Technically, the MultiGGX closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->ior = closure->ior;
+
+ bsdf->extra = extra;
+ bsdf->extra->color = rgb_to_spectrum(closure->color);
+ bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0);
+
+ bsdf->T = zero_float3();
+
+ sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(kg, bsdf, sd);
+}
+
+ccl_device void osl_closure_microfacet_multi_ggx_glass_fresnel_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetMultiGGXGlassFresnelClosure *closure)
+{
+ /* Technically, the MultiGGX closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = bsdf->alpha_x;
+ bsdf->ior = closure->ior;
+
+ bsdf->extra = extra;
+ bsdf->extra->color = rgb_to_spectrum(closure->color);
+ bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0);
+
+ bsdf->T = zero_float3();
+
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(kg, bsdf, sd);
+}
+
+ccl_device void osl_closure_microfacet_multi_ggx_aniso_fresnel_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetMultiGGXAnisoFresnelClosure *closure)
+{
+ /* Technically, the MultiGGX closure may also transmit. However,
+ * since this is set statically and only used for caustic flags, this
+ * is probably as good as it gets. */
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ ccl_private MicrofacetExtra *extra = (ccl_private MicrofacetExtra *)closure_alloc_extra(
+ sd, sizeof(MicrofacetExtra));
+ if (!extra) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = closure->alpha_y;
+ bsdf->ior = closure->ior;
+
+ bsdf->extra = extra;
+ bsdf->extra->color = rgb_to_spectrum(closure->color);
+ bsdf->extra->cspec0 = rgb_to_spectrum(closure->cspec0);
+
+ bsdf->T = closure->T;
+
+ sd->flag |= bsdf_microfacet_multi_ggx_fresnel_setup(kg, bsdf, sd);
+}
+
+/* Beckmann closures */
+
+ccl_device void osl_closure_microfacet_beckmann_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetBeckmannIsotropicClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+
+ sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+}
+
+ccl_device void osl_closure_microfacet_beckmann_aniso_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetBeckmannClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = closure->alpha_y;
+ bsdf->T = closure->T;
+
+ sd->flag |= bsdf_microfacet_beckmann_setup(bsdf);
+}
+
+ccl_device void osl_closure_microfacet_beckmann_refraction_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const MicrofacetBeckmannRefractionClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_TRANSMIT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->ior = closure->ior;
+
+ sd->flag |= bsdf_microfacet_beckmann_refraction_setup(bsdf);
+}
+
+/* Ashikhmin closures */
+
+ccl_device void osl_closure_ashikhmin_velvet_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const AshikhminVelvetClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) {
+ return;
+ }
+
+ ccl_private VelvetBsdf *bsdf = (ccl_private VelvetBsdf *)bsdf_alloc(
+ sd, sizeof(VelvetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->sigma = closure->sigma;
+
+ sd->flag |= bsdf_ashikhmin_velvet_setup(bsdf);
+}
+
+ccl_device void osl_closure_ashikhmin_shirley_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const AshikhminShirleyClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) {
+ return;
+ }
+
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->alpha_x;
+ bsdf->alpha_y = closure->alpha_y;
+ bsdf->T = closure->T;
+
+ sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf);
+}
+
+ccl_device void osl_closure_diffuse_toon_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const DiffuseToonClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) {
+ return;
+ }
+
+ ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc(
+ sd, sizeof(ToonBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->size = closure->size;
+ bsdf->smooth = closure->smooth;
+
+ sd->flag |= bsdf_diffuse_toon_setup(bsdf);
+}
+
+ccl_device void osl_closure_glossy_toon_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const GlossyToonClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY)) {
+ return;
+ }
+
+ ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc(
+ sd, sizeof(ToonBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->size = closure->size;
+ bsdf->smooth = closure->smooth;
+
+ sd->flag |= bsdf_glossy_toon_setup(bsdf);
+}
+
+/* Disney principled closures */
+
+ccl_device void osl_closure_principled_diffuse_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const PrincipledDiffuseClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) {
+ return;
+ }
+
+ ccl_private PrincipledDiffuseBsdf *bsdf = (ccl_private PrincipledDiffuseBsdf *)bsdf_alloc(
+ sd, sizeof(PrincipledDiffuseBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->roughness = closure->roughness;
+
+ sd->flag |= bsdf_principled_diffuse_setup(bsdf);
+}
+
+ccl_device void osl_closure_principled_sheen_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const PrincipledSheenClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_DIFFUSE)) {
+ return;
+ }
+
+ ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)bsdf_alloc(
+ sd, sizeof(PrincipledSheenBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->avg_value = 0.0f;
+
+ sd->flag |= bsdf_principled_sheen_setup(sd, bsdf);
+}
+
+ccl_device void osl_closure_principled_clearcoat_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const PrincipledClearcoatClosure *closure)
+{
+ ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
+ sd, sizeof(MicrofacetBsdf), rgb_to_spectrum(weight) * closure->clearcoat);
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->alpha_x = closure->clearcoat_roughness;
+ bsdf->alpha_y = closure->clearcoat_roughness;
+ bsdf->ior = 1.5f;
+
+ bsdf->T = zero_float3();
+
+ sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd);
+}
+
+/* Variable cone emissive closure
+ *
+ * This primitive emits in a cone having a configurable penumbra area where the light decays to 0
+ * reaching the outer_angle limit. It can also behave as a lambertian emitter if the provided
+ * angles are PI/2, which is the default
+ */
+ccl_device void osl_closure_emission_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t /* path_flag */,
+ float3 weight,
+ ccl_private const GenericEmissiveClosure *closure)
+{
+ emission_setup(sd, rgb_to_spectrum(weight));
+}
+
+/* Generic background closure
+ *
+ * We only have a background closure for the shaders to return a color in background shaders. No
+ * methods, only the weight is taking into account
+ */
+ccl_device void osl_closure_background_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t /* path_flag */,
+ float3 weight,
+ ccl_private const GenericBackgroundClosure *closure)
+{
+ background_setup(sd, rgb_to_spectrum(weight));
+}
+
+/* Holdout closure
+ *
+ * This will be used by the shader to mark the amount of holdout for the current shading point. No
+ * parameters, only the weight will be used
+ */
+ccl_device void osl_closure_holdout_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t /* path_flag */,
+ float3 weight,
+ ccl_private const HoldoutClosure *closure)
+{
+ closure_alloc(sd, sizeof(ShaderClosure), CLOSURE_HOLDOUT_ID, rgb_to_spectrum(weight));
+ sd->flag |= SD_HOLDOUT;
+}
+
+ccl_device void osl_closure_diffuse_ramp_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t /* path_flag */,
+ float3 weight,
+ ccl_private const DiffuseRampClosure *closure)
+{
+ ccl_private DiffuseRampBsdf *bsdf = (ccl_private DiffuseRampBsdf *)bsdf_alloc(
+ sd, sizeof(DiffuseRampBsdf), rgb_to_spectrum(weight));
+
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+
+ bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8);
+ if (!bsdf->colors) {
+ return;
+ }
+
+ for (int i = 0; i < 8; i++)
+ bsdf->colors[i] = closure->colors[i];
+
+ sd->flag |= bsdf_diffuse_ramp_setup(bsdf);
+}
+
+ccl_device void osl_closure_phong_ramp_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t /* path_flag */,
+ float3 weight,
+ ccl_private const PhongRampClosure *closure)
+{
+ ccl_private PhongRampBsdf *bsdf = (ccl_private PhongRampBsdf *)bsdf_alloc(
+ sd, sizeof(PhongRampBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->exponent = closure->exponent;
+
+ bsdf->colors = (float3 *)closure_alloc_extra(sd, sizeof(float3) * 8);
+ if (!bsdf->colors) {
+ return;
+ }
+
+ for (int i = 0; i < 8; i++)
+ bsdf->colors[i] = closure->colors[i];
+
+ sd->flag |= bsdf_phong_ramp_setup(bsdf);
+}
+
+ccl_device void osl_closure_bssrdf_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const BSSRDFClosure *closure)
+{
+ static ustring u_burley("burley");
+ static ustring u_random_walk_fixed_radius("random_walk_fixed_radius");
+ static ustring u_random_walk("random_walk");
+
+ ClosureType type;
+ if (closure->method == u_burley) {
+ type = CLOSURE_BSSRDF_BURLEY_ID;
+ }
+ else if (closure->method == u_random_walk_fixed_radius) {
+ type = CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID;
+ }
+ else if (closure->method == u_random_walk) {
+ type = CLOSURE_BSSRDF_RANDOM_WALK_ID;
+ }
+ else {
+ return;
+ }
+
+ ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, rgb_to_spectrum(weight));
+ if (!bssrdf) {
+ return;
+ }
+
+ /* disable in case of diffuse ancestor, can't see it well then and
+ * adds considerably noise due to probabilities of continuing path
+ * getting lower and lower */
+ if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR) {
+ bssrdf->radius = zero_spectrum();
+ }
+ else {
+ bssrdf->radius = closure->radius;
+ }
+
+ /* create one closure per color channel */
+ bssrdf->albedo = closure->albedo;
+ bssrdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bssrdf->roughness = closure->roughness;
+ bssrdf->anisotropy = clamp(closure->anisotropy, 0.0f, 0.9f);
+
+ sd->flag |= bssrdf_setup(sd, bssrdf, type, clamp(closure->ior, 1.01f, 3.8f));
+}
+
+/* Hair */
+
+ccl_device void osl_closure_hair_reflection_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const HairReflectionClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY)) {
+ return;
+ }
+
+ ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc(
+ sd, sizeof(HairBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->T = closure->T;
+ bsdf->roughness1 = closure->roughness1;
+ bsdf->roughness2 = closure->roughness2;
+ bsdf->offset = closure->offset;
+
+ sd->flag |= bsdf_hair_reflection_setup(bsdf);
+}
+
+ccl_device void osl_closure_hair_transmission_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const HairTransmissionClosure *closure)
+{
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY)) {
+ return;
+ }
+
+ ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc(
+ sd, sizeof(HairBsdf), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->T = closure->T;
+ bsdf->roughness1 = closure->roughness1;
+ bsdf->roughness2 = closure->roughness2;
+ bsdf->offset = closure->offset;
+
+ sd->flag |= bsdf_hair_transmission_setup(bsdf);
+}
+
+ccl_device void osl_closure_principled_hair_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const PrincipledHairClosure *closure)
+{
+#ifdef __HAIR__
+ if (osl_closure_skip(kg, sd, path_flag, LABEL_GLOSSY)) {
+ return;
+ }
+
+ ccl_private PrincipledHairBSDF *bsdf = (ccl_private PrincipledHairBSDF *)bsdf_alloc(
+ sd, sizeof(PrincipledHairBSDF), rgb_to_spectrum(weight));
+ if (!bsdf) {
+ return;
+ }
+
+ ccl_private PrincipledHairExtra *extra = (ccl_private PrincipledHairExtra *)closure_alloc_extra(
+ sd, sizeof(PrincipledHairExtra));
+ if (!extra) {
+ return;
+ }
+
+ bsdf->N = ensure_valid_reflection(sd->Ng, sd->I, closure->N);
+ bsdf->sigma = closure->sigma;
+ bsdf->v = closure->v;
+ bsdf->s = closure->s;
+ bsdf->alpha = closure->alpha;
+ bsdf->eta = closure->eta;
+ bsdf->m0_roughness = closure->m0_roughness;
+
+ bsdf->extra = extra;
+
+ sd->flag |= bsdf_principled_hair_setup(sd, bsdf);
+#endif
+}
+
+/* Volume */
+
+ccl_device void osl_closure_absorption_setup(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const VolumeAbsorptionClosure *closure)
+{
+ volume_extinction_setup(sd, rgb_to_spectrum(weight));
+}
+
+ccl_device void osl_closure_henyey_greenstein_setup(
+ KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ uint32_t path_flag,
+ float3 weight,
+ ccl_private const VolumeHenyeyGreensteinClosure *closure)
+{
+ volume_extinction_setup(sd, rgb_to_spectrum(weight));
+
+ ccl_private HenyeyGreensteinVolume *volume = (ccl_private HenyeyGreensteinVolume *)bsdf_alloc(
+ sd, sizeof(HenyeyGreensteinVolume), rgb_to_spectrum(weight));
+ if (!volume) {
+ return;
+ }
+
+ volume->g = closure->g;
+
+ sd->flag |= volume_henyey_greenstein_setup(volume);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/closures_template.h b/intern/cycles/kernel/osl/closures_template.h
new file mode 100644
index 00000000000..c808b275966
--- /dev/null
+++ b/intern/cycles/kernel/osl/closures_template.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#ifndef OSL_CLOSURE_STRUCT_BEGIN
+# define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower)
+#endif
+#ifndef OSL_CLOSURE_STRUCT_END
+# define OSL_CLOSURE_STRUCT_END(Upper, lower)
+#endif
+#ifndef OSL_CLOSURE_STRUCT_MEMBER
+# define OSL_CLOSURE_STRUCT_MEMBER(Upper, TYPE, type, name, key)
+#endif
+#ifndef OSL_CLOSURE_STRUCT_ARRAY_MEMBER
+# define OSL_CLOSURE_STRUCT_ARRAY_MEMBER(Upper, TYPE, type, name, key, size)
+#endif
+
+OSL_CLOSURE_STRUCT_BEGIN(Diffuse, diffuse)
+ OSL_CLOSURE_STRUCT_MEMBER(Diffuse, VECTOR, packed_float3, N, NULL)
+OSL_CLOSURE_STRUCT_END(Diffuse, diffuse)
+
+OSL_CLOSURE_STRUCT_BEGIN(OrenNayar, oren_nayar)
+ OSL_CLOSURE_STRUCT_MEMBER(OrenNayar, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(OrenNayar, FLOAT, float, roughness, NULL)
+OSL_CLOSURE_STRUCT_END(OrenNayar, oren_nayar)
+
+OSL_CLOSURE_STRUCT_BEGIN(Translucent, translucent)
+ OSL_CLOSURE_STRUCT_MEMBER(Translucent, VECTOR, packed_float3, N, NULL)
+OSL_CLOSURE_STRUCT_END(Translucent, translucent)
+
+OSL_CLOSURE_STRUCT_BEGIN(Reflection, reflection)
+ OSL_CLOSURE_STRUCT_MEMBER(Reflection, VECTOR, packed_float3, N, NULL)
+OSL_CLOSURE_STRUCT_END(Reflection, reflection)
+
+OSL_CLOSURE_STRUCT_BEGIN(Refraction, refraction)
+ OSL_CLOSURE_STRUCT_MEMBER(Refraction, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(Refraction, FLOAT, float, ior, NULL)
+OSL_CLOSURE_STRUCT_END(Refraction, refraction)
+
+OSL_CLOSURE_STRUCT_BEGIN(Transparent, transparent)
+OSL_CLOSURE_STRUCT_END(Transparent, transparent)
+
+OSL_CLOSURE_STRUCT_BEGIN(Microfacet, microfacet)
+ OSL_CLOSURE_STRUCT_MEMBER(Microfacet, STRING, ustring, distribution, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(Microfacet, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, alpha_y, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(Microfacet, FLOAT, float, ior, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(Microfacet, INT, int, refract, NULL)
+OSL_CLOSURE_STRUCT_END(Microfacet, microfacet)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGXIsotropic, microfacet_ggx)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXIsotropic, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXIsotropic, FLOAT, float, alpha_x, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetGGXIsotropic, microfacet_ggx)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGX, microfacet_ggx_aniso)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGX, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGX, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGX, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGX, FLOAT, float, alpha_y, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetGGX, microfacet_ggx_aniso)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGXRefraction, microfacet_ggx_refraction)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXRefraction, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXRefraction, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXRefraction, FLOAT, float, ior, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetGGXRefraction, microfacet_ggx_refraction)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGX, microfacet_multi_ggx)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGX, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGX, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGX, VECTOR, packed_float3, color, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGX, microfacet_multi_ggx)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXGlass, microfacet_multi_ggx_glass)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlass, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlass, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlass, FLOAT, float, ior, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlass, VECTOR, packed_float3, color, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXGlass, microfacet_multi_ggx_glass)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXAniso, microfacet_multi_ggx_aniso)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, FLOAT, float, alpha_y, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAniso, VECTOR, packed_float3, color, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXAniso, microfacet_multi_ggx_aniso)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGXFresnel, microfacet_ggx_fresnel)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, FLOAT, float, ior, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, VECTOR, packed_float3, color, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXFresnel, VECTOR, packed_float3, cspec0, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetGGXFresnel, microfacet_ggx_fresnel)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetGGXAnisoFresnel, microfacet_ggx_aniso_fresnel)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, FLOAT, float, alpha_y, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, FLOAT, float, ior, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, VECTOR, packed_float3, color, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetGGXAnisoFresnel, VECTOR, packed_float3, cspec0, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetGGXAnisoFresnel, microfacet_ggx_aniso_fresnel)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXFresnel, microfacet_multi_ggx_fresnel)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, FLOAT, float, ior, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, VECTOR, packed_float3, color, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXFresnel, VECTOR, packed_float3, cspec0, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXFresnel, microfacet_multi_ggx_fresnel)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXGlassFresnel, microfacet_multi_ggx_glass_fresnel)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, FLOAT, float, ior, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, VECTOR, packed_float3, color, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXGlassFresnel, VECTOR, packed_float3, cspec0, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXGlassFresnel, microfacet_multi_ggx_glass_fresnel)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetMultiGGXAnisoFresnel, microfacet_multi_ggx_aniso_fresnel)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, FLOAT, float, alpha_y, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, FLOAT, float, ior, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, VECTOR, packed_float3, color, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetMultiGGXAnisoFresnel, VECTOR, packed_float3, cspec0, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetMultiGGXAnisoFresnel, microfacet_multi_ggx_aniso_fresnel)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetBeckmannIsotropic, microfacet_beckmann)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannIsotropic, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannIsotropic, FLOAT, float, alpha_x, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetBeckmannIsotropic, microfacet_beckmann)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetBeckmann, microfacet_beckmann_aniso)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmann, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmann, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmann, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmann, FLOAT, float, alpha_y, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetBeckmann, microfacet_beckmann_aniso)
+
+OSL_CLOSURE_STRUCT_BEGIN(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannRefraction, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannRefraction, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(MicrofacetBeckmannRefraction, FLOAT, float, ior, NULL)
+OSL_CLOSURE_STRUCT_END(MicrofacetBeckmannRefraction, microfacet_beckmann_refraction)
+
+OSL_CLOSURE_STRUCT_BEGIN(AshikhminShirley, ashikhmin_shirley)
+ OSL_CLOSURE_STRUCT_MEMBER(AshikhminShirley, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(AshikhminShirley, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(AshikhminShirley, FLOAT, float, alpha_x, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(AshikhminShirley, FLOAT, float, alpha_y, NULL)
+OSL_CLOSURE_STRUCT_END(AshikhminShirley, ashikhmin_shirley)
+
+OSL_CLOSURE_STRUCT_BEGIN(AshikhminVelvet, ashikhmin_velvet)
+ OSL_CLOSURE_STRUCT_MEMBER(AshikhminVelvet, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(AshikhminVelvet, FLOAT, float, sigma, NULL)
+OSL_CLOSURE_STRUCT_END(AshikhminVelvet, ashikhmin_velvet)
+
+OSL_CLOSURE_STRUCT_BEGIN(DiffuseToon, diffuse_toon)
+ OSL_CLOSURE_STRUCT_MEMBER(DiffuseToon, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(DiffuseToon, FLOAT, float, size, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(DiffuseToon, FLOAT, float, smooth, NULL)
+OSL_CLOSURE_STRUCT_END(DiffuseToon, diffuse_toon)
+
+OSL_CLOSURE_STRUCT_BEGIN(GlossyToon, glossy_toon)
+ OSL_CLOSURE_STRUCT_MEMBER(GlossyToon, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(GlossyToon, FLOAT, float, size, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(GlossyToon, FLOAT, float, smooth, NULL)
+OSL_CLOSURE_STRUCT_END(GlossyToon, glossy_toon)
+
+OSL_CLOSURE_STRUCT_BEGIN(PrincipledDiffuse, principled_diffuse)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledDiffuse, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledDiffuse, FLOAT, float, roughness, NULL)
+OSL_CLOSURE_STRUCT_END(PrincipledDiffuse, principled_diffuse)
+
+OSL_CLOSURE_STRUCT_BEGIN(PrincipledSheen, principled_sheen)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledSheen, VECTOR, packed_float3, N, NULL)
+OSL_CLOSURE_STRUCT_END(PrincipledSheen, principled_sheen)
+
+OSL_CLOSURE_STRUCT_BEGIN(PrincipledClearcoat, principled_clearcoat)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledClearcoat, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledClearcoat, FLOAT, float, clearcoat, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledClearcoat, FLOAT, float, clearcoat_roughness, NULL)
+OSL_CLOSURE_STRUCT_END(PrincipledClearcoat, principled_clearcoat)
+
+OSL_CLOSURE_STRUCT_BEGIN(GenericEmissive, emission)
+OSL_CLOSURE_STRUCT_END(GenericEmissive, emission)
+
+OSL_CLOSURE_STRUCT_BEGIN(GenericBackground, background)
+OSL_CLOSURE_STRUCT_END(GenericBackground, background)
+
+OSL_CLOSURE_STRUCT_BEGIN(Holdout, holdout)
+OSL_CLOSURE_STRUCT_END(Holdout, holdout)
+
+OSL_CLOSURE_STRUCT_BEGIN(DiffuseRamp, diffuse_ramp)
+ OSL_CLOSURE_STRUCT_MEMBER(DiffuseRamp, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_ARRAY_MEMBER(DiffuseRamp, COLOR, packed_float3, colors, NULL, 8)
+OSL_CLOSURE_STRUCT_END(DiffuseRamp, diffuse_ramp)
+
+OSL_CLOSURE_STRUCT_BEGIN(PhongRamp, phong_ramp)
+ OSL_CLOSURE_STRUCT_MEMBER(PhongRamp, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PhongRamp, FLOAT, float, exponent, NULL)
+ OSL_CLOSURE_STRUCT_ARRAY_MEMBER(PhongRamp, COLOR, packed_float3, colors, NULL, 8)
+OSL_CLOSURE_STRUCT_END(PhongRamp, phong_ramp)
+
+OSL_CLOSURE_STRUCT_BEGIN(BSSRDF, bssrdf)
+ OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, STRING, ustring, method, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, radius, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, VECTOR, packed_float3, albedo, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, FLOAT, float, roughness, "roughness")
+ OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, FLOAT, float, ior, "ior")
+ OSL_CLOSURE_STRUCT_MEMBER(BSSRDF, FLOAT, float, anisotropy, "anisotropy")
+OSL_CLOSURE_STRUCT_END(BSSRDF, bssrdf)
+
+OSL_CLOSURE_STRUCT_BEGIN(HairReflection, hair_reflection)
+ OSL_CLOSURE_STRUCT_MEMBER(HairReflection, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(HairReflection, FLOAT, float, roughness1, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(HairReflection, FLOAT, float, roughness2, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(HairReflection, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(HairReflection, FLOAT, float, offset, NULL)
+OSL_CLOSURE_STRUCT_END(HairReflection, hair_reflection)
+
+OSL_CLOSURE_STRUCT_BEGIN(HairTransmission, hair_transmission)
+ OSL_CLOSURE_STRUCT_MEMBER(HairTransmission, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(HairTransmission, FLOAT, float, roughness1, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(HairTransmission, FLOAT, float, roughness2, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(HairReflection, VECTOR, packed_float3, T, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(HairReflection, FLOAT, float, offset, NULL)
+OSL_CLOSURE_STRUCT_END(HairTransmission, hair_transmission)
+
+OSL_CLOSURE_STRUCT_BEGIN(PrincipledHair, principled_hair)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, VECTOR, packed_float3, N, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, VECTOR, packed_float3, sigma, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, v, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, s, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, m0_roughness, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, alpha, NULL)
+ OSL_CLOSURE_STRUCT_MEMBER(PrincipledHair, FLOAT, float, eta, NULL)
+OSL_CLOSURE_STRUCT_END(PrincipledHair, principled_hair)
+
+OSL_CLOSURE_STRUCT_BEGIN(VolumeAbsorption, absorption)
+OSL_CLOSURE_STRUCT_END(VolumeAbsorption, absorption)
+
+OSL_CLOSURE_STRUCT_BEGIN(VolumeHenyeyGreenstein, henyey_greenstein)
+ OSL_CLOSURE_STRUCT_MEMBER(VolumeHenyeyGreenstein, FLOAT, float, g, NULL)
+OSL_CLOSURE_STRUCT_END(VolumeHenyeyGreenstein, henyey_greenstein)
+
+#undef OSL_CLOSURE_STRUCT_BEGIN
+#undef OSL_CLOSURE_STRUCT_END
+#undef OSL_CLOSURE_STRUCT_MEMBER
+#undef OSL_CLOSURE_STRUCT_ARRAY_MEMBER
diff --git a/intern/cycles/kernel/osl/emissive.cpp b/intern/cycles/kernel/osl/emissive.cpp
deleted file mode 100644
index 1a01b215836..00000000000
--- a/intern/cycles/kernel/osl/emissive.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- *
- * Adapted from Open Shading Language
- * Copyright (c) 2009-2010 Sony Pictures Imageworks Inc., et al.
- * All Rights Reserved.
- *
- * Modifications Copyright 2011-2022 Blender Foundation. */
-
-#include <OpenImageIO/fmath.h>
-
-#include <OSL/genclosure.h>
-
-#include "kernel/osl/closures.h"
-
-// clang-format off
-#include "kernel/device/cpu/compat.h"
-#include "kernel/types.h"
-#include "kernel/closure/alloc.h"
-#include "kernel/closure/emissive.h"
-// clang-format on
-
-CCL_NAMESPACE_BEGIN
-
-using namespace OSL;
-
-/// Variable cone emissive closure
-///
-/// This primitive emits in a cone having a configurable
-/// penumbra area where the light decays to 0 reaching the
-/// outer_angle limit. It can also behave as a lambertian emitter
-/// if the provided angles are PI/2, which is the default
-///
-class GenericEmissiveClosure : public CClosurePrimitive {
- public:
- void setup(ShaderData *sd, uint32_t /* path_flag */, float3 weight)
- {
- emission_setup(sd, weight);
- }
-};
-
-ClosureParam *closure_emission_params()
-{
- static ClosureParam params[] = {CLOSURE_STRING_KEYPARAM(GenericEmissiveClosure, label, "label"),
- CLOSURE_FINISH_PARAM(GenericEmissiveClosure)};
- return params;
-}
-
-CCLOSURE_PREPARE(closure_emission_prepare, GenericEmissiveClosure)
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/globals.cpp b/intern/cycles/kernel/osl/globals.cpp
new file mode 100644
index 00000000000..92b91182178
--- /dev/null
+++ b/intern/cycles/kernel/osl/globals.cpp
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include <OSL/oslexec.h>
+
+#include "kernel/device/cpu/compat.h"
+#include "kernel/device/cpu/globals.h"
+
+#include "kernel/types.h"
+
+#include "kernel/osl/globals.h"
+#include "kernel/osl/services.h"
+
+CCL_NAMESPACE_BEGIN
+
+void OSLGlobals::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals)
+{
+ /* no osl used? */
+ if (!osl_globals->use) {
+ kg->osl = NULL;
+ return;
+ }
+
+ /* Per thread kernel data init. */
+ kg->osl = osl_globals;
+
+ OSL::ShadingSystem *ss = kg->osl->ss;
+ OSLThreadData *tdata = new OSLThreadData();
+
+ memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
+ tdata->globals.tracedata = &tdata->tracedata;
+ tdata->osl_thread_info = ss->create_thread_info();
+ tdata->context = ss->get_context(tdata->osl_thread_info);
+
+ tdata->oiio_thread_info = osl_globals->ts->get_perthread_info();
+
+ kg->osl_ss = (OSLShadingSystem *)ss;
+ kg->osl_tdata = tdata;
+}
+
+void OSLGlobals::thread_free(KernelGlobalsCPU *kg)
+{
+ if (!kg->osl)
+ return;
+
+ OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
+ OSLThreadData *tdata = kg->osl_tdata;
+ ss->release_context(tdata->context);
+
+ ss->destroy_thread_info(tdata->osl_thread_info);
+
+ delete tdata;
+
+ kg->osl = NULL;
+ kg->osl_ss = NULL;
+ kg->osl_tdata = NULL;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/globals.h b/intern/cycles/kernel/osl/globals.h
index 172091c55f5..2b002a0033e 100644
--- a/intern/cycles/kernel/osl/globals.h
+++ b/intern/cycles/kernel/osl/globals.h
@@ -41,6 +41,10 @@ struct OSLGlobals {
use = false;
}
+ /* per thread data */
+ static void thread_init(struct KernelGlobalsCPU *kg, OSLGlobals *osl_globals);
+ static void thread_free(struct KernelGlobalsCPU *kg);
+
bool use;
/* shading system */
@@ -56,16 +60,8 @@ struct OSLGlobals {
OSL::ShaderGroupRef background_state;
/* attributes */
- struct Attribute {
- TypeDesc type;
- AttributeDescriptor desc;
- ParamValue value;
- };
-
- typedef unordered_map<ustring, Attribute, ustringHash> AttributeMap;
typedef unordered_map<ustring, int, ustringHash> ObjectNameMap;
- vector<AttributeMap> attribute_map;
ObjectNameMap object_name_map;
vector<ustring> object_names;
};
diff --git a/intern/cycles/kernel/osl/shader.h b/intern/cycles/kernel/osl/osl.h
index f0ab49dd6a8..bef23f3eea1 100644
--- a/intern/cycles/kernel/osl/shader.h
+++ b/intern/cycles/kernel/osl/osl.h
@@ -1,10 +1,7 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
-#ifndef __OSL_SHADER_H__
-#define __OSL_SHADER_H__
-
-#ifdef WITH_OSL
+#pragma once
/* OSL Shader Engine
*
@@ -16,30 +13,12 @@
* This means no thread state must be passed along in the kernel itself.
*/
-# include "kernel/types.h"
+#include "kernel/osl/types.h"
CCL_NAMESPACE_BEGIN
-class Scene;
-
-struct ShaderClosure;
-struct ShaderData;
-struct IntegratorStateCPU;
-struct differential3;
-struct KernelGlobalsCPU;
-
-struct OSLGlobals;
-struct OSLShadingSystem;
-
class OSLShader {
public:
- /* init */
- static void register_closures(OSLShadingSystem *ss);
-
- /* per thread data */
- static void thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals);
- static void thread_free(KernelGlobalsCPU *kg);
-
/* eval */
static void eval_surface(const KernelGlobalsCPU *kg,
const void *state,
@@ -54,16 +33,6 @@ class OSLShader {
ShaderData *sd,
uint32_t path_flag);
static void eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd);
-
- /* attributes */
- static int find_attribute(const KernelGlobalsCPU *kg,
- const ShaderData *sd,
- uint id,
- AttributeDescriptor *desc);
};
CCL_NAMESPACE_END
-
-#endif
-
-#endif /* __OSL_SHADER_H__ */
diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp
index 6b7981b7f3a..b744422ee78 100644
--- a/intern/cycles/kernel/osl/services.cpp
+++ b/intern/cycles/kernel/osl/services.cpp
@@ -18,22 +18,17 @@
#include "scene/pointcloud.h"
#include "scene/scene.h"
-#include "kernel/osl/closures.h"
#include "kernel/osl/globals.h"
#include "kernel/osl/services.h"
-#include "kernel/osl/shader.h"
#include "util/foreach.h"
#include "util/log.h"
#include "util/string.h"
-// clang-format off
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/globals.h"
#include "kernel/device/cpu/image.h"
-#include "kernel/util/differential.h"
-
#include "kernel/integrator/state.h"
#include "kernel/integrator/state_flow.h"
@@ -45,10 +40,10 @@
#include "kernel/camera/projection.h"
#include "kernel/integrator/path_state.h"
-#include "kernel/integrator/shader_eval.h"
+
+#include "kernel/svm/svm.h"
#include "kernel/util/color.h"
-// clang-format on
CCL_NAMESPACE_BEGIN
@@ -125,14 +120,14 @@ ustring OSLRenderServices::u_v("v");
ustring OSLRenderServices::u_empty;
OSLRenderServices::OSLRenderServices(OSL::TextureSystem *texture_system)
- : texture_system(texture_system)
+ : OSL::RendererServices(texture_system)
{
}
OSLRenderServices::~OSLRenderServices()
{
- if (texture_system) {
- VLOG_INFO << "OSL texture system stats:\n" << texture_system->getstats();
+ if (m_texturesys) {
+ VLOG_INFO << "OSL texture system stats:\n" << m_texturesys->getstats();
}
}
@@ -452,6 +447,7 @@ static bool set_attribute_float2(float2 f[3], TypeDesc type, bool derivatives, v
return false;
}
+#if 0
static bool set_attribute_float2(float2 f, TypeDesc type, bool derivatives, void *val)
{
float2 fv[3];
@@ -462,6 +458,7 @@ static bool set_attribute_float2(float2 f, TypeDesc type, bool derivatives, void
return set_attribute_float2(fv, type, derivatives, val);
}
+#endif
static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val)
{
@@ -590,6 +587,7 @@ static bool set_attribute_float4(float4 f[3], TypeDesc type, bool derivatives, v
return false;
}
+#if 0
static bool set_attribute_float4(float4 f, TypeDesc type, bool derivatives, void *val)
{
float4 fv[3];
@@ -600,6 +598,7 @@ static bool set_attribute_float4(float4 f, TypeDesc type, bool derivatives, void
return set_attribute_float4(fv, type, derivatives, val);
}
+#endif
static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val)
{
@@ -741,76 +740,75 @@ static bool set_attribute_matrix(const Transform &tfm, TypeDesc type, void *val)
return false;
}
-static bool get_primitive_attribute(const KernelGlobalsCPU *kg,
- const ShaderData *sd,
- const OSLGlobals::Attribute &attr,
- const TypeDesc &type,
- bool derivatives,
- void *val)
+static bool get_object_attribute(const KernelGlobalsCPU *kg,
+ ShaderData *sd,
+ const AttributeDescriptor &desc,
+ const TypeDesc &type,
+ bool derivatives,
+ void *val)
{
- if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
- attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) {
+ if (desc.type == NODE_ATTR_FLOAT3) {
float3 fval[3];
- if (primitive_is_volume_attribute(sd, attr.desc)) {
- fval[0] = primitive_volume_attribute_float3(kg, sd, attr.desc);
+#ifdef __VOLUME__
+ if (primitive_is_volume_attribute(sd, desc)) {
+ fval[0] = primitive_volume_attribute_float3(kg, sd, desc);
}
- else {
+ else
+#endif
+ {
memset(fval, 0, sizeof(fval));
fval[0] = primitive_surface_attribute_float3(
- kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+ kg, sd, desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
}
return set_attribute_float3(fval, type, derivatives, val);
}
- else if (attr.type == TypeFloat2) {
- if (primitive_is_volume_attribute(sd, attr.desc)) {
+ else if (desc.type == NODE_ATTR_FLOAT2) {
+#ifdef __VOLUME__
+ if (primitive_is_volume_attribute(sd, desc)) {
assert(!"Float2 attribute not support for volumes");
return false;
}
- else {
+ else
+#endif
+ {
float2 fval[3];
fval[0] = primitive_surface_attribute_float2(
- kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+ kg, sd, desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
return set_attribute_float2(fval, type, derivatives, val);
}
}
- else if (attr.type == TypeDesc::TypeFloat) {
+ else if (desc.type == NODE_ATTR_FLOAT) {
float fval[3];
- if (primitive_is_volume_attribute(sd, attr.desc)) {
+#ifdef __VOLUME__
+ if (primitive_is_volume_attribute(sd, desc)) {
memset(fval, 0, sizeof(fval));
- fval[0] = primitive_volume_attribute_float(kg, sd, attr.desc);
+ fval[0] = primitive_volume_attribute_float(kg, sd, desc);
}
- else {
+ else
+#endif
+ {
fval[0] = primitive_surface_attribute_float(
- kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+ kg, sd, desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
}
return set_attribute_float(fval, type, derivatives, val);
}
- else if (attr.type == TypeDesc::TypeFloat4 || attr.type == TypeRGBA) {
+ else if (desc.type == NODE_ATTR_FLOAT4 || desc.type == NODE_ATTR_RGBA) {
float4 fval[3];
- if (primitive_is_volume_attribute(sd, attr.desc)) {
+#ifdef __VOLUME__
+ if (primitive_is_volume_attribute(sd, desc)) {
memset(fval, 0, sizeof(fval));
- fval[0] = primitive_volume_attribute_float4(kg, sd, attr.desc);
+ fval[0] = primitive_volume_attribute_float4(kg, sd, desc);
}
- else {
+ else
+#endif
+ {
fval[0] = primitive_surface_attribute_float4(
- kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
+ kg, sd, desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
}
return set_attribute_float4(fval, type, derivatives, val);
}
- else {
- return false;
- }
-}
-
-static bool get_mesh_attribute(const KernelGlobalsCPU *kg,
- const ShaderData *sd,
- const OSLGlobals::Attribute &attr,
- const TypeDesc &type,
- bool derivatives,
- void *val)
-{
- if (attr.type == TypeDesc::TypeMatrix) {
- Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc);
+ else if (desc.type == NODE_ATTR_MATRIX) {
+ Transform tfm = primitive_attribute_matrix(kg, desc);
return set_attribute_matrix(tfm, type, val);
}
else {
@@ -818,44 +816,6 @@ static bool get_mesh_attribute(const KernelGlobalsCPU *kg,
}
}
-static bool get_object_attribute(const OSLGlobals::Attribute &attr,
- TypeDesc type,
- bool derivatives,
- void *val)
-{
- if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
- attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) {
- const float *data = (const float *)attr.value.data();
- return set_attribute_float3(make_float3(data[0], data[1], data[2]), type, derivatives, val);
- }
- else if (attr.type == TypeFloat2) {
- const float *data = (const float *)attr.value.data();
- return set_attribute_float2(make_float2(data[0], data[1]), type, derivatives, val);
- }
- else if (attr.type == TypeDesc::TypeFloat) {
- const float *data = (const float *)attr.value.data();
- return set_attribute_float(data[0], type, derivatives, val);
- }
- else if (attr.type == TypeRGBA || attr.type == TypeDesc::TypeFloat4) {
- const float *data = (const float *)attr.value.data();
- return set_attribute_float4(
- make_float4(data[0], data[1], data[2], data[3]), type, derivatives, val);
- }
- else if (attr.type == type) {
- size_t datasize = attr.value.datasize();
-
- memcpy(val, attr.value.data(), datasize);
- if (derivatives) {
- memset((char *)val + datasize, 0, datasize * 2);
- }
-
- return true;
- }
- else {
- return false;
- }
-}
-
bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg,
ShaderData *sd,
ustring name,
@@ -980,6 +940,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg
float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0);
return set_attribute_float(f, type, derivatives, val);
}
+#ifdef __HAIR__
/* Hair Attributes */
else if (name == u_is_curve) {
float f = (sd->type & PRIMITIVE_CURVE) != 0;
@@ -997,6 +958,8 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg
float f = curve_random(kg, sd);
return set_attribute_float(f, type, derivatives, val);
}
+#endif
+#ifdef __POINTCLOUD__
/* point attributes */
else if (name == u_is_point) {
float f = (sd->type & PRIMITIVE_POINT) != 0;
@@ -1014,6 +977,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg
float f = point_random(kg, sd);
return set_attribute_float(f, type, derivatives, val);
}
+#endif
else if (name == u_normal_map_normal) {
if (sd->type & PRIMITIVE_TRIANGLE) {
float3 f = triangle_smooth_normal_unnormalized(kg, sd, sd->Ng, sd->prim, sd->u, sd->v);
@@ -1024,7 +988,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg
}
}
else {
- return false;
+ return get_background_attribute(kg, sd, name, type, derivatives, val);
}
}
@@ -1102,8 +1066,9 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg,
ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
if (derivatives) {
- ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0];
- ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0];
+ const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
+ ndc[1] = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc[0];
+ ndc[2] = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc[0];
}
}
@@ -1131,7 +1096,6 @@ bool OSLRenderServices::get_attribute(
ShaderData *sd, bool derivatives, ustring object_name, TypeDesc type, ustring name, void *val)
{
const KernelGlobalsCPU *kg = sd->osl_globals;
- int prim_type = 0;
int object;
/* lookup of attribute on another object */
@@ -1145,44 +1109,18 @@ bool OSLRenderServices::get_attribute(
}
else {
object = sd->object;
- prim_type = attribute_primitive_type(kg, sd);
-
- if (object == OBJECT_NONE)
- return get_background_attribute(kg, sd, name, type, derivatives, val);
}
/* find attribute on object */
- object = object * ATTR_PRIM_TYPES + prim_type;
- OSLGlobals::AttributeMap &attribute_map = kg->osl->attribute_map[object];
- OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
-
- if (it != attribute_map.end()) {
- const OSLGlobals::Attribute &attr = it->second;
-
- if (attr.desc.element != ATTR_ELEMENT_OBJECT) {
- /* triangle and vertex attributes */
- if (get_primitive_attribute(kg, sd, attr, type, derivatives, val))
- return true;
- else
- return get_mesh_attribute(kg, sd, attr, type, derivatives, val);
- }
- else {
- /* object attribute */
- return get_object_attribute(attr, type, derivatives, val);
- }
+ const AttributeDescriptor desc = find_attribute(
+ kg, object, sd->prim, object == sd->object ? sd->type : PRIMITIVE_NONE, name.hash());
+ if (desc.offset != ATTR_STD_NOT_FOUND) {
+ return get_object_attribute(kg, sd, desc, type, derivatives, val);
}
else {
/* not found in attribute, check standard object info */
- bool is_std_object_attribute = get_object_standard_attribute(
- kg, sd, name, type, derivatives, val);
-
- if (is_std_object_attribute)
- return true;
-
- return get_background_attribute(kg, sd, name, type, derivatives, val);
+ return get_object_standard_attribute(kg, sd, name, type, derivatives, val);
}
-
- return false;
}
bool OSLRenderServices::get_userdata(
@@ -1209,7 +1147,7 @@ TextureSystem::TextureHandle *OSLRenderServices::get_texture_handle(ustring file
}
/* Get handle from OpenImageIO. */
- OSL::TextureSystem *ts = texture_system;
+ OSL::TextureSystem *ts = m_texturesys;
TextureSystem::TextureHandle *handle = ts->get_texture_handle(filename);
if (handle == NULL) {
return NULL;
@@ -1231,7 +1169,7 @@ bool OSLRenderServices::good(TextureSystem::TextureHandle *texture_handle)
OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
if (handle->oiio_handle) {
- OSL::TextureSystem *ts = texture_system;
+ OSL::TextureSystem *ts = m_texturesys;
return ts->good(handle->oiio_handle);
}
else {
@@ -1353,7 +1291,7 @@ bool OSLRenderServices::texture(ustring filename,
}
case OSLTextureHandle::OIIO: {
/* OpenImageIO texture cache. */
- OSL::TextureSystem *ts = texture_system;
+ OSL::TextureSystem *ts = m_texturesys;
if (handle && handle->oiio_handle) {
if (texture_thread_info == NULL) {
@@ -1457,7 +1395,7 @@ bool OSLRenderServices::texture3d(ustring filename,
}
case OSLTextureHandle::OIIO: {
/* OpenImageIO texture cache. */
- OSL::TextureSystem *ts = texture_system;
+ OSL::TextureSystem *ts = m_texturesys;
if (handle && handle->oiio_handle) {
if (texture_thread_info == NULL) {
@@ -1541,7 +1479,7 @@ bool OSLRenderServices::environment(ustring filename,
ustring *errormessage)
{
OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
- OSL::TextureSystem *ts = texture_system;
+ OSL::TextureSystem *ts = m_texturesys;
bool status = false;
if (handle && handle->oiio_handle) {
@@ -1613,7 +1551,7 @@ bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg,
}
/* Get texture info from OpenImageIO. */
- OSL::TextureSystem *ts = texture_system;
+ OSL::TextureSystem *ts = m_texturesys;
return ts->get_texture_info(filename, subimage, dataname, datatype, data);
}
@@ -1667,8 +1605,8 @@ bool OSLRenderServices::trace(TraceOpt &options,
/* setup ray */
Ray ray;
- ray.P = TO_FLOAT3(P);
- ray.D = TO_FLOAT3(R);
+ ray.P = make_float3(P.x, P.y, P.z);
+ ray.D = make_float3(R.x, R.y, R.z);
ray.tmin = 0.0f;
ray.tmax = (options.maxdist == 1.0e30f) ? FLT_MAX : options.maxdist - options.mindist;
ray.time = sd->time;
@@ -1691,12 +1629,12 @@ bool OSLRenderServices::trace(TraceOpt &options,
/* ray differentials */
differential3 dP;
- dP.dx = TO_FLOAT3(dPdx);
- dP.dy = TO_FLOAT3(dPdy);
+ dP.dx = make_float3(dPdx.x, dPdx.y, dPdx.z);
+ dP.dy = make_float3(dPdy.x, dPdy.y, dPdy.z);
ray.dP = differential_make_compact(dP);
differential3 dD;
- dD.dx = TO_FLOAT3(dRdx);
- dD.dy = TO_FLOAT3(dRdy);
+ dD.dx = make_float3(dRdx.x, dRdx.y, dRdx.z);
+ dD.dy = make_float3(dRdy.x, dRdy.y, dRdy.z);
ray.dD = differential_make_compact(dD);
/* allocate trace data */
@@ -1755,11 +1693,13 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
return set_attribute_float3(sd->Ng, type, derivatives, val);
}
else if (name == u_P) {
- float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy};
+ const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
+ float3 f[3] = {sd->P, dP.dx, dP.dy};
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == u_I) {
- float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy};
+ const differential3 dI = differential_from_compact(sd->I, sd->dI);
+ float3 f[3] = {sd->I, dI.dx, dI.dy};
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == u_u) {
diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h
index edffd912bad..334b6682e34 100644
--- a/intern/cycles/kernel/osl/services.h
+++ b/intern/cycles/kernel/osl/services.h
@@ -76,6 +76,8 @@ class OSLRenderServices : public OSL::RendererServices {
OSLRenderServices(OSL::TextureSystem *texture_system);
~OSLRenderServices();
+ static void register_closures(OSL::ShadingSystem *ss);
+
bool get_matrix(OSL::ShaderGlobals *sg,
OSL::Matrix44 &result,
OSL::TransformationPtr xform,
@@ -321,7 +323,6 @@ class OSLRenderServices : public OSL::RendererServices {
* globals to be shared between different render sessions. This saves memory,
* and is required because texture handles are cached as part of the shared
* shading system. */
- OSL::TextureSystem *texture_system;
OSLTextureHandleMap textures;
};
diff --git a/intern/cycles/kernel/osl/shader.cpp b/intern/cycles/kernel/osl/shader.cpp
deleted file mode 100644
index af96c0070e3..00000000000
--- a/intern/cycles/kernel/osl/shader.cpp
+++ /dev/null
@@ -1,417 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright 2011-2022 Blender Foundation */
-
-#include <OSL/oslexec.h>
-
-// clang-format off
-#include "kernel/device/cpu/compat.h"
-#include "kernel/device/cpu/globals.h"
-
-#include "kernel/types.h"
-
-#include "kernel/geom/object.h"
-
-#include "kernel/integrator/state.h"
-
-#include "kernel/osl/closures.h"
-#include "kernel/osl/globals.h"
-#include "kernel/osl/services.h"
-#include "kernel/osl/shader.h"
-// clang-format on
-
-#include "scene/attribute.h"
-
-CCL_NAMESPACE_BEGIN
-
-/* Threads */
-
-void OSLShader::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals)
-{
- /* no osl used? */
- if (!osl_globals->use) {
- kg->osl = NULL;
- return;
- }
-
- /* Per thread kernel data init. */
- kg->osl = osl_globals;
-
- OSL::ShadingSystem *ss = kg->osl->ss;
- OSLThreadData *tdata = new OSLThreadData();
-
- memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
- tdata->globals.tracedata = &tdata->tracedata;
- tdata->globals.flipHandedness = false;
- tdata->osl_thread_info = ss->create_thread_info();
- tdata->context = ss->get_context(tdata->osl_thread_info);
-
- tdata->oiio_thread_info = osl_globals->ts->get_perthread_info();
-
- kg->osl_ss = (OSLShadingSystem *)ss;
- kg->osl_tdata = tdata;
-}
-
-void OSLShader::thread_free(KernelGlobalsCPU *kg)
-{
- if (!kg->osl)
- return;
-
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
- OSLThreadData *tdata = kg->osl_tdata;
- ss->release_context(tdata->context);
-
- ss->destroy_thread_info(tdata->osl_thread_info);
-
- delete tdata;
-
- kg->osl = NULL;
- kg->osl_ss = NULL;
- kg->osl_tdata = NULL;
-}
-
-/* Globals */
-
-static void shaderdata_to_shaderglobals(const KernelGlobalsCPU *kg,
- ShaderData *sd,
- const void *state,
- uint32_t path_flag,
- OSLThreadData *tdata)
-{
- OSL::ShaderGlobals *globals = &tdata->globals;
-
- /* copy from shader data to shader globals */
- globals->P = TO_VEC3(sd->P);
- globals->dPdx = TO_VEC3(sd->dP.dx);
- globals->dPdy = TO_VEC3(sd->dP.dy);
- globals->I = TO_VEC3(sd->I);
- globals->dIdx = TO_VEC3(sd->dI.dx);
- globals->dIdy = TO_VEC3(sd->dI.dy);
- globals->N = TO_VEC3(sd->N);
- globals->Ng = TO_VEC3(sd->Ng);
- globals->u = sd->u;
- globals->dudx = sd->du.dx;
- globals->dudy = sd->du.dy;
- globals->v = sd->v;
- globals->dvdx = sd->dv.dx;
- globals->dvdy = sd->dv.dy;
- globals->dPdu = TO_VEC3(sd->dPdu);
- globals->dPdv = TO_VEC3(sd->dPdv);
- globals->surfacearea = 1.0f;
- globals->time = sd->time;
-
- /* booleans */
- globals->raytype = path_flag;
- globals->backfacing = (sd->flag & SD_BACKFACING);
-
- /* shader data to be used in services callbacks */
- globals->renderstate = sd;
-
- /* hacky, we leave it to services to fetch actual object matrix */
- globals->shader2common = sd;
- globals->object2common = sd;
-
- /* must be set to NULL before execute */
- globals->Ci = NULL;
-
- /* clear trace data */
- tdata->tracedata.init = false;
-
- /* Used by render-services. */
- sd->osl_globals = kg;
- if (path_flag & PATH_RAY_SHADOW) {
- sd->osl_path_state = nullptr;
- sd->osl_shadow_path_state = (const IntegratorShadowStateCPU *)state;
- }
- else {
- sd->osl_path_state = (const IntegratorStateCPU *)state;
- sd->osl_shadow_path_state = nullptr;
- }
-}
-
-/* Surface */
-
-static void flatten_surface_closure_tree(ShaderData *sd,
- uint32_t path_flag,
- const OSL::ClosureColor *closure,
- float3 weight = make_float3(1.0f, 1.0f, 1.0f))
-{
- /* OSL gives us a closure tree, we flatten it into arrays per
- * closure type, for evaluation, sampling, etc later on. */
-
- switch (closure->id) {
- case OSL::ClosureColor::MUL: {
- OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
- flatten_surface_closure_tree(sd, path_flag, mul->closure, TO_FLOAT3(mul->weight) * weight);
- break;
- }
- case OSL::ClosureColor::ADD: {
- OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
- flatten_surface_closure_tree(sd, path_flag, add->closureA, weight);
- flatten_surface_closure_tree(sd, path_flag, add->closureB, weight);
- break;
- }
- default: {
- OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
- CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
- if (prim) {
-#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
- weight = weight * TO_FLOAT3(comp->w);
-#endif
- prim->setup(sd, path_flag, weight);
- }
- break;
- }
- }
-}
-
-void OSLShader::eval_surface(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag)
-{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
- /* execute shader for this point */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
- int shader = sd->shader & SHADER_MASK;
-
- /* automatic bump shader */
- if (kg->osl->bump_state[shader]) {
- /* save state */
- float3 P = sd->P;
- float3 dPdx = sd->dP.dx;
- float3 dPdy = sd->dP.dy;
-
- /* set state as if undisplaced */
- if (sd->flag & SD_HAS_DISPLACEMENT) {
- float data[9];
- bool found = kg->osl->services->get_attribute(sd,
- true,
- OSLRenderServices::u_empty,
- TypeDesc::TypeVector,
- OSLRenderServices::u_geom_undisplaced,
- data);
- (void)found;
- assert(found);
-
- memcpy(&sd->P, data, sizeof(float) * 3);
- memcpy(&sd->dP.dx, data + 3, sizeof(float) * 3);
- memcpy(&sd->dP.dy, data + 6, sizeof(float) * 3);
-
- object_position_transform(kg, sd, &sd->P);
- object_dir_transform(kg, sd, &sd->dP.dx);
- object_dir_transform(kg, sd, &sd->dP.dy);
-
- globals->P = TO_VEC3(sd->P);
- globals->dPdx = TO_VEC3(sd->dP.dx);
- globals->dPdy = TO_VEC3(sd->dP.dy);
- }
-
- /* execute bump shader */
- ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
-
- /* reset state */
- sd->P = P;
- sd->dP.dx = dPdx;
- sd->dP.dy = dPdy;
-
- globals->P = TO_VEC3(P);
- globals->dPdx = TO_VEC3(dPdx);
- globals->dPdy = TO_VEC3(dPdy);
- }
-
- /* surface shader */
- if (kg->osl->surface_state[shader]) {
- ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
- }
-
- /* flatten closure tree */
- if (globals->Ci)
- flatten_surface_closure_tree(sd, path_flag, globals->Ci);
-}
-
-/* Background */
-
-static void flatten_background_closure_tree(ShaderData *sd,
- const OSL::ClosureColor *closure,
- float3 weight = make_float3(1.0f, 1.0f, 1.0f))
-{
- /* OSL gives us a closure tree, if we are shading for background there
- * is only one supported closure type at the moment, which has no evaluation
- * functions, so we just sum the weights */
-
- switch (closure->id) {
- case OSL::ClosureColor::MUL: {
- OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
- flatten_background_closure_tree(sd, mul->closure, weight * TO_FLOAT3(mul->weight));
- break;
- }
- case OSL::ClosureColor::ADD: {
- OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
-
- flatten_background_closure_tree(sd, add->closureA, weight);
- flatten_background_closure_tree(sd, add->closureB, weight);
- break;
- }
- default: {
- OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
- CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
- if (prim) {
-#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
- weight = weight * TO_FLOAT3(comp->w);
-#endif
- prim->setup(sd, 0, weight);
- }
- break;
- }
- }
-}
-
-void OSLShader::eval_background(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag)
-{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
- /* execute shader for this point */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
-
- if (kg->osl->background_state) {
- ss->execute(octx, *(kg->osl->background_state), *globals);
- }
-
- /* return background color immediately */
- if (globals->Ci)
- flatten_background_closure_tree(sd, globals->Ci);
-}
-
-/* Volume */
-
-static void flatten_volume_closure_tree(ShaderData *sd,
- const OSL::ClosureColor *closure,
- float3 weight = make_float3(1.0f, 1.0f, 1.0f))
-{
- /* OSL gives us a closure tree, we flatten it into arrays per
- * closure type, for evaluation, sampling, etc later on. */
-
- switch (closure->id) {
- case OSL::ClosureColor::MUL: {
- OSL::ClosureMul *mul = (OSL::ClosureMul *)closure;
- flatten_volume_closure_tree(sd, mul->closure, TO_FLOAT3(mul->weight) * weight);
- break;
- }
- case OSL::ClosureColor::ADD: {
- OSL::ClosureAdd *add = (OSL::ClosureAdd *)closure;
- flatten_volume_closure_tree(sd, add->closureA, weight);
- flatten_volume_closure_tree(sd, add->closureB, weight);
- break;
- }
- default: {
- OSL::ClosureComponent *comp = (OSL::ClosureComponent *)closure;
- CClosurePrimitive *prim = (CClosurePrimitive *)comp->data();
-
- if (prim) {
-#ifdef OSL_SUPPORTS_WEIGHTED_CLOSURE_COMPONENTS
- weight = weight * TO_FLOAT3(comp->w);
-#endif
- prim->setup(sd, 0, weight);
- }
- }
- }
-}
-
-void OSLShader::eval_volume(const KernelGlobalsCPU *kg,
- const void *state,
- ShaderData *sd,
- uint32_t path_flag)
-{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
- shaderdata_to_shaderglobals(kg, sd, state, path_flag, tdata);
-
- /* execute shader */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
- int shader = sd->shader & SHADER_MASK;
-
- if (kg->osl->volume_state[shader]) {
- ss->execute(octx, *(kg->osl->volume_state[shader]), *globals);
- }
-
- /* flatten closure tree */
- if (globals->Ci)
- flatten_volume_closure_tree(sd, globals->Ci);
-}
-
-/* Displacement */
-
-void OSLShader::eval_displacement(const KernelGlobalsCPU *kg, const void *state, ShaderData *sd)
-{
- /* setup shader globals from shader data */
- OSLThreadData *tdata = kg->osl_tdata;
-
- shaderdata_to_shaderglobals(kg, sd, state, 0, tdata);
-
- /* execute shader */
- OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
- OSL::ShaderGlobals *globals = &tdata->globals;
- OSL::ShadingContext *octx = tdata->context;
- int shader = sd->shader & SHADER_MASK;
-
- if (kg->osl->displacement_state[shader]) {
- ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals);
- }
-
- /* get back position */
- sd->P = TO_FLOAT3(globals->P);
-}
-
-/* Attributes */
-
-int OSLShader::find_attribute(const KernelGlobalsCPU *kg,
- const ShaderData *sd,
- uint id,
- AttributeDescriptor *desc)
-{
- /* for OSL, a hash map is used to lookup the attribute by name. */
- int object = sd->object * ATTR_PRIM_TYPES;
-
- OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object];
- ustring stdname(std::string("geom:") +
- std::string(Attribute::standard_name((AttributeStandard)id)));
- OSLGlobals::AttributeMap::const_iterator it = attr_map.find(stdname);
-
- if (it != attr_map.end()) {
- const OSLGlobals::Attribute &osl_attr = it->second;
- *desc = osl_attr.desc;
-
- if (sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) {
- desc->offset = ATTR_STD_NOT_FOUND;
- return ATTR_STD_NOT_FOUND;
- }
-
- /* return result */
- if (osl_attr.desc.element == ATTR_ELEMENT_NONE) {
- desc->offset = ATTR_STD_NOT_FOUND;
- }
- return desc->offset;
- }
- else {
- desc->offset = ATTR_STD_NOT_FOUND;
- return (int)ATTR_STD_NOT_FOUND;
- }
-}
-
-CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/shaders/CMakeLists.txt b/intern/cycles/kernel/osl/shaders/CMakeLists.txt
index 741bce7c399..c79af3f6112 100644
--- a/intern/cycles/kernel/osl/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/shaders/CMakeLists.txt
@@ -57,6 +57,10 @@ set(SRC_OSL
node_math.osl
node_mix.osl
node_mix_closure.osl
+ node_mix_color.osl
+ node_mix_float.osl
+ node_mix_vector.osl
+ node_mix_vector_non_uniform.osl
node_musgrave_texture.osl
node_noise_texture.osl
node_normal.osl
@@ -109,6 +113,7 @@ file(GLOB SRC_OSL_HEADER_DIST ${OSL_SHADER_DIR}/*.h)
set(SRC_OSL_HEADERS
node_color.h
+ node_color_blend.h
node_fresnel.h
node_hash.h
node_math.h
diff --git a/intern/cycles/kernel/osl/shaders/node_color_blend.h b/intern/cycles/kernel/osl/shaders/node_color_blend.h
new file mode 100644
index 00000000000..ab4b4809a97
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_color_blend.h
@@ -0,0 +1,264 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+color node_mix_blend(float t, color col1, color col2)
+{
+ return mix(col1, col2, t);
+}
+
+color node_mix_add(float t, color col1, color col2)
+{
+ return mix(col1, col1 + col2, t);
+}
+
+color node_mix_mul(float t, color col1, color col2)
+{
+ return mix(col1, col1 * col2, t);
+}
+
+color node_mix_screen(float t, color col1, color col2)
+{
+ float tm = 1.0 - t;
+
+ return color(1.0) - (color(tm) + t * (color(1.0) - col2)) * (color(1.0) - col1);
+}
+
+color node_mix_overlay(float t, color col1, color col2)
+{
+ float tm = 1.0 - t;
+
+ color outcol = col1;
+
+ if (outcol[0] < 0.5)
+ outcol[0] *= tm + 2.0 * t * col2[0];
+ else
+ outcol[0] = 1.0 - (tm + 2.0 * t * (1.0 - col2[0])) * (1.0 - outcol[0]);
+
+ if (outcol[1] < 0.5)
+ outcol[1] *= tm + 2.0 * t * col2[1];
+ else
+ outcol[1] = 1.0 - (tm + 2.0 * t * (1.0 - col2[1])) * (1.0 - outcol[1]);
+
+ if (outcol[2] < 0.5)
+ outcol[2] *= tm + 2.0 * t * col2[2];
+ else
+ outcol[2] = 1.0 - (tm + 2.0 * t * (1.0 - col2[2])) * (1.0 - outcol[2]);
+
+ return outcol;
+}
+
+color node_mix_sub(float t, color col1, color col2)
+{
+ return mix(col1, col1 - col2, t);
+}
+
+color node_mix_div(float t, color col1, color col2)
+{
+ float tm = 1.0 - t;
+
+ color outcol = col1;
+
+ if (col2[0] != 0.0)
+ outcol[0] = tm * outcol[0] + t * outcol[0] / col2[0];
+ if (col2[1] != 0.0)
+ outcol[1] = tm * outcol[1] + t * outcol[1] / col2[1];
+ if (col2[2] != 0.0)
+ outcol[2] = tm * outcol[2] + t * outcol[2] / col2[2];
+
+ return outcol;
+}
+
+color node_mix_diff(float t, color col1, color col2)
+{
+ return mix(col1, abs(col1 - col2), t);
+}
+
+color node_mix_dark(float t, color col1, color col2)
+{
+ return mix(col1, min(col1, col2), t);
+}
+
+color node_mix_light(float t, color col1, color col2)
+{
+ return mix(col1, max(col1, col2), t);
+}
+
+color node_mix_dodge(float t, color col1, color col2)
+{
+ color outcol = col1;
+
+ if (outcol[0] != 0.0) {
+ float tmp = 1.0 - t * col2[0];
+ if (tmp <= 0.0)
+ outcol[0] = 1.0;
+ else if ((tmp = outcol[0] / tmp) > 1.0)
+ outcol[0] = 1.0;
+ else
+ outcol[0] = tmp;
+ }
+ if (outcol[1] != 0.0) {
+ float tmp = 1.0 - t * col2[1];
+ if (tmp <= 0.0)
+ outcol[1] = 1.0;
+ else if ((tmp = outcol[1] / tmp) > 1.0)
+ outcol[1] = 1.0;
+ else
+ outcol[1] = tmp;
+ }
+ if (outcol[2] != 0.0) {
+ float tmp = 1.0 - t * col2[2];
+ if (tmp <= 0.0)
+ outcol[2] = 1.0;
+ else if ((tmp = outcol[2] / tmp) > 1.0)
+ outcol[2] = 1.0;
+ else
+ outcol[2] = tmp;
+ }
+
+ return outcol;
+}
+
+color node_mix_burn(float t, color col1, color col2)
+{
+ float tmp, tm = 1.0 - t;
+
+ color outcol = col1;
+
+ tmp = tm + t * col2[0];
+ if (tmp <= 0.0)
+ outcol[0] = 0.0;
+ else if ((tmp = (1.0 - (1.0 - outcol[0]) / tmp)) < 0.0)
+ outcol[0] = 0.0;
+ else if (tmp > 1.0)
+ outcol[0] = 1.0;
+ else
+ outcol[0] = tmp;
+
+ tmp = tm + t * col2[1];
+ if (tmp <= 0.0)
+ outcol[1] = 0.0;
+ else if ((tmp = (1.0 - (1.0 - outcol[1]) / tmp)) < 0.0)
+ outcol[1] = 0.0;
+ else if (tmp > 1.0)
+ outcol[1] = 1.0;
+ else
+ outcol[1] = tmp;
+
+ tmp = tm + t * col2[2];
+ if (tmp <= 0.0)
+ outcol[2] = 0.0;
+ else if ((tmp = (1.0 - (1.0 - outcol[2]) / tmp)) < 0.0)
+ outcol[2] = 0.0;
+ else if (tmp > 1.0)
+ outcol[2] = 1.0;
+ else
+ outcol[2] = tmp;
+
+ return outcol;
+}
+
+color node_mix_hue(float t, color col1, color col2)
+{
+ color outcol = col1;
+ color hsv2 = rgb_to_hsv(col2);
+
+ if (hsv2[1] != 0.0) {
+ color hsv = rgb_to_hsv(outcol);
+ hsv[0] = hsv2[0];
+ color tmp = hsv_to_rgb(hsv);
+
+ outcol = mix(outcol, tmp, t);
+ }
+
+ return outcol;
+}
+
+color node_mix_sat(float t, color col1, color col2)
+{
+ float tm = 1.0 - t;
+
+ color outcol = col1;
+
+ color hsv = rgb_to_hsv(outcol);
+
+ if (hsv[1] != 0.0) {
+ color hsv2 = rgb_to_hsv(col2);
+
+ hsv[1] = tm * hsv[1] + t * hsv2[1];
+ outcol = hsv_to_rgb(hsv);
+ }
+
+ return outcol;
+}
+
+color node_mix_val(float t, color col1, color col2)
+{
+ float tm = 1.0 - t;
+
+ color hsv = rgb_to_hsv(col1);
+ color hsv2 = rgb_to_hsv(col2);
+
+ hsv[2] = tm * hsv[2] + t * hsv2[2];
+
+ return hsv_to_rgb(hsv);
+}
+
+color node_mix_color(float t, color col1, color col2)
+{
+ color outcol = col1;
+ color hsv2 = rgb_to_hsv(col2);
+
+ if (hsv2[1] != 0.0) {
+ color hsv = rgb_to_hsv(outcol);
+ hsv[0] = hsv2[0];
+ hsv[1] = hsv2[1];
+ color tmp = hsv_to_rgb(hsv);
+
+ outcol = mix(outcol, tmp, t);
+ }
+
+ return outcol;
+}
+
+color node_mix_soft(float t, color col1, color col2)
+{
+ float tm = 1.0 - t;
+
+ color one = color(1.0);
+ color scr = one - (one - col2) * (one - col1);
+
+ return tm * col1 + t * ((one - col1) * col2 * col1 + col1 * scr);
+}
+
+color node_mix_linear(float t, color col1, color col2)
+{
+ color outcol = col1;
+
+ if (col2[0] > 0.5)
+ outcol[0] = col1[0] + t * (2.0 * (col2[0] - 0.5));
+ else
+ outcol[0] = col1[0] + t * (2.0 * (col2[0]) - 1.0);
+
+ if (col2[1] > 0.5)
+ outcol[1] = col1[1] + t * (2.0 * (col2[1] - 0.5));
+ else
+ outcol[1] = col1[1] + t * (2.0 * (col2[1]) - 1.0);
+
+ if (col2[2] > 0.5)
+ outcol[2] = col1[2] + t * (2.0 * (col2[2] - 0.5));
+ else
+ outcol[2] = col1[2] + t * (2.0 * (col2[2]) - 1.0);
+
+ return outcol;
+}
+
+color node_mix_clamp(color col)
+{
+ color outcol = col;
+
+ outcol[0] = clamp(col[0], 0.0, 1.0);
+ outcol[1] = clamp(col[1], 0.0, 1.0);
+ outcol[2] = clamp(col[2], 0.0, 1.0);
+
+ return outcol;
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_geometry.osl b/intern/cycles/kernel/osl/shaders/node_geometry.osl
index 23d4c2ee66f..cc891abd6e3 100644
--- a/intern/cycles/kernel/osl/shaders/node_geometry.osl
+++ b/intern/cycles/kernel/osl/shaders/node_geometry.osl
@@ -20,7 +20,7 @@ shader node_geometry(normal NormalIn = N,
Normal = NormalIn;
TrueNormal = Ng;
Incoming = I;
- Parametric = point(u, v, 0.0);
+ Parametric = point(1.0 - u - v, u, 0.0);
Backfacing = backfacing();
if (bump_offset == "dx") {
diff --git a/intern/cycles/kernel/osl/shaders/node_mix_color.osl b/intern/cycles/kernel/osl/shaders/node_mix_color.osl
new file mode 100644
index 00000000000..3ddd89ed306
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_mix_color.osl
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "node_color.h"
+#include "node_color_blend.h"
+#include "stdcycles.h"
+
+shader node_mix_color(string blend_type = "mix",
+ int use_clamp = 0,
+ int use_clamp_result = 0,
+ float Factor = 0.5,
+ color A = 0.0,
+ color B = 0.0,
+ output color Result = 0.0)
+{
+ float t = (use_clamp) ? clamp(Factor, 0.0, 1.0) : Factor;
+
+ if (blend_type == "mix")
+ Result = mix(A, B, t);
+ if (blend_type == "add")
+ Result = node_mix_add(t, A, B);
+ if (blend_type == "multiply")
+ Result = node_mix_mul(t, A, B);
+ if (blend_type == "screen")
+ Result = node_mix_screen(t, A, B);
+ if (blend_type == "overlay")
+ Result = node_mix_overlay(t, A, B);
+ if (blend_type == "subtract")
+ Result = node_mix_sub(t, A, B);
+ if (blend_type == "divide")
+ Result = node_mix_div(t, A, B);
+ if (blend_type == "difference")
+ Result = node_mix_diff(t, A, B);
+ if (blend_type == "darken")
+ Result = node_mix_dark(t, A, B);
+ if (blend_type == "lighten")
+ Result = node_mix_light(t, A, B);
+ if (blend_type == "dodge")
+ Result = node_mix_dodge(t, A, B);
+ if (blend_type == "burn")
+ Result = node_mix_burn(t, A, B);
+ if (blend_type == "hue")
+ Result = node_mix_hue(t, A, B);
+ if (blend_type == "saturation")
+ Result = node_mix_sat(t, A, B);
+ if (blend_type == "value")
+ Result = node_mix_val(t, A, B);
+ if (blend_type == "color")
+ Result = node_mix_color(t, A, B);
+ if (blend_type == "soft_light")
+ Result = node_mix_soft(t, A, B);
+ if (blend_type == "linear_light")
+ Result = node_mix_linear(t, A, B);
+
+ if (use_clamp_result)
+ Result = clamp(Result, 0.0, 1.0);
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_mix_float.osl b/intern/cycles/kernel/osl/shaders/node_mix_float.osl
new file mode 100644
index 00000000000..fdc7b4eff6e
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_mix_float.osl
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "stdcycles.h"
+
+shader node_mix_float(
+ int use_clamp = 0, float Factor = 0.5, float A = 0.0, float B = 0.0, output float Result = 0.0)
+{
+ float t = (use_clamp) ? clamp(Factor, 0.0, 1.0) : Factor;
+ Result = mix(A, B, t);
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_mix_vector.osl b/intern/cycles/kernel/osl/shaders/node_mix_vector.osl
new file mode 100644
index 00000000000..d76396afb0d
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_mix_vector.osl
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "stdcycles.h"
+
+shader node_mix_vector(int use_clamp = 0,
+ float Factor = 0.5,
+ vector A = 0.0,
+ vector B = 0.0,
+ output vector Result = 0.0)
+{
+ float t = (use_clamp) ? clamp(Factor, 0.0, 1.0) : Factor;
+ Result = mix(A, B, t);
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_mix_vector_non_uniform.osl b/intern/cycles/kernel/osl/shaders/node_mix_vector_non_uniform.osl
new file mode 100644
index 00000000000..217856bcf2a
--- /dev/null
+++ b/intern/cycles/kernel/osl/shaders/node_mix_vector_non_uniform.osl
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#include "stdcycles.h"
+
+shader node_mix_vector_non_uniform(int use_clamp = 0,
+ vector Factor = 0.5,
+ vector A = 0.0,
+ vector B = 0.0,
+ output vector Result = 0.0)
+{
+ vector t = (use_clamp) ? clamp(Factor, 0.0, 1.0) : Factor;
+ Result = mix(A, B, t);
+}
diff --git a/intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl
index 391be8c14d7..fdda1ba9cd1 100644
--- a/intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl
+++ b/intern/cycles/kernel/osl/shaders/node_musgrave_texture.osl
@@ -114,13 +114,12 @@ float noise_musgrave_hybrid_multi_fractal_1d(
{
float p = co;
float pwHL = pow(lacunarity, -H);
- float pwr = pwHL;
- float value = safe_snoise(p) + offset;
- float weight = gain * value;
- p *= lacunarity;
+ float pwr = 1.0;
+ float value = 0.0;
+ float weight = 1.0;
- for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ for (int i = 0; (weight > 0.001) && (i < (int)octaves); i++) {
if (weight > 1.0) {
weight = 1.0;
}
@@ -133,8 +132,12 @@ float noise_musgrave_hybrid_multi_fractal_1d(
}
float rmd = octaves - floor(octaves);
- if (rmd != 0.0) {
- value += rmd * ((safe_snoise(p) + offset) * pwr);
+ if ((rmd != 0.0) && (weight > 0.001)) {
+ if (weight > 1.0) {
+ weight = 1.0;
+ }
+ float signal = (safe_snoise(p) + offset) * pwr;
+ value += rmd * weight * signal;
}
return value;
@@ -279,13 +282,12 @@ float noise_musgrave_hybrid_multi_fractal_2d(
{
vector2 p = co;
float pwHL = pow(lacunarity, -H);
- float pwr = pwHL;
- float value = safe_snoise(p) + offset;
- float weight = gain * value;
- p *= lacunarity;
+ float pwr = 1.0;
+ float value = 0.0;
+ float weight = 1.0;
- for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ for (int i = 0; (weight > 0.001) && (i < (int)octaves); i++) {
if (weight > 1.0) {
weight = 1.0;
}
@@ -298,8 +300,12 @@ float noise_musgrave_hybrid_multi_fractal_2d(
}
float rmd = octaves - floor(octaves);
- if (rmd != 0.0) {
- value += rmd * ((safe_snoise(p) + offset) * pwr);
+ if ((rmd != 0.0) && (weight > 0.001)) {
+ if (weight > 1.0) {
+ weight = 1.0;
+ }
+ float signal = (safe_snoise(p) + offset) * pwr;
+ value += rmd * weight * signal;
}
return value;
@@ -444,13 +450,12 @@ float noise_musgrave_hybrid_multi_fractal_3d(
{
vector3 p = co;
float pwHL = pow(lacunarity, -H);
- float pwr = pwHL;
- float value = safe_snoise(p) + offset;
- float weight = gain * value;
- p *= lacunarity;
+ float pwr = 1.0;
+ float value = 0.0;
+ float weight = 1.0;
- for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ for (int i = 0; (weight > 0.001) && (i < (int)octaves); i++) {
if (weight > 1.0) {
weight = 1.0;
}
@@ -463,8 +468,12 @@ float noise_musgrave_hybrid_multi_fractal_3d(
}
float rmd = octaves - floor(octaves);
- if (rmd != 0.0) {
- value += rmd * ((safe_snoise(p) + offset) * pwr);
+ if ((rmd != 0.0) && (weight > 0.001)) {
+ if (weight > 1.0) {
+ weight = 1.0;
+ }
+ float signal = (safe_snoise(p) + offset) * pwr;
+ value += rmd * weight * signal;
}
return value;
@@ -609,13 +618,12 @@ float noise_musgrave_hybrid_multi_fractal_4d(
{
vector4 p = co;
float pwHL = pow(lacunarity, -H);
- float pwr = pwHL;
- float value = safe_snoise(p) + offset;
- float weight = gain * value;
- p *= lacunarity;
+ float pwr = 1.0;
+ float value = 0.0;
+ float weight = 1.0;
- for (int i = 1; (weight > 0.001) && (i < (int)octaves); i++) {
+ for (int i = 0; (weight > 0.001) && (i < (int)octaves); i++) {
if (weight > 1.0) {
weight = 1.0;
}
@@ -628,8 +636,12 @@ float noise_musgrave_hybrid_multi_fractal_4d(
}
float rmd = octaves - floor(octaves);
- if (rmd != 0.0) {
- value += rmd * ((safe_snoise(p) + offset) * pwr);
+ if ((rmd != 0.0) && (weight > 0.001)) {
+ if (weight > 1.0) {
+ weight = 1.0;
+ }
+ float signal = (safe_snoise(p) + offset) * pwr;
+ value += rmd * weight * signal;
}
return value;
diff --git a/intern/cycles/kernel/osl/types.h b/intern/cycles/kernel/osl/types.h
new file mode 100644
index 00000000000..46e06114360
--- /dev/null
+++ b/intern/cycles/kernel/osl/types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+CCL_NAMESPACE_BEGIN
+
+/* Closure */
+
+enum ClosureTypeOSL {
+ OSL_CLOSURE_MUL_ID = -1,
+ OSL_CLOSURE_ADD_ID = -2,
+
+ OSL_CLOSURE_NONE_ID = 0,
+
+#define OSL_CLOSURE_STRUCT_BEGIN(Upper, lower) OSL_CLOSURE_##Upper##_ID,
+#include "closures_template.h"
+};
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/sample/jitter.h b/intern/cycles/kernel/sample/jitter.h
index b5cfa624406..e748f95fc7d 100644
--- a/intern/cycles/kernel/sample/jitter.h
+++ b/intern/cycles/kernel/sample/jitter.h
@@ -1,182 +1,97 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
+#include "kernel/sample/util.h"
+#include "util/hash.h"
+
#pragma once
CCL_NAMESPACE_BEGIN
-ccl_device_inline uint32_t laine_karras_permutation(uint32_t x, uint32_t seed)
+ccl_device float pmj_sample_1D(KernelGlobals kg,
+ uint sample,
+ const uint rng_hash,
+ const uint dimension)
{
- x += seed;
- x ^= (x * 0x6c50b47cu);
- x ^= x * 0xb82f1e52u;
- x ^= x * 0xc7afe638u;
- x ^= x * 0x8d22f6e6u;
+ uint seed = rng_hash;
- return x;
-}
+ /* Use the same sample sequence seed for all pixels when using
+ * scrambling distance. */
+ if (kernel_data.integrator.scrambling_distance < 1.0f) {
+ seed = kernel_data.integrator.seed;
+ }
-ccl_device_inline uint32_t nested_uniform_scramble(uint32_t x, uint32_t seed)
-{
- x = reverse_integer_bits(x);
- x = laine_karras_permutation(x, seed);
- x = reverse_integer_bits(x);
+ /* Shuffle the pattern order and sample index to better decorrelate
+ * dimensions and make the most of the finite patterns we have.
+ * The funky sample mask stuff is to ensure that we only shuffle
+ * *within* the current sample pattern, which is necessary to avoid
+ * early repeat pattern use. */
+ const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
+ /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
+ const uint sample_mask = NUM_PMJ_SAMPLES - 1;
+ const uint sample_shuffled = nested_uniform_scramble(sample,
+ hash_wang_seeded_uint(dimension, seed));
+ sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
+
+ /* Fetch the sample. */
+ const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
+ (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
+ float x = kernel_data_fetch(sample_pattern_lut, index * 2);
+
+ /* Do limited Cranley-Patterson rotation when using scrambling distance. */
+ if (kernel_data.integrator.scrambling_distance < 1.0f) {
+ const float jitter_x = hash_wang_seeded_float(dimension, rng_hash) *
+ kernel_data.integrator.scrambling_distance;
+ x += jitter_x;
+ x -= floorf(x);
+ }
return x;
}
-ccl_device_inline uint cmj_hash(uint i, uint p)
+ccl_device float2 pmj_sample_2D(KernelGlobals kg,
+ uint sample,
+ const uint rng_hash,
+ const uint dimension)
{
- i ^= p;
- i ^= i >> 17;
- i ^= i >> 10;
- i *= 0xb36534e5;
- i ^= i >> 12;
- i ^= i >> 21;
- i *= 0x93fc4795;
- i ^= 0xdf6e307f;
- i ^= i >> 17;
- i *= 1 | p >> 18;
-
- return i;
-}
-
-ccl_device_inline uint cmj_hash_simple(uint i, uint p)
-{
- i = (i ^ 61) ^ p;
- i += i << 3;
- i ^= i >> 4;
- i *= 0x27d4eb2d;
- return i;
-}
-
-ccl_device_inline float cmj_randfloat(uint i, uint p)
-{
- return cmj_hash(i, p) * (1.0f / 4294967808.0f);
-}
-
-ccl_device_inline float cmj_randfloat_simple(uint i, uint p)
-{
- return cmj_hash_simple(i, p) * (1.0f / (float)0xFFFFFFFF);
-}
+ uint seed = rng_hash;
-ccl_device_inline float cmj_randfloat_simple_dist(uint i, uint p, float d)
-{
- return cmj_hash_simple(i, p) * (d / (float)0xFFFFFFFF);
-}
-
-ccl_device float pmj_sample_1D(KernelGlobals kg, uint sample, uint rng_hash, uint dimension)
-{
- uint hash = rng_hash;
- float jitter_x = 0.0f;
+ /* Use the same sample sequence seed for all pixels when using
+ * scrambling distance. */
if (kernel_data.integrator.scrambling_distance < 1.0f) {
- hash = kernel_data.integrator.seed;
-
- jitter_x = cmj_randfloat_simple_dist(
- dimension, rng_hash, kernel_data.integrator.scrambling_distance);
+ seed = kernel_data.integrator.seed;
}
- /* Perform Owen shuffle of the sample number to reorder the samples. */
-#ifdef _SIMPLE_HASH_
- const uint rv = cmj_hash_simple(dimension, hash);
-#else /* Use a _REGULAR_HASH_. */
- const uint rv = cmj_hash(dimension, hash);
-#endif
-#ifdef _XOR_SHUFFLE_
-# warning "Using XOR shuffle."
- const uint s = sample ^ rv;
-#else /* Use _OWEN_SHUFFLE_ for reordering. */
- const uint s = nested_uniform_scramble(sample, rv);
-#endif
-
- /* Based on the sample number a sample pattern is selected and offset by the dimension. */
- const uint sample_set = s / NUM_PMJ_SAMPLES;
- const uint d = (dimension + sample_set);
- const uint dim = d % NUM_PMJ_PATTERNS;
-
- /* The PMJ sample sets contain a sample with (x,y) with NUM_PMJ_SAMPLES so for 1D
- * the x part is used for even dims and the y for odd. */
- int index = 2 * ((dim >> 1) * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES)) + (dim & 1);
-
- float fx = kernel_data_fetch(sample_pattern_lut, index);
-
-#ifndef _NO_CRANLEY_PATTERSON_ROTATION_
- /* Use Cranley-Patterson rotation to displace the sample pattern. */
-# ifdef _SIMPLE_HASH_
- float dx = cmj_randfloat_simple(d, hash);
-# else
- float dx = cmj_randfloat(d, hash);
-# endif
- /* Jitter sample locations and map back into [0 1]. */
- fx = fx + dx + jitter_x;
- fx = fx - floorf(fx);
-#else
-# warning "Not using Cranley-Patterson Rotation."
-#endif
-
- return fx;
-}
-
-ccl_device void pmj_sample_2D(KernelGlobals kg,
- uint sample,
- uint rng_hash,
- uint dimension,
- ccl_private float *x,
- ccl_private float *y)
-{
- uint hash = rng_hash;
- float jitter_x = 0.0f;
- float jitter_y = 0.0f;
+ /* Shuffle the pattern order and sample index to better decorrelate
+ * dimensions and make the most of the finite patterns we have.
+ * The funky sample mask stuff is to ensure that we only shuffle
+ * *within* the current sample pattern, which is necessary to avoid
+ * early repeat pattern use. */
+ const uint pattern_i = hash_shuffle_uint(dimension, NUM_PMJ_PATTERNS, seed);
+ /* NUM_PMJ_SAMPLES should be a power of two, so this results in a mask. */
+ const uint sample_mask = NUM_PMJ_SAMPLES - 1;
+ const uint sample_shuffled = nested_uniform_scramble(sample,
+ hash_wang_seeded_uint(dimension, seed));
+ sample = (sample & ~sample_mask) | (sample_shuffled & sample_mask);
+
+ /* Fetch the sample. */
+ const uint index = ((pattern_i * NUM_PMJ_SAMPLES) + sample) %
+ (NUM_PMJ_SAMPLES * NUM_PMJ_PATTERNS);
+ float x = kernel_data_fetch(sample_pattern_lut, index * 2);
+ float y = kernel_data_fetch(sample_pattern_lut, index * 2 + 1);
+
+ /* Do limited Cranley-Patterson rotation when using scrambling distance. */
if (kernel_data.integrator.scrambling_distance < 1.0f) {
- hash = kernel_data.integrator.seed;
-
- jitter_x = cmj_randfloat_simple_dist(
- dimension, rng_hash, kernel_data.integrator.scrambling_distance);
- jitter_y = cmj_randfloat_simple_dist(
- dimension + 1, rng_hash, kernel_data.integrator.scrambling_distance);
+ const float jitter_x = hash_wang_seeded_float(dimension, rng_hash) *
+ kernel_data.integrator.scrambling_distance;
+ const float jitter_y = hash_wang_seeded_float(dimension, rng_hash ^ 0xca0e1151) *
+ kernel_data.integrator.scrambling_distance;
+ x += jitter_x;
+ y += jitter_y;
+ x -= floorf(x);
+ y -= floorf(y);
}
- /* Perform a shuffle on the sample number to reorder the samples. */
-#ifdef _SIMPLE_HASH_
- const uint rv = cmj_hash_simple(dimension, hash);
-#else /* Use a _REGULAR_HASH_. */
- const uint rv = cmj_hash(dimension, hash);
-#endif
-#ifdef _XOR_SHUFFLE_
-# warning "Using XOR shuffle."
- const uint s = sample ^ rv;
-#else /* Use _OWEN_SHUFFLE_ for reordering. */
- const uint s = nested_uniform_scramble(sample, rv);
-#endif
-
- /* Based on the sample number a sample pattern is selected and offset by the dimension. */
- const uint sample_set = s / NUM_PMJ_SAMPLES;
- const uint d = dimension + sample_set;
- uint dim = d % NUM_PMJ_PATTERNS;
- int index = 2 * (dim * NUM_PMJ_SAMPLES + (s % NUM_PMJ_SAMPLES));
-
- float fx = kernel_data_fetch(sample_pattern_lut, index);
- float fy = kernel_data_fetch(sample_pattern_lut, index + 1);
-
-#ifndef _NO_CRANLEY_PATTERSON_ROTATION_
- /* Use Cranley-Patterson rotation to displace the sample pattern. */
-# ifdef _SIMPLE_HASH_
- float dx = cmj_randfloat_simple(d, hash);
- float dy = cmj_randfloat_simple(d + 1, hash);
-# else
- float dx = cmj_randfloat(d, hash);
- float dy = cmj_randfloat(d + 1, hash);
-# endif
- /* Jitter sample locations and map back to the unit square [0 1]x[0 1]. */
- float sx = fx + dx + jitter_x;
- float sy = fy + dy + jitter_y;
- sx = sx - floorf(sx);
- sy = sy - floorf(sy);
-#else
-# warning "Not using Cranley Patterson Rotation."
-#endif
-
- (*x) = sx;
- (*y) = sy;
+ return make_float2(x, y);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/sample/pattern.h b/intern/cycles/kernel/sample/pattern.h
index 89500d51872..e12f333b3a5 100644
--- a/intern/cycles/kernel/sample/pattern.h
+++ b/intern/cycles/kernel/sample/pattern.h
@@ -4,6 +4,7 @@
#pragma once
#include "kernel/sample/jitter.h"
+#include "kernel/sample/sobol_burley.h"
#include "util/hash.h"
CCL_NAMESPACE_BEGIN
@@ -12,33 +13,6 @@ CCL_NAMESPACE_BEGIN
* this single threaded on a CPU for repeatable results. */
//#define __DEBUG_CORRELATION__
-/* High Dimensional Sobol.
- *
- * Multidimensional sobol with generator matrices. Dimension 0 and 1 are equal
- * to classic Van der Corput and Sobol sequences. */
-
-#ifdef __SOBOL__
-
-/* Skip initial numbers that for some dimensions have clear patterns that
- * don't cover the entire sample space. Ideally we would have a better
- * progressive pattern that doesn't suffer from this problem, because even
- * with this offset some dimensions are quite poor.
- */
-# define SOBOL_SKIP 64
-
-ccl_device uint sobol_dimension(KernelGlobals kg, int index, int dimension)
-{
- uint result = 0;
- uint i = index + SOBOL_SKIP;
- for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
- j += x;
- result ^= __float_as_uint(kernel_data_fetch(sample_pattern_lut, 32 * dimension + j - 1));
- }
- return result;
-}
-
-#endif /* __SOBOL__ */
-
ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
uint rng_hash,
int sample,
@@ -48,58 +22,29 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals kg,
return (float)drand48();
#endif
-#ifdef __SOBOL__
- if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
-#endif
- {
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
+ return sobol_burley_sample_1D(sample, dimension, rng_hash);
+ }
+ else {
return pmj_sample_1D(kg, sample, rng_hash, dimension);
}
-
-#ifdef __SOBOL__
- /* Sobol sequence value using direction vectors. */
- uint result = sobol_dimension(kg, sample, dimension);
- float r = (float)result * (1.0f / (float)0xFFFFFFFF);
-
- /* Cranly-Patterson rotation using rng seed */
- float shift;
-
- /* Hash rng with dimension to solve correlation issues.
- * See T38710, T50116.
- */
- uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
- shift = tmp_rng * (kernel_data.integrator.scrambling_distance / (float)0xFFFFFFFF);
-
- return r + shift - floorf(r + shift);
-#endif
}
-ccl_device_forceinline void path_rng_2D(KernelGlobals kg,
- uint rng_hash,
- int sample,
- int dimension,
- ccl_private float *fx,
- ccl_private float *fy)
+ccl_device_forceinline float2 path_rng_2D(KernelGlobals kg,
+ uint rng_hash,
+ int sample,
+ int dimension)
{
#ifdef __DEBUG_CORRELATION__
- *fx = (float)drand48();
- *fy = (float)drand48();
- return;
-#endif
-
-#ifdef __SOBOL__
- if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ)
+ return make_float2((float)drand48(), (float)drand48());
#endif
- {
- pmj_sample_2D(kg, sample, rng_hash, dimension, fx, fy);
- return;
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_SOBOL_BURLEY) {
+ return sobol_burley_sample_2D(sample, dimension, rng_hash);
+ }
+ else {
+ return pmj_sample_2D(kg, sample, rng_hash, dimension);
}
-
-#ifdef __SOBOL__
- /* Sobol. */
- *fx = path_rng_1D(kg, rng_hash, sample, dimension);
- *fy = path_rng_1D(kg, rng_hash, sample, dimension + 1);
-#endif
}
/**
@@ -145,18 +90,33 @@ ccl_device_inline uint path_rng_hash_init(KernelGlobals kg,
return rng_hash;
}
-ccl_device_inline bool sample_is_even(int pattern, int sample)
+/**
+ * Splits samples into two different classes, A and B, which can be
+ * compared for variance estimation.
+ */
+ccl_device_inline bool sample_is_class_A(int pattern, int sample)
{
- if (pattern == SAMPLING_PATTERN_PMJ) {
- /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
- * We can use this to get divide sample sequence into two classes for easier variance
- * estimation. */
- return popcount(uint(sample) & 0xaaaaaaaa) & 1;
- }
- else {
- /* TODO(Stefan): Are there reliable ways of dividing CMJ and Sobol into two classes? */
- return sample & 0x1;
+#if 0
+ if (!(pattern == SAMPLING_PATTERN_PMJ || pattern == SAMPLING_PATTERN_SOBOL_BURLEY)) {
+ /* Fallback: assign samples randomly.
+ * This is guaranteed to work "okay" for any sampler, but isn't good.
+ * (NOTE: the seed constant is just a random number to guard against
+ * possible interactions with other uses of the hash. There's nothing
+ * special about it.)
+ */
+ return hash_hp_seeded_uint(sample, 0xa771f873) & 1;
}
-}
+#else
+ (void)pattern;
+#endif
+ /* This follows the approach from section 10.2.1 of "Progressive
+ * Multi-Jittered Sample Sequences" by Christensen et al., but
+ * implemented with efficient bit-fiddling.
+ *
+ * This approach also turns out to work equally well with Sobol-Burley
+ * (see https://developer.blender.org/D15746#429471).
+ */
+ return popcount(uint(sample) & 0xaaaaaaaa) & 1;
+}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/sample/sobol_burley.h b/intern/cycles/kernel/sample/sobol_burley.h
new file mode 100644
index 00000000000..47796ae7998
--- /dev/null
+++ b/intern/cycles/kernel/sample/sobol_burley.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+/*
+ * A shuffled, Owen-scrambled Sobol sampler, implemented with the
+ * techniques from the paper "Practical Hash-based Owen Scrambling"
+ * by Brent Burley, 2020, Journal of Computer Graphics Techniques.
+ *
+ * Note that unlike a standard high-dimensional Sobol sequence, this
+ * Sobol sampler uses padding to achieve higher dimensions, as described
+ * in Burley's paper.
+ */
+
+#pragma once
+
+#include "kernel/sample/util.h"
+#include "util/hash.h"
+#include "util/math.h"
+#include "util/types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/*
+ * Computes a single dimension of a sample from an Owen-scrambled
+ * Sobol sequence. This is used in the main sampling functions,
+ * sobol_burley_sample_#D(), below.
+ *
+ * - rev_bit_index: the sample index, with reversed order bits.
+ * - dimension: the sample dimension.
+ * - scramble_seed: the Owen scrambling seed.
+ *
+ * Note that the seed must be well randomized before being
+ * passed to this function.
+ */
+ccl_device_forceinline float sobol_burley(uint rev_bit_index,
+ const uint dimension,
+ const uint scramble_seed)
+{
+ uint result = 0;
+
+ if (dimension == 0) {
+ /* Fast-path for dimension 0, which is just Van der corput.
+ * This makes a notable difference in performance since we reuse
+ * dimensions for padding, and dimension 0 is reused the most. */
+ result = reverse_integer_bits(rev_bit_index);
+ }
+ else {
+ uint i = 0;
+ while (rev_bit_index != 0) {
+ uint j = count_leading_zeros(rev_bit_index);
+ result ^= sobol_burley_table[dimension][i + j];
+ i += j + 1;
+
+ /* We can't do "<<= j + 1" because that can overflow the shift
+ * operator, which doesn't do what we need on at least x86. */
+ rev_bit_index <<= j;
+ rev_bit_index <<= 1;
+ }
+ }
+
+ /* Apply Owen scrambling. */
+ result = reverse_integer_bits(reversed_bit_owen(result, scramble_seed));
+
+ return uint_to_float_excl(result);
+}
+
+/*
+ * Computes a 1D Owen-scrambled and shuffled Sobol sample.
+ */
+ccl_device float sobol_burley_sample_1D(uint index, uint const dimension, uint seed)
+{
+ /* Include the dimension in the seed, so we get decorrelated
+ * sequences for different dimensions via shuffling. */
+ seed ^= hash_hp_uint(dimension);
+
+ /* Shuffle. */
+ index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xbff95bfe);
+
+ return sobol_burley(index, 0, seed ^ 0x635c77bd);
+}
+
+/*
+ * Computes a 2D Owen-scrambled and shuffled Sobol sample.
+ */
+ccl_device float2 sobol_burley_sample_2D(uint index, const uint dimension_set, uint seed)
+{
+ /* Include the dimension set in the seed, so we get decorrelated
+ * sequences for different dimension sets via shuffling. */
+ seed ^= hash_hp_uint(dimension_set);
+
+ /* Shuffle. */
+ index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xf8ade99a);
+
+ return make_float2(sobol_burley(index, 0, seed ^ 0xe0aaaf76),
+ sobol_burley(index, 1, seed ^ 0x94964d4e));
+}
+
+/*
+ * Computes a 3D Owen-scrambled and shuffled Sobol sample.
+ */
+ccl_device float3 sobol_burley_sample_3D(uint index, const uint dimension_set, uint seed)
+{
+ /* Include the dimension set in the seed, so we get decorrelated
+ * sequences for different dimension sets via shuffling. */
+ seed ^= hash_hp_uint(dimension_set);
+
+ /* Shuffle. */
+ index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xcaa726ac);
+
+ return make_float3(sobol_burley(index, 0, seed ^ 0x9e78e391),
+ sobol_burley(index, 1, seed ^ 0x67c33241),
+ sobol_burley(index, 2, seed ^ 0x78c395c5));
+}
+
+/*
+ * Computes a 4D Owen-scrambled and shuffled Sobol sample.
+ */
+ccl_device float4 sobol_burley_sample_4D(uint index, const uint dimension_set, uint seed)
+{
+ /* Include the dimension set in the seed, so we get decorrelated
+ * sequences for different dimension sets via shuffling. */
+ seed ^= hash_hp_uint(dimension_set);
+
+ /* Shuffle. */
+ index = reversed_bit_owen(reverse_integer_bits(index), seed ^ 0xc2c1a055);
+
+ return make_float4(sobol_burley(index, 0, seed ^ 0x39468210),
+ sobol_burley(index, 1, seed ^ 0xe9d8a845),
+ sobol_burley(index, 2, seed ^ 0x5f32b482),
+ sobol_burley(index, 3, seed ^ 0x1524cc56));
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/sample/util.h b/intern/cycles/kernel/sample/util.h
new file mode 100644
index 00000000000..29cda179aa2
--- /dev/null
+++ b/intern/cycles/kernel/sample/util.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2011-2022 Blender Foundation */
+
+#pragma once
+
+#include "util/types.h"
+
+CCL_NAMESPACE_BEGIN
+
+/*
+ * Performs base-2 Owen scrambling on a reversed-bit unsigned integer.
+ *
+ * This is equivalent to the Laine-Karras permutation, but much higher
+ * quality. See https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
+ */
+ccl_device_inline uint reversed_bit_owen(uint n, uint seed)
+{
+ n ^= n * 0x3d20adea;
+ n += seed;
+ n *= (seed >> 16) | 1;
+ n ^= n * 0x05526c56;
+ n ^= n * 0x53a22864;
+
+ return n;
+}
+
+/*
+ * Performs base-2 Owen scrambling on an unsigned integer.
+ */
+ccl_device_inline uint nested_uniform_scramble(uint i, uint seed)
+{
+ return reverse_integer_bits(reversed_bit_owen(reverse_integer_bits(i), seed));
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/ao.h b/intern/cycles/kernel/svm/ao.h
index c57c68d6230..70f52de789b 100644
--- a/intern/cycles/kernel/svm/ao.h
+++ b/intern/cycles/kernel/svm/ao.h
@@ -49,10 +49,10 @@ ccl_device float svm_ao(
int unoccluded = 0;
for (int sample = 0; sample < num_samples; sample++) {
- float disk_u, disk_v;
- path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
+ const float2 rand_disk = path_branched_rng_2D(
+ kg, &rng_state, sample, num_samples, PRNG_SURFACE_AO);
- float2 d = concentric_sample_disk(disk_u, disk_v);
+ float2 d = concentric_sample_disk(rand_disk.x, rand_disk.y);
float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d)));
/* Create ray. */
diff --git a/intern/cycles/kernel/svm/aov.h b/intern/cycles/kernel/svm/aov.h
index 9b818f0e6f8..c574b28c078 100644
--- a/intern/cycles/kernel/svm/aov.h
+++ b/intern/cycles/kernel/svm/aov.h
@@ -3,7 +3,7 @@
#pragma once
-#include "kernel/film/write_passes.h"
+#include "kernel/film/aov_passes.h"
CCL_NAMESPACE_BEGIN
@@ -27,12 +27,7 @@ ccl_device void svm_node_aov_color(KernelGlobals kg,
IF_KERNEL_NODES_FEATURE(AOV)
{
const float3 val = stack_load_float3(stack, node.y);
- const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
- const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
- kernel_data.film.pass_stride;
- ccl_global float *buffer = render_buffer + render_buffer_offset +
- (kernel_data.film.pass_aov_color + node.z);
- kernel_write_pass_float4(buffer, make_float4(val.x, val.y, val.z, 1.0f));
+ film_write_aov_pass_color(kg, state, render_buffer, node.z, val);
}
}
@@ -47,12 +42,7 @@ ccl_device void svm_node_aov_value(KernelGlobals kg,
IF_KERNEL_NODES_FEATURE(AOV)
{
const float val = stack_load_float(stack, node.y);
- const uint32_t render_pixel_index = INTEGRATOR_STATE(state, path, render_pixel_index);
- const uint64_t render_buffer_offset = (uint64_t)render_pixel_index *
- kernel_data.film.pass_stride;
- ccl_global float *buffer = render_buffer + render_buffer_offset +
- (kernel_data.film.pass_aov_value + node.z);
- kernel_write_pass_float(buffer, val);
+ film_write_aov_pass_value(kg, state, render_buffer, node.z, val);
}
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/attribute.h b/intern/cycles/kernel/svm/attribute.h
index a3609d8b4b0..5f0d1609f08 100644
--- a/intern/cycles/kernel/svm/attribute.h
+++ b/intern/cycles/kernel/svm/attribute.h
@@ -140,6 +140,16 @@ ccl_device_noinline void svm_node_attr(KernelGlobals kg,
}
}
+ccl_device_forceinline float3 svm_node_bump_P_dx(const ccl_private ShaderData *sd)
+{
+ return sd->P + differential_from_compact(sd->Ng, sd->dP).dx;
+}
+
+ccl_device_forceinline float3 svm_node_bump_P_dy(const ccl_private ShaderData *sd)
+{
+ return sd->P + differential_from_compact(sd->Ng, sd->dP).dy;
+}
+
ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
@@ -167,7 +177,7 @@ ccl_device_noinline void svm_node_attr_bump_dx(KernelGlobals kg,
if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) {
/* No generated attribute, fall back to object coordinates. */
- float3 f = sd->P + sd->dP.dx;
+ float3 f = svm_node_bump_P_dx(sd);
if (sd->object != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &f);
}
@@ -265,7 +275,7 @@ ccl_device_noinline void svm_node_attr_bump_dy(KernelGlobals kg,
if (node.y == ATTR_STD_GENERATED && desc.element == ATTR_ELEMENT_NONE) {
/* No generated attribute, fall back to object coordinates. */
- float3 f = sd->P + sd->dP.dy;
+ float3 f = svm_node_bump_P_dy(sd);
if (sd->object != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &f);
}
diff --git a/intern/cycles/kernel/svm/bevel.h b/intern/cycles/kernel/svm/bevel.h
index 4617a056a52..c1e227959f8 100644
--- a/intern/cycles/kernel/svm/bevel.h
+++ b/intern/cycles/kernel/svm/bevel.h
@@ -128,8 +128,8 @@ ccl_device float3 svm_bevel(
path_state_rng_load(state, &rng_state);
for (int sample = 0; sample < num_samples; sample++) {
- float disk_u, disk_v;
- path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
+ float2 rand_disk = path_branched_rng_2D(
+ kg, &rng_state, sample, num_samples, PRNG_SURFACE_BEVEL);
/* Pick random axis in local frame and point on disk. */
float3 disk_N, disk_T, disk_B;
@@ -138,13 +138,13 @@ ccl_device float3 svm_bevel(
disk_N = sd->Ng;
make_orthonormals(disk_N, &disk_T, &disk_B);
- float axisu = disk_u;
+ float axisu = rand_disk.x;
if (axisu < 0.5f) {
pick_pdf_N = 0.5f;
pick_pdf_T = 0.25f;
pick_pdf_B = 0.25f;
- disk_u *= 2.0f;
+ rand_disk.x *= 2.0f;
}
else if (axisu < 0.75f) {
float3 tmp = disk_N;
@@ -153,7 +153,7 @@ ccl_device float3 svm_bevel(
pick_pdf_N = 0.25f;
pick_pdf_T = 0.5f;
pick_pdf_B = 0.25f;
- disk_u = (disk_u - 0.5f) * 4.0f;
+ rand_disk.x = (rand_disk.x - 0.5f) * 4.0f;
}
else {
float3 tmp = disk_N;
@@ -162,12 +162,12 @@ ccl_device float3 svm_bevel(
pick_pdf_N = 0.25f;
pick_pdf_T = 0.25f;
pick_pdf_B = 0.5f;
- disk_u = (disk_u - 0.75f) * 4.0f;
+ rand_disk.x = (rand_disk.x - 0.75f) * 4.0f;
}
/* Sample point on disk. */
- float phi = M_2PI_F * disk_u;
- float disk_r = disk_v;
+ float phi = M_2PI_F * rand_disk.x;
+ float disk_r = rand_disk.y;
float disk_height;
/* Perhaps find something better than Cubic BSSRDF, but happens to work well. */
diff --git a/intern/cycles/kernel/svm/bump.h b/intern/cycles/kernel/svm/bump.h
index 566c45f5f25..1009a6a4241 100644
--- a/intern/cycles/kernel/svm/bump.h
+++ b/intern/cycles/kernel/svm/bump.h
@@ -14,23 +14,21 @@ ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg,
{
/* save state */
stack_store_float3(stack, offset + 0, sd->P);
- stack_store_float3(stack, offset + 3, sd->dP.dx);
- stack_store_float3(stack, offset + 6, sd->dP.dy);
+ stack_store_float(stack, offset + 3, sd->dP);
/* set state as if undisplaced */
const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED);
if (desc.offset != ATTR_STD_NOT_FOUND) {
- float3 P, dPdx, dPdy;
- P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy);
+ differential3 dP;
+ float3 P = primitive_surface_attribute_float3(kg, sd, desc, &dP.dx, &dP.dy);
object_position_transform(kg, sd, &P);
- object_dir_transform(kg, sd, &dPdx);
- object_dir_transform(kg, sd, &dPdy);
+ object_dir_transform(kg, sd, &dP.dx);
+ object_dir_transform(kg, sd, &dP.dy);
sd->P = P;
- sd->dP.dx = dPdx;
- sd->dP.dy = dPdy;
+ sd->dP = differential_make_compact(dP);
}
}
@@ -41,8 +39,7 @@ ccl_device_noinline void svm_node_leave_bump_eval(KernelGlobals kg,
{
/* restore state */
sd->P = stack_load_float3(stack, offset + 0);
- sd->dP.dx = stack_load_float3(stack, offset + 3);
- sd->dP.dy = stack_load_float3(stack, offset + 6);
+ sd->dP = stack_load_float(stack, offset + 3);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h
index d8cd8d6f5c2..82274adeea8 100644
--- a/intern/cycles/kernel/svm/closure.h
+++ b/intern/cycles/kernel/svm/closure.h
@@ -3,9 +3,14 @@
#pragma once
-#ifdef __PRINCIPLED__
-# include "kernel/svm/closure_principled.h"
-#endif
+#include "kernel/closure/alloc.h"
+#include "kernel/closure/bsdf.h"
+#include "kernel/closure/bsdf_util.h"
+#include "kernel/closure/emissive.h"
+
+#include "kernel/util/color.h"
+
+#include "kernel/svm/closure_principled.h"
CCL_NAMESPACE_BEGIN
@@ -28,7 +33,7 @@ ccl_device void svm_node_glass_setup(ccl_private ShaderData *sd,
else {
bsdf->alpha_y = 0.0f;
bsdf->alpha_x = 0.0f;
- bsdf->ior = 0.0f;
+ bsdf->ior = eta;
sd->flag |= bsdf_reflection_setup(bsdf);
}
}
@@ -84,13 +89,11 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
return offset;
}
-#ifdef __PRINCIPLED__
if (type == CLOSURE_BSDF_PRINCIPLED_ID) {
/* Principled BSDF uses different parameter packing. */
svm_node_closure_principled(kg, sd, stack, node, data_node, mix_weight, path_flag, &offset);
return offset;
}
-#endif /* __PRINCIPLED__ */
float3 N = stack_valid(data_node.x) ? stack_load_float3(stack, data_node.x) : sd->N;
if (!(sd->type & PRIMITIVE_CURVE)) {
@@ -104,7 +107,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
switch (type) {
case CLOSURE_BSDF_DIFFUSE_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private OrenNayarBsdf *bsdf = (ccl_private OrenNayarBsdf *)bsdf_alloc(
sd, sizeof(OrenNayarBsdf), weight);
@@ -124,7 +127,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
break;
}
case CLOSURE_BSDF_TRANSLUCENT_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private DiffuseBsdf *bsdf = (ccl_private DiffuseBsdf *)bsdf_alloc(
sd, sizeof(DiffuseBsdf), weight);
@@ -135,7 +138,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
break;
}
case CLOSURE_BSDF_TRANSPARENT_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
bsdf_transparent_setup(sd, weight, path_flag);
break;
}
@@ -148,7 +151,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (!kernel_data.integrator.caustics_reflective && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
sd, sizeof(MicrofacetBsdf), weight);
@@ -159,11 +162,11 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
float roughness = sqr(param1);
bsdf->N = N;
- bsdf->ior = 0.0f;
+ bsdf->ior = 1.0f;
bsdf->extra = NULL;
if (data_node.y == SVM_STACK_INVALID) {
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->T = zero_float3();
bsdf->alpha_x = roughness;
bsdf->alpha_y = roughness;
}
@@ -196,7 +199,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
sd->flag |= bsdf_microfacet_ggx_setup(bsdf);
else if (type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) {
kernel_assert(stack_valid(data_node.w));
- float3 color = stack_load_float3(stack, data_node.w);
+ Spectrum color = rgb_to_spectrum(stack_load_float3(stack, data_node.w));
sd->flag |= bsdf_microfacet_multi_ggx_setup(kg, bsdf, sd, color);
}
else {
@@ -212,13 +215,13 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
sd, sizeof(MicrofacetBsdf), weight);
if (bsdf) {
bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->T = zero_float3();
bsdf->extra = NULL;
float eta = fmaxf(param2, 1e-5f);
@@ -256,7 +259,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
break;
}
#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
/* index of refraction */
float eta = fmaxf(param2, 1e-5f);
@@ -277,7 +280,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (bsdf) {
bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->T = zero_float3();
bsdf->extra = NULL;
svm_node_glass_setup(sd, bsdf, type, eta, roughness, false);
}
@@ -296,7 +299,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (bsdf) {
bsdf->N = N;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->T = zero_float3();
bsdf->extra = NULL;
svm_node_glass_setup(sd, bsdf, type, eta, roughness, true);
}
@@ -310,7 +313,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
!kernel_data.integrator.caustics_refractive && (path_flag & PATH_RAY_DIFFUSE))
break;
#endif
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
sd, sizeof(MicrofacetBsdf), weight);
if (!bsdf) {
@@ -321,7 +324,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
bsdf->N = N;
bsdf->extra = NULL;
- bsdf->T = make_float3(0.0f, 0.0f, 0.0f);
+ bsdf->T = zero_float3();
float roughness = sqr(param1);
bsdf->alpha_x = roughness;
@@ -330,14 +333,14 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
bsdf->ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
kernel_assert(stack_valid(data_node.z));
- float3 color = stack_load_float3(stack, data_node.z);
+ Spectrum color = rgb_to_spectrum(stack_load_float3(stack, data_node.z));
/* setup bsdf */
sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(kg, bsdf, sd, color);
break;
}
case CLOSURE_BSDF_ASHIKHMIN_VELVET_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private VelvetBsdf *bsdf = (ccl_private VelvetBsdf *)bsdf_alloc(
sd, sizeof(VelvetBsdf), weight);
@@ -356,7 +359,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
ATTR_FALLTHROUGH;
#endif
case CLOSURE_BSDF_DIFFUSE_TOON_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private ToonBsdf *bsdf = (ccl_private ToonBsdf *)bsdf_alloc(
sd, sizeof(ToonBsdf), weight);
@@ -378,7 +381,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
uint4 data_node3 = read_node(kg, &offset);
uint4 data_node4 = read_node(kg, &offset);
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
uint offset_ofs, ior_ofs, color_ofs, parametrization;
svm_unpack_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
@@ -436,7 +439,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
switch (parametrization) {
case NODE_PRINCIPLED_HAIR_DIRECT_ABSORPTION: {
float3 absorption_coefficient = stack_load_float3(stack, absorption_coefficient_ofs);
- bsdf->sigma = absorption_coefficient;
+ bsdf->sigma = rgb_to_spectrum(absorption_coefficient);
break;
}
case NODE_PRINCIPLED_HAIR_PIGMENT_CONCENTRATION: {
@@ -456,20 +459,21 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
/* Benedikt Bitterli's melanin ratio remapping. */
float eumelanin = melanin * (1.0f - melanin_redness);
float pheomelanin = melanin * melanin_redness;
- float3 melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin,
- pheomelanin);
+ Spectrum melanin_sigma = bsdf_principled_hair_sigma_from_concentration(eumelanin,
+ pheomelanin);
/* Optional tint. */
float3 tint = stack_load_float3(stack, tint_ofs);
- float3 tint_sigma = bsdf_principled_hair_sigma_from_reflectance(tint,
- radial_roughness);
+ Spectrum tint_sigma = bsdf_principled_hair_sigma_from_reflectance(
+ rgb_to_spectrum(tint), radial_roughness);
bsdf->sigma = melanin_sigma + tint_sigma;
break;
}
case NODE_PRINCIPLED_HAIR_REFLECTANCE: {
float3 color = stack_load_float3(stack, color_ofs);
- bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(color, radial_roughness);
+ bsdf->sigma = bsdf_principled_hair_sigma_from_reflectance(rgb_to_spectrum(color),
+ radial_roughness);
break;
}
default: {
@@ -486,7 +490,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
}
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private HairBsdf *bsdf = (ccl_private HairBsdf *)bsdf_alloc(
sd, sizeof(HairBsdf), weight);
@@ -523,7 +527,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
case CLOSURE_BSSRDF_BURLEY_ID:
case CLOSURE_BSSRDF_RANDOM_WALK_ID:
case CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID: {
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, weight);
if (bssrdf) {
@@ -533,7 +537,7 @@ ccl_device_noinline int svm_node_closure_bsdf(KernelGlobals kg,
if (path_flag & PATH_RAY_DIFFUSE_ANCESTOR)
param1 = 0.0f;
- bssrdf->radius = stack_load_float3(stack, data_node.z) * param1;
+ bssrdf->radius = rgb_to_spectrum(stack_load_float3(stack, data_node.z) * param1);
bssrdf->albedo = sd->svm_closure_weight;
bssrdf->N = N;
bssrdf->roughness = FLT_MAX;
@@ -583,10 +587,10 @@ ccl_device_noinline void svm_node_closure_volume(KernelGlobals kg,
density = mix_weight * fmaxf(density, 0.0f);
/* Compute scattering coefficient. */
- float3 weight = sd->svm_closure_weight;
+ Spectrum weight = sd->svm_closure_weight;
if (type == CLOSURE_VOLUME_ABSORPTION_ID) {
- weight = make_float3(1.0f, 1.0f, 1.0f) - weight;
+ weight = one_spectrum() - weight;
}
weight *= density;
@@ -654,11 +658,11 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg,
if (density > CLOSURE_WEIGHT_CUTOFF) {
/* Compute scattering color. */
- float3 color = sd->svm_closure_weight;
+ Spectrum color = sd->svm_closure_weight;
const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y);
if (attr_color.offset != ATTR_STD_NOT_FOUND) {
- color *= primitive_volume_attribute_float3(kg, sd, attr_color);
+ color *= rgb_to_spectrum(primitive_volume_attribute_float3(kg, sd, attr_color));
}
/* Add closure for volume scattering. */
@@ -673,10 +677,13 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg,
}
/* Add extinction weight. */
- float3 zero = make_float3(0.0f, 0.0f, 0.0f);
- float3 one = make_float3(1.0f, 1.0f, 1.0f);
- float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)), zero);
- float3 absorption = max(one - color, zero) * max(one - absorption_color, zero);
+ float3 absorption_color = max(sqrt(stack_load_float3(stack, absorption_color_offset)),
+ zero_float3());
+
+ Spectrum zero = zero_spectrum();
+ Spectrum one = one_spectrum();
+ Spectrum absorption = max(one - color, zero) *
+ max(one - rgb_to_spectrum(absorption_color), zero);
volume_extinction_setup(sd, (color + absorption) * density);
}
@@ -696,7 +703,7 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg,
if (emission > CLOSURE_WEIGHT_CUTOFF) {
float3 emission_color = stack_load_float3(stack, emission_color_offset);
- emission_setup(sd, emission * emission_color);
+ emission_setup(sd, rgb_to_spectrum(emission * emission_color));
}
if (blackbody > CLOSURE_WEIGHT_CUTOFF) {
@@ -720,7 +727,7 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg,
float3 blackbody_tint = stack_load_float3(stack, node.w);
float3 bb = blackbody_tint * intensity *
rec709_to_rgb(kg, svm_math_blackbody_color_rec709(T));
- emission_setup(sd, bb);
+ emission_setup(sd, rgb_to_spectrum(bb));
}
}
#endif
@@ -732,7 +739,7 @@ ccl_device_noinline void svm_node_closure_emission(ccl_private ShaderData *sd,
uint4 node)
{
uint mix_weight_offset = node.y;
- float3 weight = sd->svm_closure_weight;
+ Spectrum weight = sd->svm_closure_weight;
if (stack_valid(mix_weight_offset)) {
float mix_weight = stack_load_float(stack, mix_weight_offset);
@@ -751,7 +758,7 @@ ccl_device_noinline void svm_node_closure_background(ccl_private ShaderData *sd,
uint4 node)
{
uint mix_weight_offset = node.y;
- float3 weight = sd->svm_closure_weight;
+ Spectrum weight = sd->svm_closure_weight;
if (stack_valid(mix_weight_offset)) {
float mix_weight = stack_load_float(stack, mix_weight_offset);
@@ -788,14 +795,15 @@ ccl_device_noinline void svm_node_closure_holdout(ccl_private ShaderData *sd,
/* Closure Nodes */
-ccl_device_inline void svm_node_closure_store_weight(ccl_private ShaderData *sd, float3 weight)
+ccl_device_inline void svm_node_closure_store_weight(ccl_private ShaderData *sd, Spectrum weight)
{
sd->svm_closure_weight = weight;
}
ccl_device void svm_node_closure_set_weight(ccl_private ShaderData *sd, uint r, uint g, uint b)
{
- float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
+ Spectrum weight = rgb_to_spectrum(
+ make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)));
svm_node_closure_store_weight(sd, weight);
}
@@ -803,7 +811,7 @@ ccl_device void svm_node_closure_weight(ccl_private ShaderData *sd,
ccl_private float *stack,
uint weight_offset)
{
- float3 weight = stack_load_float3(stack, weight_offset);
+ Spectrum weight = rgb_to_spectrum(stack_load_float3(stack, weight_offset));
svm_node_closure_store_weight(sd, weight);
}
@@ -816,7 +824,7 @@ ccl_device_noinline void svm_node_emission_weight(KernelGlobals kg,
uint strength_offset = node.z;
float strength = stack_load_float(stack, strength_offset);
- float3 weight = stack_load_float3(stack, color_offset) * strength;
+ Spectrum weight = rgb_to_spectrum(stack_load_float3(stack, color_offset)) * strength;
svm_node_closure_store_weight(sd, weight);
}
diff --git a/intern/cycles/kernel/svm/closure_principled.h b/intern/cycles/kernel/svm/closure_principled.h
index c4a74a4da13..fd6df508e12 100644
--- a/intern/cycles/kernel/svm/closure_principled.h
+++ b/intern/cycles/kernel/svm/closure_principled.h
@@ -8,8 +8,8 @@ CCL_NAMESPACE_BEGIN
/* Principled v1 components */
ccl_device_inline void principled_v1_diffuse(ccl_private ShaderData *sd,
- float3 weight,
- float3 base_color,
+ Spectrum weight,
+ Spectrum base_color,
float diffuse_weight,
float3 N,
float roughness)
@@ -34,11 +34,11 @@ ccl_device_inline void principled_v1_diffuse(ccl_private ShaderData *sd,
ccl_device_inline void principled_v1_diffuse_sss(ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
+ Spectrum weight,
int path_flag,
uint data_1,
uint data_2,
- float3 base_color,
+ Spectrum base_color,
float diffuse_weight,
float3 N,
float roughness)
@@ -52,10 +52,10 @@ ccl_device_inline void principled_v1_diffuse_sss(ccl_private ShaderData *sd,
float subsurface = stack_load_float(stack, subsurface_offset);
float subsurface_anisotropy = stack_load_float(stack, aniso_offset);
float subsurface_ior = stack_load_float(stack, ior_offset);
- float3 subsurface_color = stack_load_float3(stack, color_offset);
- float3 subsurface_radius = stack_load_float3(stack, radius_offset);
+ Spectrum subsurface_color = rgb_to_spectrum(stack_load_float3(stack, color_offset));
+ Spectrum subsurface_radius = rgb_to_spectrum(stack_load_float3(stack, radius_offset));
- float3 mixed_ss_base_color = mix(base_color, subsurface_color, subsurface);
+ Spectrum mixed_ss_base_color = mix(base_color, subsurface_color, subsurface);
/* disable in case of diffuse ancestor, can't see it well then and
* adds considerably noise due to probabilities of continuing path
@@ -67,7 +67,7 @@ ccl_device_inline void principled_v1_diffuse_sss(ccl_private ShaderData *sd,
/* diffuse */
if (fabsf(average(mixed_ss_base_color)) > CLOSURE_WEIGHT_CUTOFF) {
if (subsurface > CLOSURE_WEIGHT_CUTOFF) {
- float3 subsurf_weight = weight * mixed_ss_base_color * diffuse_weight;
+ Spectrum subsurf_weight = weight * mixed_ss_base_color * diffuse_weight;
ccl_private Bssrdf *bssrdf = bssrdf_alloc(sd, subsurf_weight);
if (bssrdf == NULL) {
@@ -99,11 +99,11 @@ ccl_device_inline void principled_v1_diffuse_sss(ccl_private ShaderData *sd,
ccl_device_inline void principled_v1_specular(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
+ Spectrum weight,
int path_flag,
ClosureType distribution,
uint data,
- float3 base_color,
+ Spectrum base_color,
float3 N,
float specular_weight,
float metallic,
@@ -156,8 +156,8 @@ ccl_device_inline void principled_v1_specular(KernelGlobals kg,
// normalize lum. to isolate hue+sat
float m_cdlum = linear_rgb_to_gray(kg, base_color);
- float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : one_float3();
- float3 specular_color = mix(one_float3(), m_ctint, specular_tint);
+ Spectrum m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : one_spectrum();
+ Spectrum specular_color = mix(one_spectrum(), m_ctint, specular_tint);
bsdf->extra->cspec0 = mix(specular * 0.08f * specular_color, base_color, metallic);
bsdf->extra->color = base_color;
@@ -171,8 +171,8 @@ ccl_device_inline void principled_v1_specular(KernelGlobals kg,
}
ccl_device_inline void principled_v1_glass_refl(ccl_private ShaderData *sd,
- float3 weight,
- float3 base_color,
+ Spectrum weight,
+ Spectrum base_color,
float reflection_weight,
float3 N,
float roughness,
@@ -203,15 +203,15 @@ ccl_device_inline void principled_v1_glass_refl(ccl_private ShaderData *sd,
bsdf->ior = ior;
bsdf->extra->color = base_color;
- bsdf->extra->cspec0 = mix(one_float3(), base_color, specular_tint);
+ bsdf->extra->cspec0 = mix(one_spectrum(), base_color, specular_tint);
/* setup bsdf */
sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd);
}
ccl_device_inline void principled_v1_glass_refr(ccl_private ShaderData *sd,
- float3 weight,
- float3 base_color,
+ Spectrum weight,
+ Spectrum base_color,
float refraction_weight,
float3 N,
float roughness,
@@ -241,11 +241,11 @@ ccl_device_inline void principled_v1_glass_refr(ccl_private ShaderData *sd,
ccl_device_inline void principled_v1_glass_single(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
+ Spectrum weight,
int path_flag,
ClosureType distribution,
uint data,
- float3 base_color,
+ Spectrum base_color,
float glass_weight,
float3 N,
float roughness,
@@ -295,10 +295,10 @@ ccl_device_inline void principled_v1_glass_single(KernelGlobals kg,
ccl_device_inline void principled_v1_glass_multi(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
+ Spectrum weight,
int path_flag,
uint data,
- float3 base_color,
+ Spectrum base_color,
float glass_weight,
float3 N,
float roughness,
@@ -338,7 +338,7 @@ ccl_device_inline void principled_v1_glass_multi(KernelGlobals kg,
bsdf->ior = (sd->flag & SD_BACKFACING) ? 1.0f / eta : eta;
bsdf->extra->color = base_color;
- bsdf->extra->cspec0 = mix(one_float3(), base_color, specular_tint);
+ bsdf->extra->cspec0 = mix(one_spectrum(), base_color, specular_tint);
/* setup bsdf */
sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(kg, bsdf, sd);
@@ -347,9 +347,9 @@ ccl_device_inline void principled_v1_glass_multi(KernelGlobals kg,
ccl_device_inline void principled_v1_sheen(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
+ Spectrum weight,
uint data,
- float3 base_color,
+ Spectrum base_color,
float diffuse_weight,
float3 N)
{
@@ -364,11 +364,11 @@ ccl_device_inline void principled_v1_sheen(KernelGlobals kg,
// normalize lum. to isolate hue+sat
float m_cdlum = linear_rgb_to_gray(kg, base_color);
- float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : one_float3();
+ Spectrum m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : one_spectrum();
/* color of the sheen component */
float sheen_tint = stack_load_float(stack, sheen_tint_offset);
- float3 sheen_color = mix(one_float3(), m_ctint, sheen_tint);
+ Spectrum sheen_color = mix(one_spectrum(), m_ctint, sheen_tint);
ccl_private PrincipledSheenBsdf *bsdf = (ccl_private PrincipledSheenBsdf *)bsdf_alloc(
sd, sizeof(PrincipledSheenBsdf), sheen_weight * sheen_color * weight);
@@ -386,7 +386,7 @@ ccl_device_inline void principled_v1_sheen(KernelGlobals kg,
ccl_device_inline void principled_v1_clearcoat(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
+ Spectrum weight,
int path_flag,
uint data)
{
@@ -432,10 +432,10 @@ ccl_device_inline void principled_v1_clearcoat(KernelGlobals kg,
ccl_device_inline void principled_v2_diffuse_sss(ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
+ Spectrum weight,
int path_flag,
uint data,
- float3 base_color,
+ Spectrum base_color,
float ior,
float3 N)
{
@@ -448,7 +448,8 @@ ccl_device_inline void principled_v2_diffuse_sss(ccl_private ShaderData *sd,
svm_unpack_node_uchar4(data, &scale_offset, &aniso_offset, &radius_offset, &method);
float aniso = stack_load_float(stack, aniso_offset);
- float3 radius = stack_load_float3(stack, radius_offset) * stack_load_float(stack, scale_offset);
+ Spectrum radius = rgb_to_spectrum(stack_load_float3(stack, radius_offset)) *
+ stack_load_float(stack, scale_offset);
/* Fall back to diffuse if there has been a diffuse bounce before or the radius is too small. */
if ((path_flag & PATH_RAY_DIFFUSE_ANCESTOR) == 0 && reduce_max(radius) > 1e-7f) {
@@ -486,19 +487,19 @@ ccl_device_inline void principled_v2_diffuse_sss(ccl_private ShaderData *sd,
sd->flag |= bsdf_diffuse_setup(bsdf);
}
-ccl_device_inline float3 principled_v2_clearcoat(KernelGlobals kg,
- ccl_private ShaderData *sd,
- ccl_private float *stack,
- float3 weight,
- int path_flag,
- uint data)
+ccl_device_inline Spectrum principled_v2_clearcoat(KernelGlobals kg,
+ ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ Spectrum weight,
+ int path_flag,
+ uint data)
{
uint clearcoat_offset, roughness_offset, tint_offset, normal_offset;
svm_unpack_node_uchar4(data, &clearcoat_offset, &roughness_offset, &tint_offset, &normal_offset);
float3 N = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
- float3 tint = saturate(stack_load_float3(stack, tint_offset));
- if (tint != one_float3()) {
+ Spectrum tint = saturate(rgb_to_spectrum(stack_load_float3(stack, tint_offset)));
+ if (tint != one_spectrum()) {
/* Tint is normalized to perpendicular incidence.
* Therefore, if we define the coating thickness as length 1, the length along the ray is
* t = sqrt(1+tan^2(angle(N, I))) = sqrt(1+tan^2(acos(dotNI))) = 1 / dotNI.
@@ -565,8 +566,8 @@ ccl_device_inline float3 principled_v2_clearcoat(KernelGlobals kg,
ccl_device_inline float principled_v2_sheen(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
- float3 N,
+ Spectrum weight,
+ Spectrum N,
uint data)
{
uint sheen_offset, sheen_tint_offset, sheen_roughness_offset, dummy;
@@ -597,8 +598,8 @@ ccl_device_inline float principled_v2_sheen(KernelGlobals kg,
ccl_device_inline float principled_v2_specular(KernelGlobals kg,
ccl_private ShaderData *sd,
ccl_private float *stack,
- float3 weight,
- float3 base_color,
+ Spectrum weight,
+ Spectrum base_color,
float roughness,
float metallic,
float ior,
@@ -632,7 +633,7 @@ ccl_device_inline float principled_v2_specular(KernelGlobals kg,
T = stack_load_float3(stack, tangent_offset);
T = rotate_around_axis(T, N, stack_load_float(stack, rotation_offset) * M_2PI_F);
}
- float3 edge_color = stack_load_float3(stack, edge_offset);
+ Spectrum edge_color = rgb_to_spectrum(stack_load_float3(stack, edge_offset));
ccl_private MicrofacetBsdf *bsdf = (ccl_private MicrofacetBsdf *)bsdf_alloc(
sd, sizeof(MicrofacetBsdf), weight);
@@ -664,7 +665,7 @@ ccl_device_inline float principled_v2_specular(KernelGlobals kg,
ccl_device_inline void principled_v2_glass(KernelGlobals kg,
ccl_private ShaderData *sd,
- float3 weight,
+ Spectrum weight,
float transmission,
float roughness,
float ior,
@@ -683,7 +684,7 @@ ccl_device_inline void principled_v2_glass(KernelGlobals kg,
bsdf->ior = (sd->flag & SD_BACKFACING) ? 1.0f / ior : ior;
/* setup bsdf */
- sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(kg, bsdf, sd, one_float3());
+ sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(kg, bsdf, sd, one_spectrum());
}
ccl_device void svm_node_closure_principled_v2(KernelGlobals kg,
@@ -695,7 +696,7 @@ ccl_device void svm_node_closure_principled_v2(KernelGlobals kg,
int path_flag,
int *offset)
{
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
/* Load shared parameter data. */
uint base_color_offset, normal_offset, dummy;
@@ -704,7 +705,7 @@ ccl_device void svm_node_closure_principled_v2(KernelGlobals kg,
svm_unpack_node_uchar4(
node_1.z, &roughness_offset, &metallic_offset, &ior_offset, &transmission_offset);
- float3 base_color = stack_load_float3(stack, base_color_offset);
+ Spectrum base_color = rgb_to_spectrum(stack_load_float3(stack, base_color_offset));
float3 N = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
if (!(sd->type & PRIMITIVE_CURVE)) {
N = ensure_valid_reflection(sd->Ng, sd->I, N);
@@ -764,7 +765,7 @@ ccl_device void svm_node_closure_principled(KernelGlobals kg,
svm_unpack_node_uchar4(
node_1.z, &roughness_offset, &metallic_offset, &transmission_offset, &specular_tint_offset);
- float3 base_color = stack_load_float3(stack, base_color_offset);
+ Spectrum base_color = rgb_to_spectrum(stack_load_float3(stack, base_color_offset));
float3 N = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
if (!(sd->type & PRIMITIVE_CURVE)) {
N = ensure_valid_reflection(sd->Ng, sd->I, N);
@@ -782,7 +783,7 @@ ccl_device void svm_node_closure_principled(KernelGlobals kg,
* since it models both the metallic specular as well as the non-glass dielectric specular.
* This only affects materials mixing diffuse, glass AND metal though. */
float specular_weight = (1.0f - transmission);
- float3 weight = sd->svm_closure_weight * mix_weight;
+ Spectrum weight = sd->svm_closure_weight * mix_weight;
/* Diffuse and subsurface */
principled_v1_diffuse_sss(
diff --git a/intern/cycles/kernel/svm/color_util.h b/intern/cycles/kernel/svm/color_util.h
index 41f44378ff0..96adb6fd64c 100644
--- a/intern/cycles/kernel/svm/color_util.h
+++ b/intern/cycles/kernel/svm/color_util.h
@@ -247,10 +247,8 @@ ccl_device float3 svm_mix_clamp(float3 col)
return saturate(col);
}
-ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
+ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float t, float3 c1, float3 c2)
{
- float t = saturatef(fac);
-
switch (type) {
case NODE_MIX_BLEND:
return svm_mix_blend(t, c1, c2);
@@ -282,7 +280,7 @@ ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float
return svm_mix_sat(t, c1, c2);
case NODE_MIX_VAL:
return svm_mix_val(t, c1, c2);
- case NODE_MIX_COLOR:
+ case NODE_MIX_COL:
return svm_mix_color(t, c1, c2);
case NODE_MIX_SOFT:
return svm_mix_soft(t, c1, c2);
@@ -295,6 +293,12 @@ ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float
return make_float3(0.0f, 0.0f, 0.0f);
}
+ccl_device_noinline_cpu float3 svm_mix_clamped_factor(NodeMix type, float t, float3 c1, float3 c2)
+{
+ float fac = saturatef(t);
+ return svm_mix(type, fac, c1, c2);
+}
+
ccl_device_inline float3 svm_brightness_contrast(float3 color, float brightness, float contrast)
{
float a = 1.0f + contrast;
diff --git a/intern/cycles/kernel/svm/displace.h b/intern/cycles/kernel/svm/displace.h
index 128023263fd..230f8c73820 100644
--- a/intern/cycles/kernel/svm/displace.h
+++ b/intern/cycles/kernel/svm/displace.h
@@ -24,18 +24,17 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) :
sd->N;
- float3 dPdx = sd->dP.dx;
- float3 dPdy = sd->dP.dy;
+ differential3 dP = differential_from_compact(sd->Ng, sd->dP);
if (use_object_space) {
object_inverse_normal_transform(kg, sd, &normal_in);
- object_inverse_dir_transform(kg, sd, &dPdx);
- object_inverse_dir_transform(kg, sd, &dPdy);
+ object_inverse_dir_transform(kg, sd, &dP.dx);
+ object_inverse_dir_transform(kg, sd, &dP.dy);
}
/* get surface tangents from normal */
- float3 Rx = cross(dPdy, normal_in);
- float3 Ry = cross(normal_in, dPdx);
+ float3 Rx = cross(dP.dy, normal_in);
+ float3 Ry = cross(normal_in, dP.dx);
/* get bump values */
uint c_offset, x_offset, y_offset, strength_offset;
@@ -46,7 +45,7 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
float h_y = stack_load_float(stack, y_offset);
/* compute surface gradient and determinant */
- float det = dot(dPdx, Rx);
+ float det = dot(dP.dx, Rx);
float3 surfgrad = (h_x - h_c) * Rx + (h_y - h_c) * Ry;
float absdet = fabsf(det);
diff --git a/intern/cycles/kernel/svm/geometry.h b/intern/cycles/kernel/svm/geometry.h
index 4b5368dd765..cbd87d84409 100644
--- a/intern/cycles/kernel/svm/geometry.h
+++ b/intern/cycles/kernel/svm/geometry.h
@@ -34,7 +34,7 @@ ccl_device_noinline void svm_node_geometry(KernelGlobals kg,
data = sd->Ng;
break;
case NODE_GEOM_uv:
- data = make_float3(sd->u, sd->v, 0.0f);
+ data = make_float3(1.0f - sd->u - sd->v, sd->u, 0.0f);
break;
default:
data = make_float3(0.0f, 0.0f, 0.0f);
@@ -54,10 +54,10 @@ ccl_device_noinline void svm_node_geometry_bump_dx(KernelGlobals kg,
switch (type) {
case NODE_GEOM_P:
- data = sd->P + sd->dP.dx;
+ data = svm_node_bump_P_dx(sd);
break;
case NODE_GEOM_uv:
- data = make_float3(sd->u + sd->du.dx, sd->v + sd->dv.dx, 0.0f);
+ data = make_float3(1.0f - sd->u - sd->du.dx - sd->v - sd->dv.dx, sd->u + sd->du.dx, 0.0f);
break;
default:
svm_node_geometry(kg, sd, stack, type, out_offset);
@@ -81,10 +81,10 @@ ccl_device_noinline void svm_node_geometry_bump_dy(KernelGlobals kg,
switch (type) {
case NODE_GEOM_P:
- data = sd->P + sd->dP.dy;
+ data = svm_node_bump_P_dy(sd);
break;
case NODE_GEOM_uv:
- data = make_float3(sd->u + sd->du.dy, sd->v + sd->dv.dy, 0.0f);
+ data = make_float3(1.0f - sd->u - sd->du.dy - sd->v - sd->dv.dy, sd->u + sd->du.dy, 0.0f);
break;
default:
svm_node_geometry(kg, sd, stack, type, out_offset);
diff --git a/intern/cycles/kernel/svm/mix.h b/intern/cycles/kernel/svm/mix.h
index a9796096410..ead2fc44685 100644
--- a/intern/cycles/kernel/svm/mix.h
+++ b/intern/cycles/kernel/svm/mix.h
@@ -21,10 +21,94 @@ ccl_device_noinline int svm_node_mix(KernelGlobals kg,
float fac = stack_load_float(stack, fac_offset);
float3 c1 = stack_load_float3(stack, c1_offset);
float3 c2 = stack_load_float3(stack, c2_offset);
- float3 result = svm_mix((NodeMix)node1.y, fac, c1, c2);
+ float3 result = svm_mix_clamped_factor((NodeMix)node1.y, fac, c1, c2);
stack_store_float3(stack, node1.z, result);
return offset;
}
+ccl_device_noinline void svm_node_mix_color(ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ uint options,
+ uint input_offset,
+ uint result_offset)
+{
+ uint use_clamp, blend_type, use_clamp_result;
+ uint fac_in_stack_offset, a_in_stack_offset, b_in_stack_offset;
+ svm_unpack_node_uchar3(options, &use_clamp, &blend_type, &use_clamp_result);
+ svm_unpack_node_uchar3(
+ input_offset, &fac_in_stack_offset, &a_in_stack_offset, &b_in_stack_offset);
+
+ float t = stack_load_float(stack, fac_in_stack_offset);
+ if (use_clamp > 0) {
+ t = saturatef(t);
+ }
+ float3 a = stack_load_float3(stack, a_in_stack_offset);
+ float3 b = stack_load_float3(stack, b_in_stack_offset);
+ float3 result = svm_mix((NodeMix)blend_type, t, a, b);
+ if (use_clamp_result) {
+ result = saturate(result);
+ }
+ stack_store_float3(stack, result_offset, result);
+}
+
+ccl_device_noinline void svm_node_mix_float(ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ uint use_clamp,
+ uint input_offset,
+ uint result_offset)
+{
+ uint fac_in_stack_offset, a_in_stack_offset, b_in_stack_offset;
+ svm_unpack_node_uchar3(
+ input_offset, &fac_in_stack_offset, &a_in_stack_offset, &b_in_stack_offset);
+
+ float t = stack_load_float(stack, fac_in_stack_offset);
+ if (use_clamp > 0) {
+ t = saturatef(t);
+ }
+ float a = stack_load_float(stack, a_in_stack_offset);
+ float b = stack_load_float(stack, b_in_stack_offset);
+ float result = a * (1 - t) + b * t;
+
+ stack_store_float(stack, result_offset, result);
+}
+
+ccl_device_noinline void svm_node_mix_vector(ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ uint input_offset,
+ uint result_offset)
+{
+ uint use_clamp, fac_in_stack_offset, a_in_stack_offset, b_in_stack_offset;
+ svm_unpack_node_uchar4(
+ input_offset, &use_clamp, &fac_in_stack_offset, &a_in_stack_offset, &b_in_stack_offset);
+
+ float t = stack_load_float(stack, fac_in_stack_offset);
+ if (use_clamp > 0) {
+ t = saturatef(t);
+ }
+ float3 a = stack_load_float3(stack, a_in_stack_offset);
+ float3 b = stack_load_float3(stack, b_in_stack_offset);
+ float3 result = a * (one_float3() - t) + b * t;
+ stack_store_float3(stack, result_offset, result);
+}
+
+ccl_device_noinline void svm_node_mix_vector_non_uniform(ccl_private ShaderData *sd,
+ ccl_private float *stack,
+ uint input_offset,
+ uint result_offset)
+{
+ uint use_clamp, fac_in_stack_offset, a_in_stack_offset, b_in_stack_offset;
+ svm_unpack_node_uchar4(
+ input_offset, &use_clamp, &fac_in_stack_offset, &a_in_stack_offset, &b_in_stack_offset);
+
+ float3 t = stack_load_float3(stack, fac_in_stack_offset);
+ if (use_clamp > 0) {
+ t = saturate(t);
+ }
+ float3 a = stack_load_float3(stack, a_in_stack_offset);
+ float3 b = stack_load_float3(stack, b_in_stack_offset);
+ float3 result = a * (one_float3() - t) + b * t;
+ stack_store_float3(stack, result_offset, result);
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/musgrave.h b/intern/cycles/kernel/svm/musgrave.h
index 521c96d9f37..8bf172f0981 100644
--- a/intern/cycles/kernel/svm/musgrave.h
+++ b/intern/cycles/kernel/svm/musgrave.h
@@ -119,13 +119,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_1d(
{
float p = co;
float pwHL = powf(lacunarity, -H);
- float pwr = pwHL;
- float value = snoise_1d(p) + offset;
- float weight = gain * value;
- p *= lacunarity;
+ float pwr = 1.0f;
+ float value = 0.0f;
+ float weight = 1.0f;
- for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ for (int i = 0; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
if (weight > 1.0f) {
weight = 1.0f;
}
@@ -138,8 +137,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_1d(
}
float rmd = octaves - floorf(octaves);
- if (rmd != 0.0f) {
- value += rmd * ((snoise_1d(p) + offset) * pwr);
+ if ((rmd != 0.0f) && (weight > 0.001f)) {
+ if (weight > 1.0f) {
+ weight = 1.0f;
+ }
+ float signal = (snoise_1d(p) + offset) * pwr;
+ value += rmd * weight * signal;
}
return value;
@@ -290,13 +293,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_2d(
{
float2 p = co;
float pwHL = powf(lacunarity, -H);
- float pwr = pwHL;
- float value = snoise_2d(p) + offset;
- float weight = gain * value;
- p *= lacunarity;
+ float pwr = 1.0f;
+ float value = 0.0f;
+ float weight = 1.0f;
- for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ for (int i = 0; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
if (weight > 1.0f) {
weight = 1.0f;
}
@@ -309,8 +311,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_2d(
}
float rmd = octaves - floorf(octaves);
- if (rmd != 0.0f) {
- value += rmd * ((snoise_2d(p) + offset) * pwr);
+ if ((rmd != 0.0f) && (weight > 0.001f)) {
+ if (weight > 1.0f) {
+ weight = 1.0f;
+ }
+ float signal = (snoise_2d(p) + offset) * pwr;
+ value += rmd * weight * signal;
}
return value;
@@ -461,13 +467,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_3d(
{
float3 p = co;
float pwHL = powf(lacunarity, -H);
- float pwr = pwHL;
- float value = snoise_3d(p) + offset;
- float weight = gain * value;
- p *= lacunarity;
+ float pwr = 1.0f;
+ float value = 0.0f;
+ float weight = 1.0f;
- for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ for (int i = 0; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
if (weight > 1.0f) {
weight = 1.0f;
}
@@ -480,8 +485,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_3d(
}
float rmd = octaves - floorf(octaves);
- if (rmd != 0.0f) {
- value += rmd * ((snoise_3d(p) + offset) * pwr);
+ if ((rmd != 0.0f) && (weight > 0.001f)) {
+ if (weight > 1.0f) {
+ weight = 1.0f;
+ }
+ float signal = (snoise_3d(p) + offset) * pwr;
+ value += rmd * weight * signal;
}
return value;
@@ -632,13 +641,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_4d(
{
float4 p = co;
float pwHL = powf(lacunarity, -H);
- float pwr = pwHL;
- float value = snoise_4d(p) + offset;
- float weight = gain * value;
- p *= lacunarity;
+ float pwr = 1.0f;
+ float value = 0.0f;
+ float weight = 1.0f;
- for (int i = 1; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
+ for (int i = 0; (weight > 0.001f) && (i < float_to_int(octaves)); i++) {
if (weight > 1.0f) {
weight = 1.0f;
}
@@ -651,8 +659,12 @@ ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal_4d(
}
float rmd = octaves - floorf(octaves);
- if (rmd != 0.0f) {
- value += rmd * ((snoise_4d(p) + offset) * pwr);
+ if ((rmd != 0.0f) && (weight > 0.001f)) {
+ if (weight > 1.0f) {
+ weight = 1.0f;
+ }
+ float signal = (snoise_4d(p) + offset) * pwr;
+ value += rmd * weight * signal;
}
return value;
diff --git a/intern/cycles/kernel/svm/node_types_template.h b/intern/cycles/kernel/svm/node_types_template.h
index 39d279be4cb..aab9b9f1158 100644
--- a/intern/cycles/kernel/svm/node_types_template.h
+++ b/intern/cycles/kernel/svm/node_types_template.h
@@ -103,6 +103,10 @@ SHADER_NODE_TYPE(NODE_AOV_START)
SHADER_NODE_TYPE(NODE_AOV_COLOR)
SHADER_NODE_TYPE(NODE_AOV_VALUE)
SHADER_NODE_TYPE(NODE_FLOAT_CURVE)
+SHADER_NODE_TYPE(NODE_MIX_COLOR)
+SHADER_NODE_TYPE(NODE_MIX_FLOAT)
+SHADER_NODE_TYPE(NODE_MIX_VECTOR)
+SHADER_NODE_TYPE(NODE_MIX_VECTOR_NON_UNIFORM)
/* Padding for struct alignment. */
SHADER_NODE_TYPE(NODE_PAD1)
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 9d6d3e9222c..3ca632c5f0b 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -585,6 +585,18 @@ ccl_device void svm_eval_nodes(KernelGlobals kg,
SVM_CASE(NODE_AOV_VALUE)
svm_node_aov_value<node_feature_mask>(kg, state, sd, stack, node, render_buffer);
break;
+ SVM_CASE(NODE_MIX_COLOR)
+ svm_node_mix_color(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_MIX_FLOAT)
+ svm_node_mix_float(sd, stack, node.y, node.z, node.w);
+ break;
+ SVM_CASE(NODE_MIX_VECTOR)
+ svm_node_mix_vector(sd, stack, node.y, node.z);
+ break;
+ SVM_CASE(NODE_MIX_VECTOR_NON_UNIFORM)
+ svm_node_mix_vector_non_uniform(sd, stack, node.y, node.z);
+ break;
default:
kernel_assert(!"Unknown node type was passed to the SVM machine");
return;
diff --git a/intern/cycles/kernel/svm/tex_coord.h b/intern/cycles/kernel/svm/tex_coord.h
index 2a0130e11d4..8154c542e6f 100644
--- a/intern/cycles/kernel/svm/tex_coord.h
+++ b/intern/cycles/kernel/svm/tex_coord.h
@@ -106,7 +106,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
switch (type) {
case NODE_TEXCO_OBJECT: {
- data = sd->P + sd->dP.dx;
+ data = svm_node_bump_P_dx(sd);
if (node.w == 0) {
if (sd->object != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &data);
@@ -130,9 +130,9 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
Transform tfm = kernel_data.cam.worldtocamera;
if (sd->object != OBJECT_NONE)
- data = transform_point(&tfm, sd->P + sd->dP.dx);
+ data = transform_point(&tfm, svm_node_bump_P_dx(sd));
else
- data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg));
+ data = transform_point(&tfm, svm_node_bump_P_dx(sd) + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
@@ -140,7 +140,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P);
else
- data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
+ data = camera_world_to_ndc(kg, sd, svm_node_bump_P_dx(sd));
data.z = 0.0f;
break;
}
@@ -160,7 +160,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dx(KernelGlobals kg,
break;
}
case NODE_TEXCO_VOLUME_GENERATED: {
- data = sd->P + sd->dP.dx;
+ data = svm_node_bump_P_dx(sd);
# ifdef __VOLUME__
if (sd->object != OBJECT_NONE)
@@ -191,7 +191,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
switch (type) {
case NODE_TEXCO_OBJECT: {
- data = sd->P + sd->dP.dy;
+ data = svm_node_bump_P_dy(sd);
if (node.w == 0) {
if (sd->object != OBJECT_NONE) {
object_inverse_position_transform(kg, sd, &data);
@@ -215,9 +215,9 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
Transform tfm = kernel_data.cam.worldtocamera;
if (sd->object != OBJECT_NONE)
- data = transform_point(&tfm, sd->P + sd->dP.dy);
+ data = transform_point(&tfm, svm_node_bump_P_dy(sd));
else
- data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg));
+ data = transform_point(&tfm, svm_node_bump_P_dy(sd) + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
@@ -225,7 +225,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P);
else
- data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
+ data = camera_world_to_ndc(kg, sd, svm_node_bump_P_dy(sd));
data.z = 0.0f;
break;
}
@@ -245,7 +245,7 @@ ccl_device_noinline int svm_node_tex_coord_bump_dy(KernelGlobals kg,
break;
}
case NODE_TEXCO_VOLUME_GENERATED: {
- data = sd->P + sd->dP.dy;
+ data = svm_node_bump_P_dy(sd);
# ifdef __VOLUME__
if (sd->object != OBJECT_NONE)
diff --git a/intern/cycles/kernel/svm/types.h b/intern/cycles/kernel/svm/types.h
index ef44afa9c24..bce403a2df5 100644
--- a/intern/cycles/kernel/svm/types.h
+++ b/intern/cycles/kernel/svm/types.h
@@ -12,7 +12,7 @@ CCL_NAMESPACE_BEGIN
/* SVM stack offsets with this value indicate that it's not on the stack */
#define SVM_STACK_INVALID 255
-#define SVM_BUMP_EVAL_STATE_SIZE 9
+#define SVM_BUMP_EVAL_STATE_SIZE 4
/* Nodes */
@@ -133,7 +133,7 @@ typedef enum NodeMix {
NODE_MIX_HUE,
NODE_MIX_SAT,
NODE_MIX_VAL,
- NODE_MIX_COLOR,
+ NODE_MIX_COL,
NODE_MIX_SOFT,
NODE_MIX_LINEAR,
NODE_MIX_CLAMP /* used for the clamp UI option */
diff --git a/intern/cycles/kernel/svm/wireframe.h b/intern/cycles/kernel/svm/wireframe.h
index e5fe08e5d04..91fadf4cfc4 100644
--- a/intern/cycles/kernel/svm/wireframe.h
+++ b/intern/cycles/kernel/svm/wireframe.h
@@ -14,6 +14,7 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float wireframe(KernelGlobals kg,
ccl_private ShaderData *sd,
+ const differential3 dP,
float size,
int pixel_size,
ccl_private float3 *P)
@@ -46,8 +47,8 @@ ccl_device_inline float wireframe(KernelGlobals kg,
if (pixel_size) {
// Project the derivatives of P to the viewing plane defined
// by I so we have a measure of how big is a pixel at this point
- float pixelwidth_x = len(sd->dP.dx - dot(sd->dP.dx, sd->I) * sd->I);
- float pixelwidth_y = len(sd->dP.dy - dot(sd->dP.dy, sd->I) * sd->I);
+ float pixelwidth_x = len(dP.dx - dot(dP.dx, sd->I) * sd->I);
+ float pixelwidth_y = len(dP.dy - dot(dP.dy, sd->I) * sd->I);
// Take the average of both axis' length
pixelwidth = (pixelwidth_x + pixelwidth_y) * 0.5f;
}
@@ -86,16 +87,17 @@ ccl_device_noinline void svm_node_wireframe(KernelGlobals kg,
int pixel_size = (int)use_pixel_size;
/* Calculate wireframe */
- float f = wireframe(kg, sd, size, pixel_size, &sd->P);
+ const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
+ float f = wireframe(kg, sd, dP, size, pixel_size, &sd->P);
/* TODO(sergey): Think of faster way to calculate derivatives. */
if (bump_offset == NODE_BUMP_OFFSET_DX) {
- float3 Px = sd->P - sd->dP.dx;
- f += (f - wireframe(kg, sd, size, pixel_size, &Px)) / len(sd->dP.dx);
+ float3 Px = sd->P - dP.dx;
+ f += (f - wireframe(kg, sd, dP, size, pixel_size, &Px)) / len(dP.dx);
}
else if (bump_offset == NODE_BUMP_OFFSET_DY) {
- float3 Py = sd->P - sd->dP.dy;
- f += (f - wireframe(kg, sd, size, pixel_size, &Py)) / len(sd->dP.dy);
+ float3 Py = sd->P - dP.dy;
+ f += (f - wireframe(kg, sd, dP, size, pixel_size, &Py)) / len(dP.dy);
}
if (stack_valid(out_fac))
diff --git a/intern/cycles/kernel/tables.h b/intern/cycles/kernel/tables.h
index c1fdbba3fa7..399eea1e2b1 100644
--- a/intern/cycles/kernel/tables.h
+++ b/intern/cycles/kernel/tables.h
@@ -63,4 +63,57 @@ ccl_inline_constant float cie_colour_match[][3] = {
{0.0001f, 0.0000f, 0.0000f}, {0.0001f, 0.0000f, 0.0000f}, {0.0000f, 0.0000f, 0.0000f}
};
+/*
+ * The direction vectors for the first four dimensions of the Sobol
+ * sequence, stored with reversed-order bits.
+ *
+ * This is used in the Sobol-Burley sampler implementation. We don't
+ * need more than four dimensions because we achieve higher dimensions
+ * with padding. They're stored with reversed bits because we need
+ * them reversed for the fast hash-based Owen scrambling anyway, and
+ * this avoids doing that at run time.
+ */
+ccl_inline_constant unsigned int sobol_burley_table[4][32] = {
+ {
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+ 0x00000010, 0x00000020, 0x00000040, 0x00000080,
+ 0x00000100, 0x00000200, 0x00000400, 0x00000800,
+ 0x00001000, 0x00002000, 0x00004000, 0x00008000,
+ 0x00010000, 0x00020000, 0x00040000, 0x00080000,
+ 0x00100000, 0x00200000, 0x00400000, 0x00800000,
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ },
+ {
+ 0x00000001, 0x00000003, 0x00000005, 0x0000000f,
+ 0x00000011, 0x00000033, 0x00000055, 0x000000ff,
+ 0x00000101, 0x00000303, 0x00000505, 0x00000f0f,
+ 0x00001111, 0x00003333, 0x00005555, 0x0000ffff,
+ 0x00010001, 0x00030003, 0x00050005, 0x000f000f,
+ 0x00110011, 0x00330033, 0x00550055, 0x00ff00ff,
+ 0x01010101, 0x03030303, 0x05050505, 0x0f0f0f0f,
+ 0x11111111, 0x33333333, 0x55555555, 0xffffffff,
+ },
+ {
+ 0x00000001, 0x00000003, 0x00000006, 0x00000009,
+ 0x00000017, 0x0000003a, 0x00000071, 0x000000a3,
+ 0x00000116, 0x00000339, 0x00000677, 0x000009aa,
+ 0x00001601, 0x00003903, 0x00007706, 0x0000aa09,
+ 0x00010117, 0x0003033a, 0x00060671, 0x000909a3,
+ 0x00171616, 0x003a3939, 0x00717777, 0x00a3aaaa,
+ 0x01170001, 0x033a0003, 0x06710006, 0x09a30009,
+ 0x16160017, 0x3939003a, 0x77770071, 0xaaaa00a3,
+ },
+ {
+ 0x00000001, 0x00000003, 0x00000004, 0x0000000a,
+ 0x0000001f, 0x0000002e, 0x00000045, 0x000000c9,
+ 0x0000011b, 0x000002a4, 0x0000079a, 0x00000b67,
+ 0x0000101e, 0x0000302d, 0x00004041, 0x0000a0c3,
+ 0x0001f104, 0x0002e28a, 0x000457df, 0x000c9bae,
+ 0x0011a105, 0x002a7289, 0x0079e7db, 0x00b6dba4,
+ 0x0100011a, 0x030002a7, 0x0400079e, 0x0a000b6d,
+ 0x1f001001, 0x2e003003, 0x45004004, 0xc900a00a,
+ },
+};
+
/* clang-format on */
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index 9cb610503b7..b0d1992738d 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -19,10 +19,6 @@
#include "kernel/svm/types.h"
-#ifndef __KERNEL_GPU__
-# define __KERNEL_CPU__
-#endif
-
CCL_NAMESPACE_BEGIN
/* Constants */
@@ -51,69 +47,52 @@ CCL_NAMESPACE_BEGIN
#define INTEGRATOR_SHADOW_ISECT_SIZE_CPU 1024U
#define INTEGRATOR_SHADOW_ISECT_SIZE_GPU 4U
-#ifdef __KERNEL_CPU__
-# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
-#else
+#ifdef __KERNEL_GPU__
# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_GPU
+#else
+# define INTEGRATOR_SHADOW_ISECT_SIZE INTEGRATOR_SHADOW_ISECT_SIZE_CPU
#endif
/* Kernel features */
-#define __SOBOL__
-#define __DPDU__
-#define __BACKGROUND__
+#define __AO__
#define __CAUSTICS_TRICKS__
-#define __VISIBILITY_FLAG__
-#define __RAY_DIFFERENTIALS__
-#define __CAMERA_CLIPPING__
-#define __INTERSECTION_REFINE__
#define __CLAMP_SAMPLE__
-#define __PATCH_EVAL__
-#define __SHADOW_CATCHER__
#define __DENOISING_FEATURES__
-#define __SHADER_RAYTRACE__
-#define __AO__
-#define __PASSES__
+#define __DPDU__
#define __HAIR__
+#define __OBJECT_MOTION__
+#define __PASSES__
+#define __PATCH_EVAL__
#define __POINTCLOUD__
+#define __RAY_DIFFERENTIALS__
+#define __SHADER_RAYTRACE__
+#define __SHADOW_CATCHER__
+#define __SHADOW_RECORD_ALL__
+#define __SUBSURFACE__
#define __SVM__
-#define __EMISSION__
-#define __HOLDOUT__
#define __TRANSPARENT_SHADOWS__
-#define __BACKGROUND_MIS__
-#define __LAMP_MIS__
-#define __CAMERA_MOTION__
-#define __OBJECT_MOTION__
-#define __BAKING__
-#define __PRINCIPLED__
-#define __SUBSURFACE__
+#define __VISIBILITY_FLAG__
#define __VOLUME__
-#define __CMJ__
-#define __SHADOW_RECORD_ALL__
-#define __BRANCHED_PATH__
/* Device specific features */
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
# ifdef WITH_OSL
# define __OSL__
# endif
+# ifdef WITH_PATH_GUIDING
+# define __PATH_GUIDING__
+# endif
# define __VOLUME_RECORD_ALL__
-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */
-#ifdef __KERNEL_GPU_RAYTRACING__
-# undef __BAKING__
-#endif /* __KERNEL_GPU_RAYTRACING__ */
-
-/* MNEE currently causes "Compute function exceeds available temporary registers"
- * on Metal, disabled for now. */
-#ifndef __KERNEL_METAL__
+/* MNEE caused "Compute function exceeds available temporary registers" in macOS < 13 due to a bug
+ * in spill buffer allocation sizing. */
+#if !defined(__KERNEL_METAL__) || (__KERNEL_METAL_MACOS__ >= 13)
# define __MNEE__
#endif
/* Scene-based selective features compilation. */
#ifdef __KERNEL_FEATURES__
-# if !(__KERNEL_FEATURES & KERNEL_FEATURE_CAMERA_MOTION)
-# undef __CAMERA_MOTION__
-# endif
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_OBJECT_MOTION)
# undef __OBJECT_MOTION__
# endif
@@ -129,9 +108,6 @@ CCL_NAMESPACE_BEGIN
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_SUBSURFACE)
# undef __SUBSURFACE__
# endif
-# if !(__KERNEL_FEATURES & KERNEL_FEATURE_BAKING)
-# undef __BAKING__
-# endif
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PATCH_EVALUATION)
# undef __PATCH_EVAL__
# endif
@@ -141,9 +117,6 @@ CCL_NAMESPACE_BEGIN
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_SHADOW_CATCHER)
# undef __SHADOW_CATCHER__
# endif
-# if !(__KERNEL_FEATURES & KERNEL_FEATURE_PRINCIPLED)
-# undef __PRINCIPLED__
-# endif
# if !(__KERNEL_FEATURES & KERNEL_FEATURE_DENOISING)
# undef __DENOISING_FEATURES__
# endif
@@ -159,36 +132,50 @@ CCL_NAMESPACE_BEGIN
# define __BVH_LOCAL__
#endif
-/* Path Tracing
- * note we need to keep the u/v pairs at even values */
+/* Sampling Patterns */
+/* Unique numbers for sampling patterns in each bounce. */
enum PathTraceDimension {
- PRNG_FILTER_U = 0,
- PRNG_FILTER_V = 1,
- PRNG_LENS_U = 2,
- PRNG_LENS_V = 3,
- PRNG_TIME = 4,
- PRNG_UNUSED_0 = 5,
- PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */
- PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */
- PRNG_BASE_NUM = 10,
-
- PRNG_BSDF_U = 0,
- PRNG_BSDF_V = 1,
- PRNG_LIGHT_U = 2,
- PRNG_LIGHT_V = 3,
- PRNG_LIGHT_TERMINATE = 4,
- PRNG_TERMINATE = 5,
- PRNG_PHASE_CHANNEL = 6,
- PRNG_SCATTER_DISTANCE = 7,
- PRNG_BOUNCE_NUM = 8,
-
- PRNG_BEVEL_U = 6, /* reuse volume dimension, correlation won't harm */
- PRNG_BEVEL_V = 7,
+ /* Init bounce */
+ PRNG_FILTER = 0,
+ PRNG_LENS = 1,
+ PRNG_TIME = 2,
+
+ /* Shade bounce */
+ PRNG_TERMINATE = 0,
+ PRNG_LIGHT = 1,
+ PRNG_LIGHT_TERMINATE = 2,
+ /* Surface */
+ PRNG_SURFACE_BSDF = 3,
+ PRNG_SURFACE_AO = 4,
+ PRNG_SURFACE_BEVEL = 5,
+ PRNG_SURFACE_BSDF_GUIDING = 6,
+ /* Volume */
+ PRNG_VOLUME_PHASE = 3,
+ PRNG_VOLUME_PHASE_CHANNEL = 4,
+ PRNG_VOLUME_SCATTER_DISTANCE = 5,
+ PRNG_VOLUME_OFFSET = 6,
+ PRNG_VOLUME_SHADE_OFFSET = 7,
+ PRNG_VOLUME_PHASE_GUIDING = 8,
+
+ /* Subsurface random walk bounces */
+ PRNG_SUBSURFACE_BSDF = 0,
+ PRNG_SUBSURFACE_PHASE_CHANNEL = 1,
+ PRNG_SUBSURFACE_SCATTER_DISTANCE = 2,
+ PRNG_SUBSURFACE_GUIDE_STRATEGY = 3,
+ PRNG_SUBSURFACE_GUIDE_DIRECTION = 4,
+
+ /* Subsurface disk bounce */
+ PRNG_SUBSURFACE_DISK = 0,
+ PRNG_SUBSURFACE_DISK_RESAMPLE = 1,
+
+ /* High enough number so we don't need to change it when adding new dimensions,
+ * low enough so there is no uint16_t overflow with many bounces. */
+ PRNG_BOUNCE_NUM = 16,
};
enum SamplingPattern {
- SAMPLING_PATTERN_SOBOL = 0,
+ SAMPLING_PATTERN_SOBOL_BURLEY = 0,
SAMPLING_PATTERN_PMJ = 1,
SAMPLING_NUM_PATTERNS,
@@ -405,6 +392,14 @@ typedef enum PassType {
PASS_SHADOW_CATCHER_SAMPLE_COUNT,
PASS_SHADOW_CATCHER_MATTE,
+ /* Guiding related debug rendering passes */
+ /* The estimated sample color from the PathSegmentStorage. If everything is integrated correctly
+ * the output should be similar to PASS_COMBINED. */
+ PASS_GUIDING_COLOR,
+ /* The guiding probability at the first bounce. */
+ PASS_GUIDING_PROBABILITY,
+ /* The avg. roughness at the first bounce. */
+ PASS_GUIDING_AVG_ROUGHNESS,
PASS_CATEGORY_DATA_END = 63,
PASS_BAKE_PRIMITIVE,
@@ -425,9 +420,9 @@ typedef enum CryptomatteType {
} CryptomatteType;
typedef struct BsdfEval {
- float3 diffuse;
- float3 glossy;
- float3 sum;
+ Spectrum diffuse;
+ Spectrum glossy;
+ Spectrum sum;
} BsdfEval;
/* Closure Filter */
@@ -473,6 +468,16 @@ typedef enum LightType {
LIGHT_TRIANGLE
} LightType;
+/* Guiding Distribution Type */
+
+typedef enum GuidingDistributionType {
+ GUIDING_TYPE_PARALLAX_AWARE_VMM = 0,
+ GUIDING_TYPE_DIRECTIONAL_QUAD_TREE = 1,
+ GUIDING_TYPE_VMM = 2,
+
+ GUIDING_NUM_TYPES,
+} GuidingDistributionType;
+
/* Camera Type */
enum CameraType { CAMERA_PERSPECTIVE, CAMERA_ORTHOGRAPHIC, CAMERA_PANORAMA };
@@ -485,6 +490,7 @@ enum PanoramaType {
PANORAMA_FISHEYE_EQUISOLID = 2,
PANORAMA_MIRRORBALL = 3,
PANORAMA_FISHEYE_LENS_POLYNOMIAL = 4,
+ PANORAMA_EQUIANGULAR_CUBEMAP_FACE = 5,
PANORAMA_NUM_TYPES,
};
@@ -673,12 +679,11 @@ typedef struct AttributeDescriptor {
/* For looking up attributes on objects and geometry. */
typedef struct AttributeMap {
- uint id; /* Global unique identifier. */
- uint element; /* AttributeElement. */
- int offset; /* Offset into __attributes global arrays. */
- uint8_t type; /* NodeAttributeType. */
- uint8_t flags; /* AttributeFlag. */
- uint8_t pad[2];
+ uint64_t id; /* Global unique identifier. */
+ int offset; /* Offset into __attributes global arrays. */
+ uint16_t element; /* AttributeElement. */
+ uint8_t type; /* NodeAttributeType. */
+ uint8_t flags; /* AttributeFlag. */
} AttributeMap;
/* Closure data */
@@ -721,7 +726,7 @@ typedef struct AttributeMap {
* padded to be 16 bytes, while it's only 12 bytes on the GPU. */
#define SHADER_CLOSURE_BASE \
- float3 weight; \
+ Spectrum weight; \
ClosureType type; \
float sample_weight; \
float3 N
@@ -730,10 +735,9 @@ typedef struct ccl_align(16) ShaderClosure
{
SHADER_CLOSURE_BASE;
-#ifdef __KERNEL_CPU__
- float pad[2];
-#endif
- float data[10];
+ /* Extra space for closures to store data, somewhat arbitrary but closures
+ * assert that their size fits. */
+ char pad[sizeof(Spectrum) * 2 + sizeof(float) * 4];
}
ShaderClosure;
@@ -886,10 +890,10 @@ typedef struct ccl_align(16) ShaderData
float ray_length;
#ifdef __RAY_DIFFERENTIALS__
- /* differential of P. these are orthogonal to Ng, not N */
- differential3 dP;
- /* differential of I */
- differential3 dI;
+ /* Radius of differential of P. */
+ float dP;
+ /* Radius of differential of I. */
+ float dI;
/* differential of u, v */
differential du;
differential dv;
@@ -924,12 +928,12 @@ typedef struct ccl_align(16) ShaderData
/* Closure data, we store a fixed array of closures */
int num_closure;
int num_closure_left;
- float3 svm_closure_weight;
+ Spectrum svm_closure_weight;
/* Closure weights summed directly, so we can evaluate
* emission and shadow transparency with MAX_CLOSURE 0. */
- float3 closure_emission_background;
- float3 closure_transparent_extinction;
+ Spectrum closure_emission_background;
+ Spectrum closure_transparent_extinction;
/* At the end so we can adjust size in ShaderDataTinyStorage. */
struct ShaderClosure closure[MAX_CLOSURE];
@@ -960,7 +964,7 @@ ShaderDataCausticsStorage;
* Used for decoupled direct/indirect light closure storage. */
typedef struct ShaderVolumeClosure {
- float3 weight;
+ Spectrum weight;
float sample_weight;
float g;
} ShaderVolumeClosure;
@@ -1165,7 +1169,7 @@ typedef struct KernelBake {
int use;
int object_index;
int tri_offset;
- int pad1;
+ int use_camera;
} KernelBake;
static_assert_align(KernelBake, 16);
@@ -1384,10 +1388,14 @@ typedef struct KernelShaderEvalInput {
} KernelShaderEvalInput;
static_assert_align(KernelShaderEvalInput, 16);
-/* Pre-computed sample table sizes for PMJ02 sampler. */
+/* Pre-computed sample table sizes for PMJ02 sampler.
+ *
+ * NOTE: divisions *must* be a power of two, and patterns
+ * ideally should be as well.
+ */
#define NUM_PMJ_DIVISIONS 32
#define NUM_PMJ_SAMPLES ((NUM_PMJ_DIVISIONS) * (NUM_PMJ_DIVISIONS))
-#define NUM_PMJ_PATTERNS 1
+#define NUM_PMJ_PATTERNS 64
/* Device kernels.
*
@@ -1494,42 +1502,41 @@ enum KernelFeatureFlag : uint32_t {
KERNEL_FEATURE_HAIR = (1U << 12U),
KERNEL_FEATURE_HAIR_THICK = (1U << 13U),
KERNEL_FEATURE_OBJECT_MOTION = (1U << 14U),
- KERNEL_FEATURE_CAMERA_MOTION = (1U << 15U),
/* Denotes whether baking functionality is needed. */
- KERNEL_FEATURE_BAKING = (1U << 16U),
+ KERNEL_FEATURE_BAKING = (1U << 15U),
/* Use subsurface scattering materials. */
- KERNEL_FEATURE_SUBSURFACE = (1U << 17U),
+ KERNEL_FEATURE_SUBSURFACE = (1U << 16U),
/* Use volume materials. */
- KERNEL_FEATURE_VOLUME = (1U << 18U),
+ KERNEL_FEATURE_VOLUME = (1U << 17U),
/* Use OpenSubdiv patch evaluation */
- KERNEL_FEATURE_PATCH_EVALUATION = (1U << 19U),
+ KERNEL_FEATURE_PATCH_EVALUATION = (1U << 18U),
/* Use Transparent shadows */
- KERNEL_FEATURE_TRANSPARENT = (1U << 20U),
+ KERNEL_FEATURE_TRANSPARENT = (1U << 19U),
/* Use shadow catcher. */
- KERNEL_FEATURE_SHADOW_CATCHER = (1U << 21U),
-
- /* Per-uber shader usage flags. */
- KERNEL_FEATURE_PRINCIPLED = (1U << 22U),
+ KERNEL_FEATURE_SHADOW_CATCHER = (1U << 29U),
/* Light render passes. */
- KERNEL_FEATURE_LIGHT_PASSES = (1U << 23U),
+ KERNEL_FEATURE_LIGHT_PASSES = (1U << 21U),
/* Shadow render pass. */
- KERNEL_FEATURE_SHADOW_PASS = (1U << 24U),
+ KERNEL_FEATURE_SHADOW_PASS = (1U << 22U),
/* AO. */
- KERNEL_FEATURE_AO_PASS = (1U << 25U),
- KERNEL_FEATURE_AO_ADDITIVE = (1U << 26U),
+ KERNEL_FEATURE_AO_PASS = (1U << 23U),
+ KERNEL_FEATURE_AO_ADDITIVE = (1U << 24U),
KERNEL_FEATURE_AO = (KERNEL_FEATURE_AO_PASS | KERNEL_FEATURE_AO_ADDITIVE),
/* MNEE. */
- KERNEL_FEATURE_MNEE = (1U << 27U),
+ KERNEL_FEATURE_MNEE = (1U << 25U),
+
+ /* Path guiding. */
+ KERNEL_FEATURE_PATH_GUIDING = (1U << 26U),
};
/* Shader node feature mask, to specialize shader evaluation for kernels. */
@@ -1556,15 +1563,15 @@ enum KernelFeatureFlag : uint32_t {
/* Must be constexpr on the CPU to avoid compile errors because the state types
* are different depending on the main, shadow or null path. For GPU we don't have
* C++17 everywhere so can't use it. */
-#ifdef __KERNEL_CPU__
+#ifdef __KERNEL_GPU__
+# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
+# define IF_KERNEL_NODES_FEATURE(feature) \
+ if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
+#else
# define IF_KERNEL_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
# define IF_KERNEL_NODES_FEATURE(feature) \
if constexpr ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
-#else
-# define IF_KERNEL_FEATURE(feature) if ((node_feature_mask & (KERNEL_FEATURE_##feature)) != 0U)
-# define IF_KERNEL_NODES_FEATURE(feature) \
- if ((node_feature_mask & (KERNEL_FEATURE_NODE_##feature)) != 0U)
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/util/color.h b/intern/cycles/kernel/util/color.h
index c85ef262d88..4983b9048d4 100644
--- a/intern/cycles/kernel/util/color.h
+++ b/intern/cycles/kernel/util/color.h
@@ -33,4 +33,19 @@ ccl_device float linear_rgb_to_gray(KernelGlobals kg, float3 c)
return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
}
+ccl_device_inline Spectrum rgb_to_spectrum(float3 rgb)
+{
+ return rgb;
+}
+
+ccl_device_inline float3 spectrum_to_rgb(Spectrum s)
+{
+ return s;
+}
+
+ccl_device float spectrum_to_gray(KernelGlobals kg, Spectrum c)
+{
+ return linear_rgb_to_gray(kg, spectrum_to_rgb(c));
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/util/differential.h b/intern/cycles/kernel/util/differential.h
index 3682e91ea66..aad9bb6bb22 100644
--- a/intern/cycles/kernel/util/differential.h
+++ b/intern/cycles/kernel/util/differential.h
@@ -101,53 +101,59 @@ ccl_device differential3 differential3_zero()
return d;
}
-/* Compact ray differentials that are just a scale to reduce memory usage and
- * access cost in GPU.
+/* Compact ray differentials that are just a radius to reduce memory usage and access cost
+ * on GPUs, basically cone tracing.
*
- * See above for more accurate reference implementations.
- *
- * TODO: also store the more compact version in ShaderData and recompute where
- * needed? */
+ * See above for more accurate reference implementations of ray differentials. */
ccl_device_forceinline float differential_zero_compact()
{
return 0.0f;
}
-ccl_device_forceinline float differential_make_compact(const differential3 D)
+ccl_device_forceinline float differential_make_compact(const float dD)
{
- return 0.5f * (len(D.dx) + len(D.dy));
+ return dD;
}
-ccl_device_forceinline void differential_transfer_compact(ccl_private differential3 *surface_dP,
- const float ray_dP,
- const float3 /* ray_D */,
- const float ray_dD,
- const float3 surface_Ng,
- const float ray_t)
+ccl_device_forceinline float differential_make_compact(const differential3 dD)
{
- /* ray differential transfer through homogeneous medium, to
- * compute dPdx/dy at a shading point from the incoming ray */
- float scale = ray_dP + ray_t * ray_dD;
+ return 0.5f * (len(dD.dx) + len(dD.dy));
+}
- float3 dx, dy;
- make_orthonormals(surface_Ng, &dx, &dy);
- surface_dP->dx = dx * scale;
- surface_dP->dy = dy * scale;
+ccl_device_forceinline float differential_incoming_compact(const float dD)
+{
+ return dD;
}
-ccl_device_forceinline void differential_incoming_compact(ccl_private differential3 *dI,
- const float3 D,
- const float dD)
+ccl_device_forceinline float differential_transfer_compact(const float ray_dP,
+ const float3 /* ray_D */,
+ const float ray_dD,
+ const float ray_t)
{
- /* compute dIdx/dy at a shading point, we just need to negate the
- * differential of the ray direction */
+ return ray_dP + ray_t * ray_dD;
+}
+ccl_device_forceinline differential3 differential_from_compact(const float3 D, const float dD)
+{
float3 dx, dy;
make_orthonormals(D, &dx, &dy);
- dI->dx = dD * dx;
- dI->dy = dD * dy;
+ differential3 d;
+ d.dx = dD * dx;
+ d.dy = dD * dy;
+ return d;
+}
+
+ccl_device void differential_dudv_compact(ccl_private differential *du,
+ ccl_private differential *dv,
+ float3 dPdu,
+ float3 dPdv,
+ float dP,
+ float3 Ng)
+{
+ /* TODO: can we speed this up? */
+ differential_dudv(du, dv, dPdu, dPdv, differential_from_compact(Ng, dP), Ng);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/util/profiling.h b/intern/cycles/kernel/util/profiling.h
index 39cabd35967..b8afaf1166d 100644
--- a/intern/cycles/kernel/util/profiling.h
+++ b/intern/cycles/kernel/util/profiling.h
@@ -3,13 +3,13 @@
#pragma once
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
# include "util/profiling.h"
#endif
CCL_NAMESPACE_BEGIN
-#ifdef __KERNEL_CPU__
+#ifndef __KERNEL_GPU__
# define PROFILING_INIT(kg, event) \
ProfilingHelper profiling_helper((ProfilingState *)&kg->profiler, event)
# define PROFILING_EVENT(event) profiling_helper.set_event(event)
@@ -22,6 +22,6 @@ CCL_NAMESPACE_BEGIN
# define PROFILING_EVENT(event)
# define PROFILING_INIT_FOR_SHADER(kg, event)
# define PROFILING_SHADER(object, shader)
-#endif /* __KERNEL_CPU__ */
+#endif /* !__KERNEL_GPU__ */
CCL_NAMESPACE_END